cfr fix

2025-12-01 17:57:44 +08:00
parent 547118ec6d
commit 8e4be3bda2
3 changed files with 82 additions and 4 deletions
--- a/src/cfr/calcu.py
+++ b/src/cfr/calcu.py
@@ -0,0 +1,73 @@
+import random
+from src.game.kuhn_poker import KuhnPoker
+from src.cfr.info_set import InfoSet
+
+
+info_map = {} 
+game = KuhnPoker()
+
+def cfr(cards, his, p0, p1):
+
+    if game.is_terminal(his):
+        return game.get_profit(cards, his, 0)
+    
+    player = game.get_cur_player(his)
+    
+    # + player ?
+    info_key = game.get_Info_set(cards[player], his, player)
+    
+    if info_key not in info_map:
+        info_map[info_key] = InfoSet(2) # 0-1
+    info = info_map[info_key]
+    
+    wgt = p0 if player == 0 else p1
+    strat = info.get_strat(wgt)
+    
+
+    act_profit = [0.0, 0.0]
+    info_profit = 0.0 # 期望
+    
+    for action in [0, 1]:
+        next_his = his + str(action)
+        
+        if player == 0:
+            profit = cfr(cards, next_his, p0*strat[action], p1)
+        else:
+            profit = cfr(cards, next_his, p0, p1*strat[action])
+        
+        act_profit[action] = profit
+        info_profit += strat[action]*profit
+
+    # 更新
+    for action in [0, 1]:
+        if player == 0:
+            regret = act_profit[action] - info_profit
+            other_r = p1
+        else:
+            regret = -(act_profit[action] - info_profit)
+            other_r = p0
+        info.regret_sum[action] += other_r * regret
+    
+    return info_profit
+
+
+def test():
+    
+    cards = ['J', 'Q', 'K']
+    p_sum = 0.0
+    
+    for i in range(10):
+        card_r = random.sample(cards, 2)
+        
+        pf = cfr(card_r, "", 1.0, 1.0)
+        p_sum += pf
+        
+        if (i + 1) % 10 == 0:
+            avg_p = p_sum / (i + 1)
+            print(f"Range {i+1}/10, Avg : {avg_p:.3f}")
+    
+    return p_sum / 10
+
+def print_strat():
+    
+
--- a/src/cfr/info_set.py
+++ b/src/cfr/info_set.py
@@ -21,7 +21,10 @@ class InfoSet:
                self.strat[i] /= normal
        else:
            ##
-            return
+            prob = 1.0/self.act_cnt
+            for i in range(self.act_cnt):
+                self.strat[i] = prob
+            # return
        
        for i in range(self.act_cnt):
            self.strat_sum[i] += wgt * self.strat[i]
@@ -37,8 +40,9 @@ class InfoSet:
                avg_strat[i] = self.strat_sum[i] / normal
        else:
            ##
-            return
-
-                
+            prob = 1.0/self.act_cnt
+            for i in range(self.act_cnt):
+                avg_strat[i] = prob
+            # return            
        return avg_strat
    
--- a/src/game/kuhn_poker.py
+++ b/src/game/kuhn_poker.py
@@ -3,6 +3,7 @@ class KuhnPoker:
        self.cards = ['J', 'Q', 'K']
        self.actions = [0, 1]  # Check/Fold=0, Bet/Call=1
    
+    # 结构是不是要修改下？
    def is_terminal(self, history):
        return history in ['00', '10', '010', '011', '11']