cfr: montecar test

2025-12-04 17:44:53 +08:00
parent 33ca6d59b0
commit fe4e025a8a
1 changed files with 104 additions and 0 deletions
--- a/src/cfr/montecar.py
+++ b/src/cfr/montecar.py
@@ -0,0 +1,104 @@
 import random
 from src.game.kuhn_poker import KuhnPoker
 from src.cfr.calcu import test, reset_players, all_avg_strat
 game = KuhnPoker()
 def get_once_util(strat_p0, strat_p1):
    all_card = ['J', 'Q', 'K']
    cards = random.sample(all_card, 2)
    history = ""
    while not game.is_terminal(history):
        player = game.get_cur_player(history)
        card = cards[player]
        info_key = game.get_Info_set(card, history)
        if player == 0:
            if info_key in strat_p0:
                probs = strat_p0[info_key]
            else:
                probs = [0.5, 0.5]
        else:
            probs = strat_p1.get(info_key, [0.5, 0.5])
        action = 0 if random.random() < probs[0] else 1
        history += str(action)
    return game.get_util(cards, history, player=0)
 def game_avg_util(strat_p0, strat_p1, game_cnt=1000):
    total = 0
    for _ in range(game_cnt):
        total += get_once_util(strat_p0, strat_p1)
    return total / game_cnt
 def perturb_infoset(strat, info_key):
    new_strat = strat.copy()
    if info_key in new_strat:
        probs = new_strat[info_key].copy()
        diff = 0.2
        probs[0] += random.uniform(-diff, diff)
        probs[1] = 1 - probs[0]
        probs[0] = max(0.01, min(0.99, probs[0]))
        probs[1] = 1 - probs[0]
        probs[0] = round(probs[0], 4)
        probs[1] = round(1- probs[0], 4)
        new_strat[info_key] = probs
    return new_strat
 def test_one_info(strat_p0, strat_p1, test_cnt, game_cnt, info='J.'):
    print(f"\n{'*'*100}")
    print(f"info_p: {info}")
    print(f"cfr strategy: {strat_p0.get(info)}")
    util_p0 = game_avg_util(strat_p0, strat_p1, game_cnt)
    print(f"cfr utility: {util_p0:.6f}\n")
    print(f"{'='*60}\n")
    print("range\tnew_strat\t\tnew_util\tgain")
    max_gain = 0.0
    best_strat = None
    for i in range(test_cnt):
        new_strat = perturb_infoset(strat_p0, info)
        perturb_util = game_avg_util(new_strat, strat_p1, game_cnt)
        gain = perturb_util - util_p0
        if gain > max_gain:
            max_gain = gain
            best_strat = new_strat
        print(f"{i+1}\t{new_strat[info]}\t\t{perturb_util:<12.6f}\t{gain:+12.6f}")
    print(f"{'='*60}\n")
    print(f"可剥削度: {max_gain:.6f}")
    if best_strat:
        print(f"扰动后增益最大策略: {best_strat[info]}")
    print(f"{'='*60}\n")
 if __name__ == "__main__":
    reset_players()
    test(100000)
    strat_p0 = all_avg_strat(0)
    strat_p1 = all_avg_strat(1)
    test_one_info(strat_p0, strat_p1, 10, 5000, 'J.')
    test_one_info(strat_p0, strat_p1, 10, 5000, 'K.')
    test_one_info(strat_p0, strat_p1, 10, 5000, 'K.01')