from __future__ import annotations import json import random from pathlib import Path from typing import List, Dict, Optional from typing import TYPE_CHECKING if TYPE_CHECKING: from .agent import Agent from .card import Card from .game_stage import GameStage, PlayerState, BlindConfig from .side_pot import SidePotManager from .hand_evaluator import HandEvaluator from .hand_ranking import HandRanking class Simulation: def __init__(self, agents: List[Agent], blind_config: Optional[BlindConfig] = None): self.agents = agents self.history: List[Dict] = [] self.cards: List[Card] = [] self.saved = False # 游戏状态管理 self.current_stage = GameStage.PREFLOP self.player_states: List[PlayerState] = [PlayerState.ACTIVE] * len(agents) self.current_turn = 0 self.betting_round_complete = False # 盲注配置 self.blind_config = blind_config or BlindConfig() # 筹码和底池管理 self.pot: List[int] = [0] * len(agents) # 每个玩家在当前轮的投入 self.total_pot = 0 self.last_raise_amount = 0 self.min_raise = self.blind_config.big_blind self.dealer_position = -1 # 边池管理和筹码 self.side_pot_manager = SidePotManager() self.stacks: List[int] = [1000] * len(agents) # 默认筹码 self.new_round() def new_round(self): self.history = [] self.cards = Card.all_short() random.shuffle(self.cards) # 洗牌 self.saved = False # 重置游戏状态 self.current_stage = GameStage.PREFLOP self.player_states = [PlayerState.ACTIVE] * len(self.agents) self.betting_round_complete = False # 重置下注状态 self.pot = [0] * len(self.agents) self.total_pot = 0 self.last_raise_amount = 0 self.min_raise = self.blind_config.big_blind # 重置边池管理器 self.side_pot_manager.reset() # 设置盲注 self._setup_blinds() # 庄家位置 self.dealer_position = random.choice(range(len(self.agents))) def _setup_blinds(self): num_players = len(self.agents) # 至少需要2个玩家才能设置盲注 if num_players < 2: self.current_turn = 0 if num_players > 0 else 0 return sb_pos = self.blind_config.get_sb_position(num_players,self.dealer_position) bb_pos = self.blind_config.get_bb_position(num_players,self.dealer_position) # 确保位置有效 if sb_pos >= num_players or bb_pos >= num_players: self.current_turn = 0 return # 扣除小盲 sb_amount = min(self.blind_config.small_blind, self.stacks[sb_pos]) self.pot[sb_pos] = sb_amount self.stacks[sb_pos] -= sb_amount self.total_pot += sb_amount self.side_pot_manager.add_investment(sb_pos, sb_amount) self.history.append({ "pid": sb_pos, "action": "small_blind", "amount": sb_amount }) # 扣除大盲 bb_amount = min(self.blind_config.big_blind, self.stacks[bb_pos]) self.pot[bb_pos] = bb_amount self.stacks[bb_pos] -= bb_amount self.total_pot += bb_amount self.side_pot_manager.add_investment(bb_pos, bb_amount) self.history.append({ "pid": bb_pos, "action": "big_blind", "amount": bb_amount }) # 首个行动玩家 self.current_turn = self.blind_config.get_first_to_act(self.current_stage, num_players, self.dealer_position) self.last_raise_amount = self.blind_config.big_blind def player_cards(self, pid) -> List[Card]: return self.cards[pid * 2 : (pid * 2 + 2)] def board_cards(self, street) -> List[Card]: nplayers = len(self.agents) idx_start = nplayers * 2 if street == "flop": return self.cards[idx_start: idx_start + 3] if street == "turn": return self.cards[idx_start: idx_start + 4] if street == "river": return self.cards[idx_start: idx_start + 5] return [] def get_current_max_bet(self) -> int: return max(self.pot) if self.pot else 0 def get_call_amount(self, pid) -> int: """ 跟注金额 """ if pid >= len(self.pot): return 0 max_pot = self.get_current_max_bet() return max(0, max_pot - self.pot[pid]) def get_min_raise_amount(self, pid) -> int: """最小加注金额""" call_amount = self.get_call_amount(pid) min_raise = call_amount + max(self.last_raise_amount, self.blind_config.big_blind) return min_raise def get_max_bet_amount(self, pid) -> int: """最大下注金额(剩余筹码)""" if pid >= len(self.stacks): return 0 return self.stacks[pid] def is_all_in_amount(self, pid, amount) -> bool: """检查是否为allin""" return amount >= self.stacks[pid] def validate_bet_amount(self, pid, action, amount) -> tuple[bool, str, int]: """ 验证下注金额合法性 """ if pid >= len(self.stacks): return False, "无效玩家", amount available_stack = self.stacks[pid] call_amount = self.get_call_amount(pid) if action == "fold": return True, "", 0 elif action == "check": if call_amount > 0: return False, "不能过牌,需跟注或弃牌", 0 return True, "", 0 elif action == "call": if call_amount == 0: return False, "不需要跟注", 0 # All-in call if call_amount >= available_stack: return True, "", available_stack return True, "", call_amount elif action in ["bet", "raise"]: if amount <= 0: return False, "无效下注金额", amount # allin if amount >= available_stack: return True, "", available_stack if action == "raise": min_raise = self.get_min_raise_amount(pid) if amount < min_raise: return False, f"最小加注金额为 {min_raise}", amount if action == "bet" and max(self.pot) == 0: if amount < self.blind_config.big_blind: return False, f"最小下注金额为 {self.blind_config.big_blind}", amount return True, "", amount return False, "无效行为", amount def get_available_actions(self, pid: int) -> dict: if pid != self.current_turn: return {"can_act": False, "reason": "不是你的回合"} if pid >= len(self.player_states): return {"can_act": False, "reason": "无效玩家"} state = self.player_states[pid] if state in [PlayerState.FOLDED, PlayerState.ALLIN, PlayerState.OUT]: return {"can_act": False, "reason": f"Player state: {state}"} call_amount = self.get_call_amount(pid) available_stack = self.stacks[pid] actions = { "can_act": True, "can_fold": True, "can_check": call_amount == 0, "can_call": call_amount > 0 and call_amount < available_stack, "can_bet": max(self.pot) == 0 and available_stack > 0, "can_raise": call_amount > 0 and available_stack > call_amount, "can_allin": available_stack > 0, "call_amount": call_amount, "min_bet": self.blind_config.big_blind if max(self.pot) == 0 else 0, "min_raise": self.get_min_raise_amount(pid) if call_amount > 0 else 0, "max_bet": available_stack, "stack": available_stack } return actions def is_betting_round_complete(self) -> bool: """ 检查当前下注轮是否完成 """ active_players = [i for i, state in enumerate(self.player_states) if state in (PlayerState.ACTIVE, PlayerState.CALLED)] if len(active_players) <= 1: return True # 检查所有active玩家是否都已投入相同金额,且所有人都已经行动过 max_pot = self.get_current_max_bet() # 统计还需要行动的玩家 players_need_action = [] for i in active_players: # allin if self.player_states[i] == PlayerState.ALLIN: continue # 投入金额不足的玩家需要行动 if self.pot[i] < max_pot: players_need_action.append(i) # Active状态的玩家如果还没有在本轮行动过,也需要行动 elif self.player_states[i] == PlayerState.ACTIVE: # 在翻前,大盲玩家即使投入了足够金额,也有权行动一次 if (self.current_stage == GameStage.PREFLOP and i == self.blind_config.get_bb_position(len(self.agents), self.dealer_position)): # 检查大盲是否已经行动过(除了盲注) bb_actions = [h for h in self.history if h.get('pid') == i and h.get('action') not in ['big_blind']] if not bb_actions: players_need_action.append(i) return len(players_need_action) == 0 def advance_to_next_street(self): if self.current_stage == GameStage.FINISHED: return next_stage = GameStage.get_next_stage(self.current_stage) if next_stage is None: self.current_stage = GameStage.FINISHED self.complete_hand() return self.current_stage = next_stage active_players = self.get_active_players() if len(active_players) <= 1: self.current_stage = GameStage.FINISHED self.complete_hand() return # 重置下注轮状态 self.betting_round_complete = False # 重置行动状态 for i, state in enumerate(self.player_states): if state == PlayerState.CALLED: self.player_states[i] = PlayerState.ACTIVE # 首个行动玩家 num_players = len(self.agents) self.current_turn = self.blind_config.get_first_to_act(self.current_stage, num_players, self.dealer_position) self.last_raise_amount = 0 self.min_raise = self.blind_config.big_blind def get_next_active_player(self, start_pos) -> Optional[int]: for i in range(len(self.agents)): pos = (start_pos + i) % len(self.agents) if self.player_states[pos] == PlayerState.ACTIVE: return pos return None def get_side_pots(self) -> List: active_players = [ i for i, state in enumerate(self.player_states) if state not in [PlayerState.FOLDED, PlayerState.OUT] ] return self.side_pot_manager.create_side_pots(active_players) def node_info(self) -> Dict: if self.current_turn >= len(self.pot): return {"bet_min": self.min_raise, "bet_max": 0, "call_amount": 0} actions = self.get_available_actions(self.current_turn) return { "bet_min": actions.get("min_bet", self.min_raise), "bet_max": actions.get("max_bet", 100), "call_amount": actions.get("call_amount", 0) } def apply_action(self, pid, action, amount): if pid != self.current_turn: raise ValueError(f"不是玩家 {pid} 的回合") if self.player_states[pid] not in (PlayerState.ACTIVE,): raise ValueError(f"玩家 {pid} 无法行动,当前状态: {self.player_states[pid]}") action = action.lower() # 验证动作合法性 is_valid, error_msg, adjusted_amount = self.validate_bet_amount(pid, action, amount or 0) if not is_valid: raise ValueError(error_msg) # 使用调整后的金额 amount = adjusted_amount self.history.append({"pid": pid, "action": action, "amount": amount}) if action == "fold": self.player_states[pid] = PlayerState.FOLDED elif action == "call": call_amount = self.get_call_amount(pid) if call_amount == 0: # check self.history[-1]["action"] = "check" self.player_states[pid] = PlayerState.CALLED else: # 检查是否all-in actual_amount = min(call_amount, self.stacks[pid]) if actual_amount >= self.stacks[pid]: self.player_states[pid] = PlayerState.ALLIN else: self.player_states[pid] = PlayerState.CALLED self.pot[pid] += actual_amount self.stacks[pid] -= actual_amount self.total_pot += actual_amount self.side_pot_manager.add_investment(pid, actual_amount) elif action == "check": call_amount = self.get_call_amount(pid) if call_amount > 0: raise ValueError("跟注金额>0, 无法过牌,需要跟注或弃牌") self.player_states[pid] = PlayerState.CALLED elif action in ("bet", "raise"): if amount is None: raise ValueError(f"{action} 需要指定金额") # 检查是否all-in actual_amount = min(amount, self.stacks[pid]) if actual_amount >= self.stacks[pid]: self.player_states[pid] = PlayerState.ALLIN else: self.player_states[pid] = PlayerState.CALLED self.pot[pid] += actual_amount self.stacks[pid] -= actual_amount self.total_pot += actual_amount self.side_pot_manager.add_investment(pid, actual_amount) # 更新最后加注金额 call_amount = self.get_call_amount(pid) raise_amount = actual_amount - call_amount if raise_amount > 0: self.last_raise_amount = raise_amount self.min_raise = raise_amount for i, state in enumerate(self.player_states): if i != pid and state == PlayerState.CALLED: self.player_states[i] = PlayerState.ACTIVE else: raise ValueError(f"未知动作: {action}") # 下一个玩家 self._advance_turn() def _advance_turn(self): """ 推进回合 """ # 检查下注轮是否完成 if self.is_betting_round_complete(): self.betting_round_complete = True self.advance_to_next_street() else: # 找到下一个可行动玩家 next_player = self.get_next_active_player(self.current_turn + 1) if next_player is not None: self.current_turn = next_player else: # 没有玩家需要行动,结束下注轮 self.betting_round_complete = True self.advance_to_next_street() def to_save_data(self) -> Dict: players = [f"Agent{a.pid}" for a in self.agents] return { "history": self.history, "players": players, "player_cards": ["".join(str(c) for c in self.player_cards(i)) for i in range(len(self.agents))], "board": "".join(str(c) for c in self.board_cards("river")), } def dump_data(self, path: Path | None = None): if self.saved: return if path is None: path = Path.cwd() / "shortdeck_arena_history.jsonl" with path.open("a", encoding="utf-8") as f: f.write(json.dumps(self.to_save_data())) f.write("\n") self.saved = True def evaluate_player_hand(self, pid: int) -> Optional[HandRanking]: """评估玩家手牌强度""" if pid >= len(self.agents): return None if self.player_states[pid] == PlayerState.FOLDED: return None try: # 获取玩家手牌 player_cards = self.player_cards(pid) # 获取公共牌 board_cards = self.board_cards(self.current_stage.value) # 至少需要5张牌才能评估 all_cards = player_cards + board_cards if len(all_cards) < 5: return None # 如果正好5张牌,直接评估 if len(all_cards) == 5: return HandEvaluator.evaluate5Cards(all_cards) # 如果超过5张牌,找最佳组合 return HandEvaluator.evaluateHand(all_cards) except Exception as e: print(f"评估玩家 {pid} 手牌时出错: {e}") return None def get_active_players(self) -> List[int]: return [i for i, state in enumerate(self.player_states) if state not in [PlayerState.FOLDED, PlayerState.OUT]] def is_hand_complete(self) -> bool: active_players = self.get_active_players() if len(active_players) <= 1: return True # 到达河牌且所有下注完成 if (self.current_stage == GameStage.FINISHED or (self.current_stage == GameStage.RIVER and self.betting_round_complete)): return True return False def determine_winners(self) -> Dict[int, HandRanking]: active_players = self.get_active_players() if not active_players: return {} if len(active_players) == 1: return {active_players[0]: None} # 不需要摊牌 # 多人摊牌 hand_rankings = {} for pid in active_players: ranking = self.evaluate_player_hand(pid) if ranking is not None: hand_rankings[pid] = ranking return hand_rankings def distribute_pot(self) -> Dict[int, int]: winners = self.determine_winners() if not winners: return {} # 只有一人获胜(其他人弃牌) if len(winners) == 1 and list(winners.values())[0] is None: winner_id = list(winners.keys())[0] return {winner_id: self.total_pot} # 多人摊牌,使用边池分配 if len(winners) > 1: #转换HandRanking为数值强度 hand_strengths = {} for pid, ranking in winners.items(): if ranking is not None: hand_strengths[pid] = ranking.get_strength() else: hand_strengths[pid] = 0 # 弃牌玩家 return self.side_pot_manager.distribute_winnings(hand_strengths) return {} def complete_hand(self) -> Dict: if not self.is_hand_complete(): return {"complete": False, "message": "牌局未结束"} winners = self.determine_winners() winnings = self.distribute_pot() # 更新筹码 for pid, amount in winnings.items(): if pid < len(self.stacks): self.stacks[pid] += amount self.current_stage = GameStage.FINISHED result = { "complete": True, "winners": list(winners.keys()), "winnings": winnings, "final_stacks": self.stacks.copy(), "showdown_hands": {} } # 摊牌信息 for pid, ranking in winners.items(): if ranking is not None: result["showdown_hands"][pid] = { "cards": [str(card) for card in self.player_cards(pid)], "hand_type": ranking.hand_type.type_name, "description": str(ranking) } return result