Upload 73 files
This view is limited to 50 files because it contains too many changes.
- .gitattributes +5 -0
- Imitation Learning Tic Tac Toe AI 2/analyze_ttt_model.py +377 -0
- Imitation Learning Tic Tac Toe AI 2/create_pretrained_ai.py +129 -0
- Imitation Learning Tic Tac Toe AI 2/improved_game.py +529 -0
- Imitation Learning Tic Tac Toe AI 2/requirements.txt +2 -0
- Imitation Learning Tic Tac Toe AI 2/ttt_ai_model_improved.pkl +3 -0
- Imitation Learning Tic Tac Toe AI 2/ttt_diagnostic.py +351 -0
- Imitation Learning Tic Tac Toe AI/Screenshot 2025-12-02 at 12.30.57 PM.png +3 -0
- Imitation Learning Tic Tac Toe AI/Screenshot 2025-12-02 at 12.33.32 PM.png +3 -0
- Imitation Learning Tic Tac Toe AI/Screenshot 2025-12-02 at 12.33.43 PM.png +3 -0
- Imitation Learning Tic Tac Toe AI/analyze_ttt_model.py +377 -0
- Imitation Learning Tic Tac Toe AI/requirements.txt +2 -0
- Imitation Learning Tic Tac Toe AI/tic_tac_toe_ai.py +483 -0
- Imitation Learning Tic Tac Toe AI/ttt_ai_model.pkl +3 -0
- Imitation Learning Tic Tac Toe AI/ttt_diagnostic.py +351 -0
- Tic Tac Toe RL/app.py +721 -0
- Tic Tac Toe RL/eval_models.py +464 -0
- Tic Tac Toe RL/model_evaluation_results.csv +52 -0
- Tic Tac Toe RL/model_performance_analysis.png +3 -0
- Tic Tac Toe RL/models/dueling_dqn_episode_100.pth +3 -0
- Tic Tac Toe RL/models/dueling_dqn_episode_1000.pth +3 -0
- Tic Tac Toe RL/models/dueling_dqn_episode_1100.pth +3 -0
- Tic Tac Toe RL/models/dueling_dqn_episode_1200.pth +3 -0
- Tic Tac Toe RL/models/dueling_dqn_episode_1300.pth +3 -0
- Tic Tac Toe RL/models/dueling_dqn_episode_1400.pth +3 -0
- Tic Tac Toe RL/models/dueling_dqn_episode_1500.pth +3 -0
- Tic Tac Toe RL/models/dueling_dqn_episode_1600.pth +3 -0
- Tic Tac Toe RL/models/dueling_dqn_episode_1700.pth +3 -0
- Tic Tac Toe RL/models/dueling_dqn_episode_1800.pth +3 -0
- Tic Tac Toe RL/models/dueling_dqn_episode_1900.pth +3 -0
- Tic Tac Toe RL/models/dueling_dqn_episode_200.pth +3 -0
- Tic Tac Toe RL/models/dueling_dqn_episode_2000.pth +3 -0
- Tic Tac Toe RL/models/dueling_dqn_episode_2100.pth +3 -0
- Tic Tac Toe RL/models/dueling_dqn_episode_2200.pth +3 -0
- Tic Tac Toe RL/models/dueling_dqn_episode_2300.pth +3 -0
- Tic Tac Toe RL/models/dueling_dqn_episode_2400.pth +3 -0
- Tic Tac Toe RL/models/dueling_dqn_episode_2500.pth +3 -0
- Tic Tac Toe RL/models/dueling_dqn_episode_2600.pth +3 -0
- Tic Tac Toe RL/models/dueling_dqn_episode_2700.pth +3 -0
- Tic Tac Toe RL/models/dueling_dqn_episode_2800.pth +3 -0
- Tic Tac Toe RL/models/dueling_dqn_episode_2900.pth +3 -0
- Tic Tac Toe RL/models/dueling_dqn_episode_300.pth +3 -0
- Tic Tac Toe RL/models/dueling_dqn_episode_3000.pth +3 -0
- Tic Tac Toe RL/models/dueling_dqn_episode_3100.pth +3 -0
- Tic Tac Toe RL/models/dueling_dqn_episode_3200.pth +3 -0
- Tic Tac Toe RL/models/dueling_dqn_episode_3300.pth +3 -0
- Tic Tac Toe RL/models/dueling_dqn_episode_3400.pth +3 -0
- Tic Tac Toe RL/models/dueling_dqn_episode_3500.pth +3 -0
- Tic Tac Toe RL/models/dueling_dqn_episode_3600.pth +3 -0
- Tic Tac Toe RL/models/dueling_dqn_episode_3700.pth +3 -0
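The Tic Tac Toe RL/models/ checkpoints listed above are PyTorch weight files saved every 100 training episodes (episodes 100 through 3700 are visible before the 50-file cutoff). As a hedged sketch only, without assuming anything about the network definition in Tic Tac Toe RL/app.py, one of these .pth files could be inspected roughly like this; the checkpoint layout handled below is an assumption, not something confirmed by the diff:

import torch

# Hypothetical inspection of one checkpoint from the list above. The dueling-DQN
# class itself lives in "Tic Tac Toe RL/app.py" and is not reproduced here, so this
# only prints the names and shapes of the saved tensors.
checkpoint = torch.load("Tic Tac Toe RL/models/dueling_dqn_episode_3700.pth",
                        map_location="cpu")
# Assumption: the file holds either a plain state_dict, a dict wrapping one, or a full module.
state = checkpoint if isinstance(checkpoint, dict) else checkpoint.state_dict()
state = state.get("model_state_dict", state)
for name, tensor in state.items():
    print(name, tuple(tensor.shape) if hasattr(tensor, "shape") else type(tensor).__name__)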
.gitattributes
CHANGED
@@ -33,3 +33,8 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+Imitation[[:space:]]Learning[[:space:]]Tic[[:space:]]Tac[[:space:]]Toe[[:space:]]AI/Screenshot[[:space:]]2025-12-02[[:space:]]at[[:space:]]12.30.57 PM.png filter=lfs diff=lfs merge=lfs -text
+Imitation[[:space:]]Learning[[:space:]]Tic[[:space:]]Tac[[:space:]]Toe[[:space:]]AI/Screenshot[[:space:]]2025-12-02[[:space:]]at[[:space:]]12.33.32 PM.png filter=lfs diff=lfs merge=lfs -text
+Imitation[[:space:]]Learning[[:space:]]Tic[[:space:]]Tac[[:space:]]Toe[[:space:]]AI/Screenshot[[:space:]]2025-12-02[[:space:]]at[[:space:]]12.33.43 PM.png filter=lfs diff=lfs merge=lfs -text
+output.mp4 filter=lfs diff=lfs merge=lfs -text
+Tic[[:space:]]Tac[[:space:]]Toe[[:space:]]RL/model_performance_analysis.png filter=lfs diff=lfs merge=lfs -text
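The five added patterns register the new screenshots, output.mp4, and the RL performance plot with Git LFS, so only small pointer files enter the repository itself; the [[:space:]] tokens are the escaped spaces that "git lfs track" writes into .gitattributes for paths containing whitespace.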
Imitation Learning Tic Tac Toe AI 2/analyze_ttt_model.py
ADDED
@@ -0,0 +1,377 @@
import pickle
import numpy as np
import os
from collections import deque
import json

def load_pickle_file(filename="ttt_ai_model.pkl"):
    """
    Load and analyze the Tic-Tac-Toe AI model pickle file.

    Args:
        filename (str): Path to the pickle file

    Returns:
        dict: The loaded data or None if file doesn't exist
    """

    if not os.path.exists(filename):
        print(f"❌ File '{filename}' not found!")
        print("Possible reasons:")
        print("1. The game hasn't been played yet")
        print("2. The file was saved with a different name")
        print("3. The file is in a different directory")
        return None

    try:
        print(f"📂 Opening '{filename}'...")

        # Load the pickle file
        with open(filename, 'rb') as f:
            data = pickle.load(f)

        print("✅ File loaded successfully!")
        print("\n" + "="*60)

        return data

    except Exception as e:
        print(f"❌ Error loading pickle file: {e}")
        print(f"Error type: {type(e).__name__}")
        return None

def analyze_model(data):
    """
    Analyze and display information about the AI model.

    Args:
        data (dict): The loaded pickle data
    """

    if not data:
        print("No data to analyze")
        return

    print("📊 MODEL ANALYSIS")
    print("="*60)

    # Check what keys are available
    print(f"Keys in data: {list(data.keys())}")

    # Analyze model matrix if present
    if 'model' in data:
        model = data['model']
        print(f"\n🤖 AI Model Information:")
        print(f"   Shape: {model.shape}")
        print(f"   Size: {model.size:,} elements")
        print(f"   Data type: {model.dtype}")

        # Calculate some statistics
        print(f"\n📈 Model Statistics:")
        print(f"   Non-zero entries: {np.count_nonzero(model):,}")
        print(f"   Zero entries: {np.sum(model == 0):,}")
        print(f"   Sparsity: {(np.sum(model == 0) / model.size) * 100:.2f}%")

        # Get min, max, mean values
        if model.size > 0:
            flat_model = model.flatten()
            non_zero_values = flat_model[flat_model != 0]

            if len(non_zero_values) > 0:
                print(f"   Min value (non-zero): {non_zero_values.min():.6f}")
                print(f"   Max value: {flat_model.max():.6f}")
                print(f"   Mean value (non-zero): {non_zero_values.mean():.6f}")
                print(f"   Std dev (non-zero): {non_zero_values.std():.6f}")

                # Count of positive vs negative values
                positive = np.sum(flat_model > 0)
                negative = np.sum(flat_model < 0)
                print(f"   Positive values: {positive:,}")
                print(f"   Negative values: {negative:,}")

    # Analyze experience replay if present
    if 'experience' in data:
        experience = data['experience']
        print(f"\n🎮 Experience Replay Buffer:")
        print(f"   Number of experiences: {len(experience):,}")

        if experience:
            # Show first few experiences
            print(f"   Sample experience (first):")
            if hasattr(experience[0], '__len__'):
                print(f"     Length: {len(experience[0])}")
                if len(experience[0]) > 0:
                    print(f"     First element type: {type(experience[0][0])}")

    # Check for other data
    for key in data.keys():
        if key not in ['model', 'experience']:
            value = data[key]
            print(f"\n🔍 {key}:")
            print(f"   Type: {type(value)}")
            if isinstance(value, (list, tuple, deque)):
                print(f"   Length: {len(value)}")
            elif isinstance(value, dict):
                print(f"   Keys: {list(value.keys())[:5]}..." if len(value) > 5 else f"   Keys: {list(value.keys())}")
            elif isinstance(value, np.ndarray):
                print(f"   Shape: {value.shape}")

def examine_specific_states(model, num_states=5):
    """
    Examine specific state-action values in the model.

    Args:
        model (np.ndarray): The AI model
        num_states (int): Number of states to examine
    """

    print(f"\n🔬 Examining {num_states} specific states:")
    print("-"*40)

    # Find states with non-zero values
    non_zero_indices = np.nonzero(np.any(model != 0, axis=1))[0]

    if len(non_zero_indices) > 0:
        print(f"Found {len(non_zero_indices):,} states with learned values")

        # Sample some states to examine
        if len(non_zero_indices) > num_states:
            sample_indices = np.random.choice(non_zero_indices, num_states, replace=False)
        else:
            sample_indices = non_zero_indices

        for i, state_idx in enumerate(sample_indices):
            q_values = model[state_idx]
            non_zero_q = q_values[q_values != 0]

            if len(non_zero_q) > 0:
                print(f"\nState {i+1} (Index {state_idx}):")
                print(f"   Non-zero Q-values: {len(non_zero_q)}")
                print(f"   Actions with values:")
                for action in np.where(q_values != 0)[0]:
                    print(f"     Action {action}: {q_values[action]:.4f}")
    else:
        print("No states with learned values found yet.")

def decode_state(state_index):
    """
    Convert a state index back to a board representation.
    This assumes the same encoding used in the game.

    Args:
        state_index (int): The encoded state index

    Returns:
        list: Board representation (0=empty, 1=X, 2=O)
    """
    board = [0] * 9
    temp_index = state_index

    for i in range(9):
        board[i] = temp_index % 3
        temp_index //= 3

    return board

def display_board(board):
    """
    Display a Tic-Tac-Toe board in human-readable format.

    Args:
        board (list): Board representation
    """
    symbols = {0: '.', 1: 'X', 2: 'O'}

    print("Board state:")
    for row in range(3):
        row_chars = [symbols[board[row*3 + col]] for col in range(3)]
        print("  " + " | ".join(row_chars))
        if row < 2:
            print("  " + "-" * 9)

def explore_model_interactively(model):
    """
    Interactive exploration of the model.

    Args:
        model (np.ndarray): The AI model
    """

    print("\n🎯 INTERACTIVE EXPLORATION")
    print("="*60)

    while True:
        print("\nOptions:")
        print("1. Look up a specific state")
        print("2. Find states with highest Q-values")
        print("3. Find best action for a given state")
        print("4. Exit exploration")

        choice = input("\nEnter your choice (1-4): ").strip()

        if choice == '1':
            try:
                state_idx = int(input("Enter state index (0-19682): "))
                if 0 <= state_idx < model.shape[0]:
                    board = decode_state(state_idx)
                    display_board(board)

                    q_values = model[state_idx]
                    print(f"\nQ-values for state {state_idx}:")
                    for action in range(9):
                        if q_values[action] != 0:
                            print(f"   Action {action} (row {action//3}, col {action%3}): {q_values[action]:.4f}")

                    # Show best action
                    best_action = np.argmax(q_values)
                    print(f"\nBest action: {best_action} (row {best_action//3}, col {best_action%3})")
                else:
                    print("Invalid state index!")
            except ValueError:
                print("Please enter a valid number!")

        elif choice == '2':
            try:
                num_states = int(input("How many top states? (1-100): "))
                num_states = max(1, min(100, num_states))

                # Find states with maximum Q-values
                max_q_per_state = np.max(model, axis=1)
                top_indices = np.argsort(max_q_per_state)[-num_states:][::-1]

                print(f"\nTop {num_states} states with highest Q-values:")
                for i, idx in enumerate(top_indices[:10]):  # Show first 10
                    max_q = max_q_per_state[idx]
                    if max_q > 0:
                        board = decode_state(idx)
                        print(f"\n{i+1}. State {idx} (max Q: {max_q:.4f})")
                        display_board(board)
            except ValueError:
                print("Please enter a valid number!")

        elif choice == '3':
            # Create a board manually
            print("\nEnter board state (9 numbers, 0=empty, 1=X, 2=O)")
            print("Example: 0 0 1 0 2 0 0 0 0")

            try:
                board_input = input("Board: ").strip()
                if len(board_input) == 0:
                    # Use default example
                    board = [0, 0, 1, 0, 2, 0, 0, 0, 0]
                else:
                    board = [int(x) for x in board_input.split()]

                if len(board) != 9 or any(x not in [0, 1, 2] for x in board):
                    raise ValueError("Invalid board")

                display_board(board)

                # Convert to state index
                state_idx = 0
                for i, cell in enumerate(board):
                    state_idx += cell * (3 ** i)

                q_values = model[state_idx]

                # Only show available moves
                available_moves = [i for i, cell in enumerate(board) if cell == 0]

                print("\nAvailable moves and their Q-values:")
                for move in available_moves:
                    q_val = q_values[move]
                    row, col = divmod(move, 3)
                    print(f"   Move {move} (row {row}, col {col}): {q_val:.4f}")

                if available_moves:
                    best_move = available_moves[np.argmax([q_values[m] for m in available_moves])]
                    row, col = divmod(best_move, 3)
                    print(f"\nRecommended move: {best_move} (row {row}, col {col})")
                else:
                    print("No available moves!")

            except Exception as e:
                print(f"Error: {e}")

        elif choice == '4':
            print("Exiting interactive exploration.")
            break

        else:
            print("Invalid choice!")

def save_model_summary(data, filename="model_summary.json"):
    """
    Save a summary of the model to a JSON file.

    Args:
        data (dict): The loaded pickle data
        filename (str): Output JSON filename
    """
    if not data:
        return

    summary = {}

    if 'model' in data:
        model = data['model']
        summary['model'] = {
            'shape': model.shape,
            'size': int(model.size),
            'non_zero_entries': int(np.count_nonzero(model)),
            'sparsity': float((np.sum(model == 0) / model.size) * 100)
        }

    if 'experience' in data:
        experience = data['experience']
        summary['experience'] = {
            'count': len(experience)
        }

    try:
        with open(filename, 'w') as f:
            json.dump(summary, f, indent=2)
        print(f"\n💾 Model summary saved to '{filename}'")
    except Exception as e:
        print(f"Error saving summary: {e}")

def main():
    """
    Main function to load and analyze the pickle file.
    """
    print("🔍 Tic-Tac-Toe AI Model Analyzer")
    print("="*60)

    # Try to load the pickle file
    filename = "ttt_ai_model.pkl"
    data = load_pickle_file(filename)

    if data:
        # Analyze the model
        analyze_model(data)

        # If model exists, do more detailed analysis
        if 'model' in data:
            # Examine specific states
            examine_specific_states(data['model'])

            # Interactive exploration
            explore = input("\nWould you like to explore the model interactively? (y/n): ").strip().lower()
            if explore == 'y':
                explore_model_interactively(data['model'])

            # Save summary
            save = input("\nWould you like to save a summary? (y/n): ").strip().lower()
            if save == 'y':
                save_model_summary(data)

        # Additional file info
        print("\n📄 File Information:")
        print(f"   File size: {os.path.getsize(filename):,} bytes")
        print(f"   Last modified: {os.path.getmtime(filename):.0f}")

    print("\n" + "="*60)
    print("Analysis complete!")

if __name__ == "__main__":
    main()
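The analyzer above assumes the game's base-3 board encoding: cell i contributes board[i] * 3**i to the state index, which is why valid indices run from 0 to 19682 (3**9 - 1) in the interactive prompt. A minimal round-trip sketch of that convention, reusing decode_state() from the file above (encode_state is a hypothetical helper added here for illustration):

def encode_state(board):
    # Inverse of decode_state() above: base-3 encoding with cell 0 as the lowest digit.
    return sum(cell * (3 ** i) for i, cell in enumerate(board))

board = [0, 0, 1, 0, 2, 0, 0, 0, 0]   # X at cell 2, O in the centre
idx = encode_state(board)             # 1*3**2 + 2*3**4 = 9 + 162 = 171
assert decode_state(idx) == board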
Imitation Learning Tic Tac Toe AI 2/create_pretrained_ai.py
ADDED
@@ -0,0 +1,129 @@
import pickle
import numpy as np
import random

def create_basic_ai_knowledge():
    """Create a basic Tic-Tac-Toe AI with fundamental strategies"""

    q_table = {}

    # 1. Empty board - prefer center and corners
    empty_board = (0,0,0,0,0,0,0,0,0)
    q_values = [0.0] * 9
    q_values[4] = 0.8  # Center is best
    q_values[0] = q_values[2] = q_values[6] = q_values[8] = 0.7  # Corners are good
    q_values[1] = q_values[3] = q_values[5] = q_values[7] = 0.5  # Edges are okay
    q_table[empty_board] = q_values

    # 2. Opponent in center - take corners
    center_taken = (0,0,0,0,1,0,0,0,0)  # X in center
    q_values = [0.0] * 9
    q_values[0] = q_values[2] = q_values[6] = q_values[8] = 0.9  # Corners are best
    q_table[center_taken] = q_values

    # 3. Winning moves - very high value
    # Example: two O's in a row/col/diag
    winning_patterns = [
        # Horizontal
        (2,2,0,0,0,0,0,0,0),  # Need position 2
        (0,2,2,0,0,0,0,0,0),  # Need position 0
        (2,0,2,0,0,0,0,0,0),  # Need position 1

        # Vertical
        (2,0,0,2,0,0,0,0,0),  # Need position 6
        (0,2,0,0,2,0,0,0,0),  # Need position 7
        (0,0,2,0,0,2,0,0,0),  # Need position 8

        # Diagonal
        (2,0,0,0,2,0,0,0,0),  # Need position 8
        (0,0,2,0,2,0,0,0,0),  # Need position 6
    ]

    for board in winning_patterns:
        q_values = [0.0] * 9
        # Find empty spot that completes the line
        for i in range(9):
            if board[i] == 0:
                # Check if this completes three in a row
                test_board = list(board)
                test_board[i] = 2

                # Check if this is a winning move
                winning = False
                lines = [
                    [0,1,2], [3,4,5], [6,7,8],  # Rows
                    [0,3,6], [1,4,7], [2,5,8],  # Columns
                    [0,4,8], [2,4,6]  # Diagonals
                ]

                for line in lines:
                    if (test_board[line[0]] == test_board[line[1]] ==
                        test_board[line[2]] == 2):
                        winning = True
                        break

                if winning:
                    q_values[i] = 1.0  # Very high value for winning move

        q_table[board] = q_values

    # 4. Blocking moves - high value
    blocking_patterns = [
        # Block horizontal
        (1,1,0,0,0,0,0,0,0),  # Block at 2
        (0,1,1,0,0,0,0,0,0),  # Block at 0
        (1,0,1,0,0,0,0,0,0),  # Block at 1

        # Block vertical
        (1,0,0,1,0,0,0,0,0),  # Block at 6
        (0,1,0,0,1,0,0,0,0),  # Block at 7
        (0,0,1,0,0,1,0,0,0),  # Block at 8
    ]

    for board in blocking_patterns:
        q_values = [0.0] * 9
        # Find blocking move
        for i in range(9):
            if board[i] == 0:
                # Check if this blocks opponent
                test_board = list(board)
                test_board[i] = 1  # Temporarily place opponent's piece

                # Check if opponent would win
                opponent_wins = False
                lines = [
                    [0,1,2], [3,4,5], [6,7,8],
                    [0,3,6], [1,4,7], [2,5,8],
                    [0,4,8], [2,4,6]
                ]

                for line in lines:
                    if (test_board[line[0]] == test_board[line[1]] ==
                        test_board[line[2]] == 1):
                        opponent_wins = True
                        break

                if opponent_wins:
                    q_values[i] = 0.9  # High value for blocking

        q_table[board] = q_values

    # Save the pre-trained AI
    data = {
        'q_table': q_table,
        'training_history': [],
        'player_symbol': 2
    }

    with open('ttt_ai_pretrained.pkl', 'wb') as f:
        pickle.dump(data, f)

    print(f"Created pre-trained AI with {len(q_table)} board states")
    print("Basic strategies included:")
    print("1. Prefer center and corners")
    print("2. Take corners when opponent has center")
    print("3. Recognize winning moves")
    print("4. Recognize blocking moves")

if __name__ == '__main__':
    create_basic_ai_knowledge()
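A brief usage sketch for the file this script writes: reload ttt_ai_pretrained.pkl and read back the hand-seeded Q-values for the empty board. The keys and the board-tuple layout follow the data dict defined above; the printed list is what the seeding code implies (0.8 for the centre, 0.7 for corners, 0.5 for edges).

import pickle

# Load the pre-trained table created by create_basic_ai_knowledge() above.
with open('ttt_ai_pretrained.pkl', 'rb') as f:
    data = pickle.load(f)

q_table = data['q_table']
empty_board = (0, 0, 0, 0, 0, 0, 0, 0, 0)
print(q_table[empty_board])
# -> [0.7, 0.5, 0.7, 0.5, 0.8, 0.5, 0.7, 0.5, 0.7]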
Imitation Learning Tic Tac Toe AI 2/improved_game.py
ADDED
@@ -0,0 +1,529 @@
import sys
import numpy as np
import random
from collections import defaultdict
import pickle
import os
from PyQt5.QtWidgets import *
from PyQt5.QtCore import *
from PyQt5.QtGui import *

class TicTacToeAI:
    """Improved AI with proper imitation learning"""

    def __init__(self, player_symbol=2):  # Default is O
        self.q_table = defaultdict(lambda: [0.0] * 9)  # State -> [Q-values for 9 moves]
        self.learning_rate = 0.3  # Increased for faster learning
        self.exploration_rate = 0.3  # Start with exploration
        self.discount_factor = 0.9
        self.player_symbol = player_symbol
        self.last_state = None
        self.last_action = None
        self.training_history = []

        self.load_model()

    def board_to_key(self, board):
        """Convert board to hashable key"""
        return tuple(board)

    def get_available_moves(self, board):
        """Get list of available positions"""
        return [i for i, cell in enumerate(board) if cell == 0]

    def choose_action(self, board, available_moves):
        """Choose action using epsilon-greedy policy"""
        board_key = self.board_to_key(board)

        # Exploration: random move
        if random.random() < self.exploration_rate:
            action = random.choice(available_moves)
        # Exploitation: best known move
        else:
            q_values = self.q_table[board_key]
            # Filter to available moves
            available_q = [(q_values[move], move) for move in available_moves]
            # Choose move with highest Q-value
            action = max(available_q, key=lambda x: x[0])[1]

        # Store for learning
        self.last_state = board_key
        self.last_action = action

        return action

    def learn(self, reward, next_board, game_over):
        """Q-learning update"""
        if self.last_state is None or self.last_action is None:
            return

        board_key = self.last_state
        action = self.last_action

        # Current Q-value
        current_q = self.q_table[board_key][action]

        if game_over:
            # Terminal state, no future rewards
            future_q = 0
        else:
            # Estimate future reward
            next_key = self.board_to_key(next_board)
            next_available = self.get_available_moves(next_board)
            if next_available:
                future_q = max(self.q_table[next_key][move] for move in next_available)
            else:
                future_q = 0

        # Q-learning update
        new_q = current_q + self.learning_rate * (reward + self.discount_factor * future_q - current_q)
        self.q_table[board_key][action] = new_q

        # Record for analysis
        self.training_history.append({
            'state': board_key,
            'action': action,
            'reward': reward,
            'new_q': new_q
        })

        # Clear for next move
        self.last_state = None
        self.last_action = None

    def learn_from_observation(self, board_before, action, board_after, winner):
        """Learn by observing human player moves"""
        board_key = self.board_to_key(board_before)

        # Determine reward based on game outcome
        if winner == self.player_symbol:  # AI's symbol won
            reward = 1.0
        elif winner == 3 - self.player_symbol:  # Opponent won
            reward = -1.0
        elif winner is None:  # Draw
            reward = 0.1
        else:
            reward = 0

        # Update Q-value
        current_q = self.q_table[board_key][action]
        new_q = current_q + self.learning_rate * (reward - current_q)
        self.q_table[board_key][action] = new_q

    def save_model(self):
        """Save Q-table to file"""
        try:
            # Convert defaultdict to regular dict for pickling
            q_table_dict = dict(self.q_table)
            data = {
                'q_table': q_table_dict,
                'training_history': self.training_history[-1000:],  # Keep last 1000
                'player_symbol': self.player_symbol
            }
            with open('ttt_ai_improved.pkl', 'wb') as f:
                pickle.dump(data, f)
            print(f"Model saved with {len(q_table_dict)} states")
        except Exception as e:
            print(f"Error saving model: {e}")

    def load_model(self):
        """Load Q-table from file"""
        filename = 'ttt_ai_improved.pkl'
        if os.path.exists(filename):
            try:
                with open(filename, 'rb') as f:
                    data = pickle.load(f)
                    self.q_table = defaultdict(lambda: [0.0] * 9, data.get('q_table', {}))
                    self.training_history = data.get('training_history', [])
                    self.player_symbol = data.get('player_symbol', 2)
                    print(f"Model loaded with {len(self.q_table)} states")
            except Exception as e:
                print(f"Error loading model: {e}")

class TicTacToeGame:
    """Game logic - unchanged"""

    def __init__(self):
        self.reset()

    def reset(self):
        self.board = [0] * 9
        self.current_player = 1
        self.winner = None
        self.game_over = False
        self.moves = 0

    def make_move(self, position):
        if self.board[position] != 0 or self.game_over:
            return False

        self.board[position] = self.current_player
        self.moves += 1

        self.winner = self.check_winner()
        if self.winner or self.moves == 9:
            self.game_over = True
        else:
            self.current_player = 3 - self.current_player

        return True

    def check_winner(self):
        winning_combinations = [
            [0, 1, 2], [3, 4, 5], [6, 7, 8],
            [0, 3, 6], [1, 4, 7], [2, 5, 8],
            [0, 4, 8], [2, 4, 6]
        ]

        for combo in winning_combinations:
            if (self.board[combo[0]] == self.board[combo[1]] ==
                self.board[combo[2]] != 0):
                return 'X' if self.board[combo[0]] == 1 else 'O'

        return None

    def get_board_state(self):
        return self.board.copy()

class ImprovedGame(QMainWindow):
    """Improved game with working imitation learning"""

    def __init__(self):
        super().__init__()
        self.game = TicTacToeGame()
        self.ai = TicTacToeAI(player_symbol=2)  # AI plays as O

        # Training parameters
        self.training_mode = True
        self.observation_mode = True  # Learn from human moves
        self.games_played = 0
        self.ai_wins = 0
        self.human_wins = 0
        self.ties = 0

        self.init_ui()
        self.start_new_game()

    def init_ui(self):
        self.setWindowTitle('Improved Imitation Learning Tic-Tac-Toe')
        self.setGeometry(100, 100, 450, 600)

        central_widget = QWidget()
        self.setCentralWidget(central_widget)
        layout = QVBoxLayout()

        # Game board
        self.board_widget = self.create_board()
        layout.addWidget(self.board_widget)

        # Status
        status_layout = QHBoxLayout()
        self.status_label = QLabel("Your turn (X)")
        self.status_label.setFont(QFont('Arial', 14))
        status_layout.addWidget(self.status_label)

        self.stats_label = QLabel("Games: 0 | AI: 0 | You: 0 | Ties: 0")
        status_layout.addWidget(self.stats_label)
        layout.addLayout(status_layout)

        # Training controls
        controls = QHBoxLayout()

        self.train_btn = QPushButton("Training: ON")
        self.train_btn.clicked.connect(self.toggle_training)
        controls.addWidget(self.train_btn)

        self.observe_btn = QPushButton("Learn from You: ON")
        self.observe_btn.clicked.connect(self.toggle_observation)
        controls.addWidget(self.observe_btn)

        self.new_game_btn = QPushButton("New Game")
        self.new_game_btn.clicked.connect(self.start_new_game)
        controls.addWidget(self.new_game_btn)

        self.save_btn = QPushButton("Save AI")
        self.save_btn.clicked.connect(self.save_ai)
        controls.addWidget(self.save_btn)

        layout.addLayout(controls)

        # Learning parameters
        params = QGridLayout()

        params.addWidget(QLabel("Learning Rate:"), 0, 0)
        self.lr_slider = QSlider(Qt.Horizontal)
        self.lr_slider.setRange(1, 50)
        self.lr_slider.setValue(int(self.ai.learning_rate * 100))
        self.lr_slider.valueChanged.connect(self.update_learning_rate)
        params.addWidget(self.lr_slider, 0, 1)

        params.addWidget(QLabel("Exploration:"), 1, 0)
        self.exp_slider = QSlider(Qt.Horizontal)
        self.exp_slider.setRange(0, 100)
        self.exp_slider.setValue(int(self.ai.exploration_rate * 100))
        self.exp_slider.valueChanged.connect(self.update_exploration)
        params.addWidget(self.exp_slider, 1, 1)

        layout.addLayout(params)

        # Learning log
        self.log_text = QTextEdit()
        self.log_text.setMaximumHeight(150)
        self.log_text.setReadOnly(True)
        layout.addWidget(self.log_text)

        central_widget.setLayout(layout)

        # AI move timer
        self.ai_timer = QTimer()
        self.ai_timer.timeout.connect(self.ai_move)

        self.log("AI initialized. Play as X to train the AI!")
        self.log(f"AI knows {len(self.ai.q_table)} board states")

    def create_board(self):
        widget = QWidget()
        grid = QGridLayout()
        grid.setSpacing(5)

        self.buttons = []
        for i in range(9):
            btn = QPushButton('')
            btn.setFixedSize(100, 100)
            btn.setFont(QFont('Arial', 24))
            btn.clicked.connect(lambda checked, pos=i: self.human_move(pos))

            row, col = divmod(i, 3)
            grid.addWidget(btn, row, col)
            self.buttons.append(btn)

        widget.setLayout(grid)
        return widget

    def update_board(self):
        """Update button display from game state"""
        symbols = {0: '', 1: 'X', 2: 'O'}
        colors = {0: 'black', 1: 'red', 2: 'blue'}

        for i, btn in enumerate(self.buttons):
            symbol = symbols[self.game.board[i]]
            color = colors[self.game.board[i]]
            btn.setText(symbol)
            btn.setStyleSheet(f"color: {color}; font-weight: bold;")

    def human_move(self, position):
        """Handle human player move"""
        if self.game.game_over or self.game.current_player != 1:
            return

        # Record board before move for learning
        board_before = self.game.get_board_state()

        if self.game.make_move(position):
            self.update_board()

            # If learning from observation is enabled
            if self.training_mode and self.observation_mode:
                # The AI learns from the human move
                self.ai.learn_from_observation(
                    board_before,
                    position,
                    self.game.get_board_state(),
                    None  # Game not over yet
                )
                self.log(f"AI observed your move at {position}")

            if self.game.game_over:
                self.end_game()
            else:
                # AI's turn
                self.status_label.setText("AI thinking...")
                self.ai_timer.start(300)  # Shorter delay

    def ai_move(self):
        """Handle AI player move"""
        self.ai_timer.stop()

        if self.game.game_over or self.game.current_player != 2:
            return

        # Get available moves
        available_moves = [i for i, cell in enumerate(self.game.board) if cell == 0]

        if available_moves:
            # Choose action
            action = self.ai.choose_action(self.game.board, available_moves)

            # Record state before move for Q-learning
            board_before = self.game.get_board_state()

            if self.game.make_move(action):
                self.update_board()

                # Q-learning update
                if self.training_mode:
                    # Determine reward
                    if self.game.game_over:
                        if self.game.winner == 'O':
                            reward = 1.0  # AI won
                        elif self.game.winner == 'X':
                            reward = -1.0  # AI lost
                        else:
                            reward = 0.1  # Draw
                    else:
                        reward = 0  # Intermediate move

                    # Update Q-values
                    self.ai.learn(reward, self.game.get_board_state(), self.game.game_over)

                if self.game.game_over:
                    self.end_game()
                else:
                    self.status_label.setText("Your turn (X)")
                    self.log(f"AI moved to {action}")

    def end_game(self):
        """Handle game end"""
        winner = self.game.winner

        # Update statistics
        self.games_played += 1
        if winner == 'X':
            self.human_wins += 1
            result = "You win!"
        elif winner == 'O':
            self.ai_wins += 1
            result = "AI wins!"
            # Strong positive reinforcement for winning
            if self.training_mode:
                self.log("AI won! Giving strong positive reward")
        else:
            self.ties += 1
            result = "It's a tie!"

        self.update_stats()

        # Final Q-learning update for the last move
        if self.training_mode and winner is not None:
            # Determine final reward for AI
            final_reward = 1.0 if winner == 'O' else -1.0 if winner == 'X' else 0.1
            self.ai.learn(final_reward, self.game.board, True)

            # Also learn from the complete game if observation mode is on
            if self.observation_mode:
                self.log(f"AI learned from {result}")

        # Update status
        self.status_label.setText(result)

        # Highlight winning cells
        if winner:
            self.highlight_winner()

        # Offer new game
        QTimer.singleShot(1000, self.offer_new_game)

    def highlight_winner(self):
        """Highlight winning combination"""
        winning_combinations = [
            [0, 1, 2], [3, 4, 5], [6, 7, 8],
            [0, 3, 6], [1, 4, 7], [2, 5, 8],
            [0, 4, 8], [2, 4, 6]
        ]

        for combo in winning_combinations:
            if (self.game.board[combo[0]] == self.game.board[combo[1]] ==
                self.game.board[combo[2]] != 0):
                for pos in combo:
                    self.buttons[pos].setStyleSheet(
                        "background-color: lightgreen; font-weight: bold;"
                    )
                break

    def offer_new_game(self):
        """Ask if player wants to play again"""
        msg = QMessageBox()
        msg.setWindowTitle("Game Over")
        msg.setText(f"{self.status_label.text()}")
        msg.setInformativeText("Play again?")
        msg.setStandardButtons(QMessageBox.Yes | QMessageBox.No)

        if msg.exec_() == QMessageBox.Yes:
            self.start_new_game()

    def start_new_game(self):
        """Start a new game"""
        self.game.reset()
        self.update_board()

        # Reset button colors
        for btn in self.buttons:
            btn.setStyleSheet("")

        # Always let human start
        self.status_label.setText("Your turn (X)")

        # Gradually reduce exploration
        if self.games_played > 20:
            self.ai.exploration_rate = max(0.1, self.ai.exploration_rate * 0.95)
            self.exp_slider.setValue(int(self.ai.exploration_rate * 100))

        self.log(f"New game started (Game {self.games_played + 1})")
        self.log(f"AI exploration: {self.ai.exploration_rate:.2f}")

    def toggle_training(self):
        """Toggle training mode"""
        self.training_mode = not self.training_mode
        self.train_btn.setText(f"Training: {'ON' if self.training_mode else 'OFF'}")
        self.log(f"Training mode {'enabled' if self.training_mode else 'disabled'}")

    def toggle_observation(self):
        """Toggle learning from human moves"""
        self.observation_mode = not self.observation_mode
        self.observe_btn.setText(f"Learn from You: {'ON' if self.observation_mode else 'OFF'}")
        self.log(f"Learning from your moves {'enabled' if self.observation_mode else 'disabled'}")

    def update_learning_rate(self, value):
        """Update learning rate"""
        self.ai.learning_rate = value / 100.0
        self.log(f"Learning rate: {self.ai.learning_rate:.2f}")

    def update_exploration(self, value):
        """Update exploration rate"""
        self.ai.exploration_rate = value / 100.0
        self.log(f"Exploration rate: {self.ai.exploration_rate:.2f}")

    def update_stats(self):
        """Update statistics display"""
        self.stats_label.setText(
            f"Games: {self.games_played} | "
            f"AI: {self.ai_wins} | "
            f"You: {self.human_wins} | "
            f"Ties: {self.ties}"
        )

    def save_ai(self):
        """Save AI model"""
        self.ai.save_model()
        self.log(f"AI model saved! Knows {len(self.ai.q_table)} states")

    def log(self, message):
        """Add message to log"""
        self.log_text.append(f"[Game {self.games_played}] {message}")

def main():
    app = QApplication(sys.argv)
    app.setStyle('Fusion')

    # Set a nice theme
    palette = QPalette()
    palette.setColor(QPalette.Window, QColor(240, 240, 240))
    palette.setColor(QPalette.WindowText, Qt.black)
    app.setPalette(palette)

    game = ImprovedGame()
    game.show()
    sys.exit(app.exec_())

if __name__ == '__main__':
    main()
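The heart of TicTacToeAI.learn() above is the standard tabular Q-learning update, Q(s,a) <- Q(s,a) + alpha * (r + gamma * max_a' Q(s',a') - Q(s,a)). A worked example with the class defaults (learning_rate 0.3, discount_factor 0.9) and illustrative values for a terminal winning move:

# Worked example of the update rule used in TicTacToeAI.learn() (numbers are illustrative).
learning_rate, discount_factor = 0.3, 0.9
current_q = 0.0        # previously unvisited state-action pair
reward = 1.0           # the AI just won, so this was the terminal move
future_q = 0.0         # terminal state: no future reward to bootstrap from

new_q = current_q + learning_rate * (reward + discount_factor * future_q - current_q)
print(new_q)           # 0.3 -- each win pulls the move's Q-value 30% of the way toward +1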
Imitation Learning Tic Tac Toe AI 2/requirements.txt
ADDED
@@ -0,0 +1,2 @@
PyQt5
numpy
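Both imitation-learning folders ship the same two dependencies, PyQt5 for the GUI and numpy for the analysis scripts; installing them with pip from this requirements.txt should be enough to run improved_game.py and the analyzer above.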
Imitation Learning Tic Tac Toe AI 2/ttt_ai_model_improved.pkl
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:b0b251454a01165321853d8e32658ef4a96b694bdb74eed00f4060d7cd331743
size 1417366
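This entry is a Git LFS pointer rather than the pickle itself: the repository stores only the SHA-256 and the size (about 1.4 MB), and the real ttt_ai_model_improved.pkl is fetched through Git LFS when the repository is checked out with LFS installed.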
Imitation Learning Tic Tac Toe AI 2/ttt_diagnostic.py
ADDED
@@ -0,0 +1,351 @@
import pickle
import numpy as np
import os

def load_ai_model():
    """Load the AI model from pickle file"""
    filename = "ttt_ai_model.pkl"
    if not os.path.exists(filename):
        print("Model file not found!")
        return None

    with open(filename, 'rb') as f:
        data = pickle.load(f)

    return data['model']

def decode_state(state_index):
    """Convert state index to board representation"""
    board = [0] * 9
    temp_index = state_index

    for i in range(9):
        board[i] = temp_index % 3
        temp_index //= 3

    return board

def display_board(board):
    """Display Tic-Tac-Toe board"""
    symbols = {0: '.', 1: 'X', 2: 'O'}

    print("Current board:")
    for row in range(3):
        row_chars = [symbols[board[row*3 + col]] for col in range(3)]
        print("  " + " | ".join(row_chars))
        if row < 2:
            print("  " + "-" * 9)

def test_ai_with_common_scenarios(model):
    """Test AI with common Tic-Tac-Toe scenarios"""

    print("\n🤖 TESTING AI WITH COMMON SCENARIOS")
    print("="*50)

    test_cases = [
        # Empty board
        {
            "name": "Empty board - first move",
            "board": [0,0,0,0,0,0,0,0,0],
            "expected": "Center (4) or corners (0,2,6,8)"
        },
        # Center taken by opponent
        {
            "name": "Opponent took center",
            "board": [0,0,0,0,1,0,0,0,0],
            "expected": "A corner (0,2,6,8)"
        },
        # Winning move for AI
        {
            "name": "AI can win in one move",
            "board": [2,1,0,  # O X .
                      1,2,0,  # X O .
                      0,0,0], # . . .
            "expected": "Move 8 to complete diagonal"
        },
        # Block opponent's winning move
        {
            "name": "Block opponent's winning move",
            "board": [1,0,0,  # X . .
                      1,2,0,  # X O .
                      0,0,0], # . . .
            "expected": "Move 6 to block vertical"
        },
        # Fork opportunity
        {
            "name": "Fork opportunity",
            "board": [2,0,1,  # O . X
                      0,1,0,  # . X .
                      0,0,0], # . . .
            "expected": "Move 8 to create fork"
        }
    ]

    for test in test_cases:
        print(f"\n📋 {test['name']}")
        display_board(test['board'])

        # Convert board to state index
        state_idx = 0
        for i, cell in enumerate(test['board']):
            state_idx += cell * (3 ** i)

        # Get Q-values for this state
        q_values = model[state_idx]

        # Get available moves
        available_moves = [i for i, cell in enumerate(test['board']) if cell == 0]

        if available_moves:
            print("\nAvailable moves and Q-values:")
            for move in available_moves:
                q_val = q_values[move]
                row, col = divmod(move, 3)
                symbol = "⚠️" if q_val > 0 else "  "
                print(f"{symbol} Move {move} (row {row}, col {col}): {q_val:.4f}")

            # AI's recommended move
            best_move = available_moves[np.argmax([q_values[m] for m in available_moves])]
            row, col = divmod(best_move, 3)
            print(f"\n🤖 AI's choice: Move {best_move} (row {row}, col {col})")
            print(f"📋 Expected: {test['expected']}")

            # Check if AI learned something useful
            best_q = q_values[best_move]
            if best_q > 0:
                print("✅ AI has positive association with this move")
            elif best_q < 0:
                print("❌ AI has negative association with this move (thinks it's bad)")
            else:
                print("➖ AI has no learning for this move")
        else:
            print("No available moves!")

def analyze_learning_patterns(model):
    """Analyze what patterns the AI has learned"""

    print("\n🔍 ANALYZING LEARNING PATTERNS")
    print("="*50)

    # Find all states with non-zero Q-values
    non_zero_indices = np.nonzero(np.any(model != 0, axis=1))[0]

    print(f"Total states with learning: {len(non_zero_indices)}")

    # Categorize by game phase
    phases = {
        "early": [],  # 0-2 moves made
        "mid": [],    # 3-5 moves made
        "late": []    # 6-8 moves made
    }

    for idx in non_zero_indices:
        board = decode_state(idx)
        moves_made = sum(1 for cell in board if cell != 0)

        if moves_made <= 2:
            phases["early"].append(idx)
        elif moves_made <= 5:
            phases["mid"].append(idx)
        else:
            phases["late"].append(idx)

    print(f"\nLearning by game phase:")
    print(f"   Early game (0-2 moves): {len(phases['early'])} states")
    print(f"   Mid game (3-5 moves): {len(phases['mid'])} states")
    print(f"   Late game (6-8 moves): {len(phases['late'])} states")

    # Analyze Q-value distribution
    all_q_values = model[non_zero_indices].flatten()
    non_zero_q = all_q_values[all_q_values != 0]

    if len(non_zero_q) > 0:
        print(f"\nQ-value analysis:")
        print(f"   Total Q-values: {len(non_zero_q)}")
        print(f"   Positive Q-values: {np.sum(non_zero_q > 0)}")
        print(f"   Negative Q-values: {np.sum(non_zero_q < 0)}")
        print(f"   Average Q-value: {np.mean(non_zero_q):.4f}")
        print(f"   Most positive: {np.max(non_zero_q):.4f}")
        print(f"   Most negative: {np.min(non_zero_q):.4f}")

    # Show examples of what AI learned
    print("\n📚 Examples of learned states:")

    # Find states with positive Q-values
    positive_states = []
    for idx in non_zero_indices:
        if np.any(model[idx] > 0):
            positive_states.append(idx)

    if positive_states:
        print(f"\nFound {len(positive_states)} states with positive associations")
        for i, idx in enumerate(positive_states[:3]):  # Show first 3
            board = decode_state(idx)
            print(f"\nExample {i+1}:")
            display_board(board)

            q_values = model[idx]
            positive_moves = np.where(q_values > 0)[0]
            print("Moves AI thinks are good:")
            for move in positive_moves:
                print(f"   Move {move} (row {move//3}, col {move%3}): {q_values[move]:.4f}")
    else:
        print("No positive associations found - AI hasn't learned winning strategies yet")

def check_for_specific_patterns(model):
    """Check if AI has learned specific Tic-Tac-Toe strategies"""

    print("\n🎯 CHECKING FOR SPECIFIC STRATEGIES")
    print("="*50)

    strategies = {
        "prefer_center": 0,
        "prefer_corners": 0,
        "prefer_edges": 0,
        "block_opponent": 0,
        "create_fork": 0,
        "avoid_losing": 0
    }

    # Check common winning/blocking patterns
    patterns_to_check = [
        # Center preference
        ([0,0,0,0,0,0,0,0,0], [4], "prefer_center"),

        # Corner openings
        ([0,0,0,0,1,0,0,0,0], [0,2,6,8], "prefer_corners"),

        # Block vertical
        ([1,0,0,1,2,0,0,0,0], [6], "block_opponent"),

        # Block horizontal
        ([1,1,0,0,2,0,0,0,0], [2], "block_opponent"),

        # Block diagonal
        ([1,0,0,0,1,0,0,0,0], [8], "block_opponent"),
    ]

    for board_pattern, good_moves, strategy in patterns_to_check:
        state_idx = 0
        for i, cell in enumerate(board_pattern):
            state_idx += cell * (3 ** i)

        q_values = model[state_idx]
        available_moves = [i for i, cell in enumerate(board_pattern) if cell == 0]

        if available_moves:
            # Check if AI prefers any of the good moves
            best_move = available_moves[np.argmax([q_values[m] for m in available_moves])]
            if best_move in good_moves:
                strategies[strategy] += 1
                print(f"✅ AI correctly prefers {strategy.replace('_', ' ')} in this scenario")
            else:
                print(f"❌ AI doesn't recognize {strategy.replace('_', ' ')}")
| 244 |
+
|
| 245 |
+
print(f"\nStrategy recognition summary:")
|
| 246 |
+
for strategy, count in strategies.items():
|
| 247 |
+
print(f" {strategy}: {count}/1")
|
| 248 |
+
|
| 249 |
+
def train_ai_offline(model, num_games=1000):
|
| 250 |
+
"""Simulate games to improve the AI offline"""
|
| 251 |
+
|
| 252 |
+
print("\n🎮 SIMULATING OFFLINE TRAINING")
|
| 253 |
+
print("="*50)
|
| 254 |
+
|
| 255 |
+
print(f"Simulating {num_games} games of self-play...")
|
| 256 |
+
|
| 257 |
+
# Simple self-play simulation
|
| 258 |
+
import random
|
| 259 |
+
|
| 260 |
+
# We'll create a simple improvement by adding some basic strategies
|
| 261 |
+
original_non_zero = np.count_nonzero(model)
|
| 262 |
+
|
| 263 |
+
# Add some basic Tic-Tac-Toe knowledge
|
| 264 |
+
# Center is good
|
| 265 |
+
empty_board_idx = 0 # All zeros
|
| 266 |
+
model[empty_board_idx][4] = 0.1 # Center is good
|
| 267 |
+
|
| 268 |
+
# Corners are good when center is taken
|
| 269 |
+
center_taken_idx = 3**4 # Only center is 1
|
| 270 |
+
for corner in [0, 2, 6, 8]:
|
| 271 |
+
model[center_taken_idx][corner] = 0.08
|
| 272 |
+
|
| 273 |
+
# Blocking is good
|
| 274 |
+
# Example: opponent has two in a row
|
| 275 |
+
for i in range(9):
|
| 276 |
+
board = [0] * 9
|
| 277 |
+
board[i] = 1
|
| 278 |
+
board[(i+3)%9] = 1
|
| 279 |
+
if board[6] == 0: # Check if third in column is empty
|
| 280 |
+
state_idx = 0
|
| 281 |
+
for j, cell in enumerate(board):
|
| 282 |
+
state_idx += cell * (3 ** j)
|
| 283 |
+
blocking_move = 6
|
| 284 |
+
model[state_idx][blocking_move] = 0.15
|
| 285 |
+
|
| 286 |
+
new_non_zero = np.count_nonzero(model)
|
| 287 |
+
improvement = new_non_zero - original_non_zero
|
| 288 |
+
|
| 289 |
+
print(f"Added {improvement} new learned values")
|
| 290 |
+
print("Basic Tic-Tac-Toe strategies have been added to the AI")
|
| 291 |
+
|
| 292 |
+
return model
|
| 293 |
+
|
| 294 |
+
def save_improved_model(model):
|
| 295 |
+
"""Save the improved model"""
|
| 296 |
+
filename = "ttt_ai_model_improved.pkl"
|
| 297 |
+
|
| 298 |
+
# Load existing data to preserve experience buffer
|
| 299 |
+
original_filename = "ttt_ai_model.pkl"
|
| 300 |
+
if os.path.exists(original_filename):
|
| 301 |
+
with open(original_filename, 'rb') as f:
|
| 302 |
+
data = pickle.load(f)
|
| 303 |
+
else:
|
| 304 |
+
data = {'model': model, 'experience': []}
|
| 305 |
+
|
| 306 |
+
data['model'] = model
|
| 307 |
+
|
| 308 |
+
with open(filename, 'wb') as f:
|
| 309 |
+
pickle.dump(data, f)
|
| 310 |
+
|
| 311 |
+
print(f"\n💾 Improved model saved to '{filename}'")
|
| 312 |
+
|
| 313 |
+
def main():
|
| 314 |
+
"""Main function to analyze and improve the AI"""
|
| 315 |
+
|
| 316 |
+
print("🤖 TIC-TAC-TOE AI DIAGNOSTIC TOOL")
|
| 317 |
+
print("="*60)
|
| 318 |
+
|
| 319 |
+
# Load the model
|
| 320 |
+
model = load_ai_model()
|
| 321 |
+
if model is None:
|
| 322 |
+
return
|
| 323 |
+
|
| 324 |
+
# Test with common scenarios
|
| 325 |
+
test_ai_with_common_scenarios(model)
|
| 326 |
+
|
| 327 |
+
# Analyze learning patterns
|
| 328 |
+
analyze_learning_patterns(model)
|
| 329 |
+
|
| 330 |
+
# Check for specific strategies
|
| 331 |
+
check_for_specific_patterns(model)
|
| 332 |
+
|
| 333 |
+
# Offer to improve the AI
|
| 334 |
+
print("\n" + "="*60)
|
| 335 |
+
improve = input("\nWould you like to add basic Tic-Tac-Toe knowledge to the AI? (y/n): ").strip().lower()
|
| 336 |
+
|
| 337 |
+
if improve == 'y':
|
| 338 |
+
model = train_ai_offline(model)
|
| 339 |
+
save_improved_model(model)
|
| 340 |
+
print("\n✅ AI has been improved with basic strategies!")
|
| 341 |
+
print("Restart the game and use 'ttt_ai_model_improved.pkl' for better performance")
|
| 342 |
+
else:
|
| 343 |
+
print("\n📝 Recommendations for improving the AI through gameplay:")
|
| 344 |
+
print("1. Play more games against the AI")
|
| 345 |
+
print("2. Let the AI watch you play against itself")
|
| 346 |
+
print("3. Adjust learning rate to 0.2-0.3 for faster learning")
|
| 347 |
+
print("4. Reduce exploration rate to 0.1 once AI starts winning")
|
| 348 |
+
print("5. Play both as X and O to teach both perspectives")
|
| 349 |
+
|
| 350 |
+
if __name__ == "__main__":
|
| 351 |
+
main()
|
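The diagnostic above encodes a board as a single base-3 integer, state_idx = sum(cell * 3**i) over the nine cells, and decode_state() inverts that packing. A minimal sanity check of the round trip, assuming nothing beyond the functions shown (encode_state is a hypothetical helper, not part of the uploaded files):

    def encode_state(board):
        """Pack a 9-cell board (0=empty, 1=X, 2=O) into one base-3 integer."""
        return sum(cell * (3 ** i) for i, cell in enumerate(board))

    def decode_state(state_index):
        """Unpack the base-3 integer back into a 9-cell board (same logic as the script)."""
        board = [0] * 9
        for i in range(9):
            board[i] = state_index % 3
            state_index //= 3
        return board

    board = [2, 1, 0, 1, 2, 0, 0, 0, 0]   # the "AI can win in one move" scenario
    idx = encode_state(board)             # 2*1 + 1*3 + 1*27 + 2*81 = 194
    assert decode_state(idx) == board     # the packing round-trips for any board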
Imitation Learning Tic Tac Toe AI/Screenshot 2025-12-02 at 12.30.57 PM.png
ADDED
Git LFS Details
Imitation Learning Tic Tac Toe AI/Screenshot 2025-12-02 at 12.33.32 PM.png
ADDED
Git LFS Details
Imitation Learning Tic Tac Toe AI/Screenshot 2025-12-02 at 12.33.43 PM.png
ADDED
Git LFS Details
Imitation Learning Tic Tac Toe AI/analyze_ttt_model.py
ADDED
@@ -0,0 +1,377 @@
import pickle
import numpy as np
import os
from collections import deque
import json

def load_pickle_file(filename="ttt_ai_model.pkl"):
    """
    Load and analyze the Tic-Tac-Toe AI model pickle file.

    Args:
        filename (str): Path to the pickle file

    Returns:
        dict: The loaded data or None if file doesn't exist
    """

    if not os.path.exists(filename):
        print(f"❌ File '{filename}' not found!")
        print("Possible reasons:")
        print("1. The game hasn't been played yet")
        print("2. The file was saved with a different name")
        print("3. The file is in a different directory")
        return None

    try:
        print(f"📂 Opening '{filename}'...")

        # Load the pickle file
        with open(filename, 'rb') as f:
            data = pickle.load(f)

        print("✅ File loaded successfully!")
        print("\n" + "="*60)

        return data

    except Exception as e:
        print(f"❌ Error loading pickle file: {e}")
        print(f"Error type: {type(e).__name__}")
        return None

def analyze_model(data):
    """
    Analyze and display information about the AI model.

    Args:
        data (dict): The loaded pickle data
    """

    if not data:
        print("No data to analyze")
        return

    print("📊 MODEL ANALYSIS")
    print("="*60)

    # Check what keys are available
    print(f"Keys in data: {list(data.keys())}")

    # Analyze model matrix if present
    if 'model' in data:
        model = data['model']
        print(f"\n🤖 AI Model Information:")
        print(f" Shape: {model.shape}")
        print(f" Size: {model.size:,} elements")
        print(f" Data type: {model.dtype}")

        # Calculate some statistics
        print(f"\n📈 Model Statistics:")
        print(f" Non-zero entries: {np.count_nonzero(model):,}")
        print(f" Zero entries: {np.sum(model == 0):,}")
        print(f" Sparsity: {(np.sum(model == 0) / model.size) * 100:.2f}%")

        # Get min, max, mean values
        if model.size > 0:
            flat_model = model.flatten()
            non_zero_values = flat_model[flat_model != 0]

            if len(non_zero_values) > 0:
                print(f" Min value (non-zero): {non_zero_values.min():.6f}")
                print(f" Max value: {flat_model.max():.6f}")
                print(f" Mean value (non-zero): {non_zero_values.mean():.6f}")
                print(f" Std dev (non-zero): {non_zero_values.std():.6f}")

                # Count of positive vs negative values
                positive = np.sum(flat_model > 0)
                negative = np.sum(flat_model < 0)
                print(f" Positive values: {positive:,}")
                print(f" Negative values: {negative:,}")

    # Analyze experience replay if present
    if 'experience' in data:
        experience = data['experience']
        print(f"\n🎮 Experience Replay Buffer:")
        print(f" Number of experiences: {len(experience):,}")

        if experience:
            # Show first few experiences
            print(f" Sample experience (first):")
            if hasattr(experience[0], '__len__'):
                print(f" Length: {len(experience[0])}")
                if len(experience[0]) > 0:
                    print(f" First element type: {type(experience[0][0])}")

    # Check for other data
    for key in data.keys():
        if key not in ['model', 'experience']:
            value = data[key]
            print(f"\n🔍 {key}:")
            print(f" Type: {type(value)}")
            if isinstance(value, (list, tuple, deque)):
                print(f" Length: {len(value)}")
            elif isinstance(value, dict):
                print(f" Keys: {list(value.keys())[:5]}..." if len(value) > 5 else f" Keys: {list(value.keys())}")
            elif isinstance(value, np.ndarray):
                print(f" Shape: {value.shape}")

def examine_specific_states(model, num_states=5):
    """
    Examine specific state-action values in the model.

    Args:
        model (np.ndarray): The AI model
        num_states (int): Number of states to examine
    """

    print(f"\n🔬 Examining {num_states} specific states:")
    print("-"*40)

    # Find states with non-zero values
    non_zero_indices = np.nonzero(np.any(model != 0, axis=1))[0]

    if len(non_zero_indices) > 0:
        print(f"Found {len(non_zero_indices):,} states with learned values")

        # Sample some states to examine
        if len(non_zero_indices) > num_states:
            sample_indices = np.random.choice(non_zero_indices, num_states, replace=False)
        else:
            sample_indices = non_zero_indices

        for i, state_idx in enumerate(sample_indices):
            q_values = model[state_idx]
            non_zero_q = q_values[q_values != 0]

            if len(non_zero_q) > 0:
                print(f"\nState {i+1} (Index {state_idx}):")
                print(f" Non-zero Q-values: {len(non_zero_q)}")
                print(f" Actions with values:")
                for action in np.where(q_values != 0)[0]:
                    print(f" Action {action}: {q_values[action]:.4f}")
    else:
        print("No states with learned values found yet.")

def decode_state(state_index):
    """
    Convert a state index back to a board representation.
    This assumes the same encoding used in the game.

    Args:
        state_index (int): The encoded state index

    Returns:
        list: Board representation (0=empty, 1=X, 2=O)
    """
    board = [0] * 9
    temp_index = state_index

    for i in range(9):
        board[i] = temp_index % 3
        temp_index //= 3

    return board

def display_board(board):
    """
    Display a Tic-Tac-Toe board in human-readable format.

    Args:
        board (list): Board representation
    """
    symbols = {0: '.', 1: 'X', 2: 'O'}

    print("Board state:")
    for row in range(3):
        row_chars = [symbols[board[row*3 + col]] for col in range(3)]
        print(" " + " | ".join(row_chars))
        if row < 2:
            print(" " + "-" * 9)

def explore_model_interactively(model):
    """
    Interactive exploration of the model.

    Args:
        model (np.ndarray): The AI model
    """

    print("\n🎯 INTERACTIVE EXPLORATION")
    print("="*60)

    while True:
        print("\nOptions:")
        print("1. Look up a specific state")
        print("2. Find states with highest Q-values")
        print("3. Find best action for a given state")
        print("4. Exit exploration")

        choice = input("\nEnter your choice (1-4): ").strip()

        if choice == '1':
            try:
                state_idx = int(input("Enter state index (0-19682): "))
                if 0 <= state_idx < model.shape[0]:
                    board = decode_state(state_idx)
                    display_board(board)

                    q_values = model[state_idx]
                    print(f"\nQ-values for state {state_idx}:")
                    for action in range(9):
                        if q_values[action] != 0:
                            print(f" Action {action} (row {action//3}, col {action%3}): {q_values[action]:.4f}")

                    # Show best action
                    best_action = np.argmax(q_values)
                    print(f"\nBest action: {best_action} (row {best_action//3}, col {best_action%3})")
                else:
                    print("Invalid state index!")
            except ValueError:
                print("Please enter a valid number!")

        elif choice == '2':
            try:
                num_states = int(input("How many top states? (1-100): "))
                num_states = max(1, min(100, num_states))

                # Find states with maximum Q-values
                max_q_per_state = np.max(model, axis=1)
                top_indices = np.argsort(max_q_per_state)[-num_states:][::-1]

                print(f"\nTop {num_states} states with highest Q-values:")
                for i, idx in enumerate(top_indices[:10]): # Show first 10
                    max_q = max_q_per_state[idx]
                    if max_q > 0:
                        board = decode_state(idx)
                        print(f"\n{i+1}. State {idx} (max Q: {max_q:.4f})")
                        display_board(board)
            except ValueError:
                print("Please enter a valid number!")

        elif choice == '3':
            # Create a board manually
            print("\nEnter board state (9 numbers, 0=empty, 1=X, 2=O)")
            print("Example: 0 0 1 0 2 0 0 0 0")

            try:
                board_input = input("Board: ").strip()
                if len(board_input) == 0:
                    # Use default example
                    board = [0, 0, 1, 0, 2, 0, 0, 0, 0]
                else:
                    board = [int(x) for x in board_input.split()]

                if len(board) != 9 or any(x not in [0, 1, 2] for x in board):
                    raise ValueError("Invalid board")

                display_board(board)

                # Convert to state index
                state_idx = 0
                for i, cell in enumerate(board):
                    state_idx += cell * (3 ** i)

                q_values = model[state_idx]

                # Only show available moves
                available_moves = [i for i, cell in enumerate(board) if cell == 0]

                print("\nAvailable moves and their Q-values:")
                for move in available_moves:
                    q_val = q_values[move]
                    row, col = divmod(move, 3)
                    print(f" Move {move} (row {row}, col {col}): {q_val:.4f}")

                if available_moves:
                    best_move = available_moves[np.argmax([q_values[m] for m in available_moves])]
                    row, col = divmod(best_move, 3)
                    print(f"\nRecommended move: {best_move} (row {row}, col {col})")
                else:
                    print("No available moves!")

            except Exception as e:
                print(f"Error: {e}")

        elif choice == '4':
            print("Exiting interactive exploration.")
            break

        else:
            print("Invalid choice!")

def save_model_summary(data, filename="model_summary.json"):
    """
    Save a summary of the model to a JSON file.

    Args:
        data (dict): The loaded pickle data
        filename (str): Output JSON filename
    """
    if not data:
        return

    summary = {}

    if 'model' in data:
        model = data['model']
        summary['model'] = {
            'shape': model.shape,
            'size': int(model.size),
            'non_zero_entries': int(np.count_nonzero(model)),
            'sparsity': float((np.sum(model == 0) / model.size) * 100)
        }

    if 'experience' in data:
        experience = data['experience']
        summary['experience'] = {
            'count': len(experience)
        }

    try:
        with open(filename, 'w') as f:
            json.dump(summary, f, indent=2)
        print(f"\n💾 Model summary saved to '{filename}'")
    except Exception as e:
        print(f"Error saving summary: {e}")

def main():
    """
    Main function to load and analyze the pickle file.
    """
    print("🔍 Tic-Tac-Toe AI Model Analyzer")
    print("="*60)

    # Try to load the pickle file
    filename = "ttt_ai_model.pkl"
    data = load_pickle_file(filename)

    if data:
        # Analyze the model
        analyze_model(data)

        # If model exists, do more detailed analysis
        if 'model' in data:
            # Examine specific states
            examine_specific_states(data['model'])

            # Interactive exploration
            explore = input("\nWould you like to explore the model interactively? (y/n): ").strip().lower()
            if explore == 'y':
                explore_model_interactively(data['model'])

            # Save summary
            save = input("\nWould you like to save a summary? (y/n): ").strip().lower()
            if save == 'y':
                save_model_summary(data)

        # Additional file info
        print("\n📄 File Information:")
        print(f" File size: {os.path.getsize(filename):,} bytes")
        print(f" Last modified: {os.path.getmtime(filename):.0f}")

    print("\n" + "="*60)
    print("Analysis complete!")

if __name__ == "__main__":
    main()
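The analyzer above treats the model as a (3**9, 9) table, i.e. 19683 raw board encodings by 9 moves, and option 3 of its interactive menu looks up one board by hand. A short non-interactive version of that same lookup, assuming ttt_ai_model.pkl is in the working directory as the analyzer does (a sketch, not part of the commit):

    import pickle

    with open("ttt_ai_model.pkl", "rb") as f:
        model = pickle.load(f)["model"]              # numpy array of shape (19683, 9)

    board = [0, 0, 1, 0, 2, 0, 0, 0, 0]              # the analyzer's example board
    state_idx = sum(cell * (3 ** i) for i, cell in enumerate(board))
    available = [i for i, cell in enumerate(board) if cell == 0]
    best = max(available, key=lambda m: model[state_idx][m])
    print(f"Recommended move: {best}, Q = {model[state_idx][best]:.4f}")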
Imitation Learning Tic Tac Toe AI/requirements.txt
ADDED
@@ -0,0 +1,2 @@
PyQt5
numpy
Imitation Learning Tic Tac Toe AI/tic_tac_toe_ai.py
ADDED
@@ -0,0 +1,483 @@
import sys
import numpy as np
import random
from collections import deque
import pickle
import os
from PyQt5.QtWidgets import *
from PyQt5.QtCore import *
from PyQt5.QtGui import *

class TicTacToeAI:
    """AI that learns by imitating human player moves"""

    def __init__(self):
        self.experience = deque(maxlen=10000)
        self.state_history = []
        self.move_history = []
        self.model = self.create_model()
        self.learning_rate = 0.1
        self.epsilon = 0.3 # Exploration rate
        self.load_data()

    def create_model(self):
        """Simple Q-learning style model"""
        # State representation: 9 cells (0=empty, 1=X, 2=O)
        # Action: 9 possible moves
        return np.zeros((3**9, 9)) # Simplified representation

    def state_to_index(self, board):
        """Convert board state to a unique index"""
        index = 0
        for i, cell in enumerate(board):
            index += cell * (3 ** i)
        return index

    def get_action(self, board, available_moves):
        """Choose an action based on current policy"""
        # Random exploration
        if random.random() < self.epsilon:
            return random.choice(available_moves)

        # Exploitation: choose best learned move
        state_idx = self.state_to_index(board)
        q_values = self.model[state_idx]

        # Filter available moves and choose best
        available_q_values = [q_values[move] if move in available_moves else -float('inf')
                              for move in range(9)]
        return np.argmax(available_q_values)

    def record_move(self, board, move):
        """Record state-action pair for learning"""
        self.state_history.append(board.copy())
        self.move_history.append(move)

    def learn_from_game(self, winner):
        """Learn from the completed game"""
        if not self.state_history:
            return

        reward = 0.1 if winner == 'O' else -0.1 if winner == 'X' else 0.05

        for i, (state, move) in enumerate(zip(self.state_history, self.move_history)):
            state_idx = self.state_to_index(state)
            self.model[state_idx][move] += self.learning_rate * reward

        # Clear history for next game
        self.state_history = []
        self.move_history = []

        self.save_data()

    def save_data(self):
        """Save learned model"""
        try:
            data = {
                'model': self.model,
                'experience': list(self.experience)
            }
            with open('ttt_ai_model.pkl', 'wb') as f:
                pickle.dump(data, f)
        except:
            pass

    def load_data(self):
        """Load saved model"""
        if os.path.exists('ttt_ai_model.pkl'):
            try:
                with open('ttt_ai_model.pkl', 'rb') as f:
                    data = pickle.load(f)
                    self.model = data.get('model', self.model)
                    self.experience = deque(data.get('experience', []), maxlen=10000)
            except:
                pass

class TicTacToeGame:
    """Game logic"""

    def __init__(self):
        self.reset()

    def reset(self):
        self.board = [0] * 9 # 0=empty, 1=X, 2=O
        self.current_player = 1 # X starts
        self.winner = None
        self.game_over = False
        self.moves = 0

    def make_move(self, position):
        """Make a move at given position"""
        if self.board[position] != 0 or self.game_over:
            return False

        self.board[position] = self.current_player
        self.moves += 1

        # Check for winner
        self.winner = self.check_winner()
        if self.winner or self.moves == 9:
            self.game_over = True
        else:
            # Switch player
            self.current_player = 3 - self.current_player # Switches between 1 and 2

        return True

    def check_winner(self):
        """Check if there's a winner"""
        winning_combinations = [
            [0, 1, 2], [3, 4, 5], [6, 7, 8], # Rows
            [0, 3, 6], [1, 4, 7], [2, 5, 8], # Columns
            [0, 4, 8], [2, 4, 6] # Diagonals
        ]

        for combo in winning_combinations:
            if (self.board[combo[0]] == self.board[combo[1]] ==
                    self.board[combo[2]] != 0):
                return 'X' if self.board[combo[0]] == 1 else 'O'

        return None

    def get_available_moves(self):
        """Get list of available positions"""
        return [i for i, cell in enumerate(self.board) if cell == 0]

    def get_board_state(self):
        """Return copy of board"""
        return self.board.copy()

class GameBoard(QWidget):
    """Tic-Tac-Toe game board widget"""

    def __init__(self):
        super().__init__()
        self.cell_size = 100
        self.setFixedSize(self.cell_size * 3 + 20, self.cell_size * 3 + 20)

    def paintEvent(self, event):
        painter = QPainter(self)
        painter.setRenderHint(QPainter.Antialiasing)

        # Draw background
        painter.fillRect(self.rect(), QColor(240, 240, 240))

        # Draw grid
        painter.setPen(QPen(QColor(0, 0, 0), 3))
        for i in range(1, 3):
            # Vertical lines
            painter.drawLine(
                self.cell_size * i + 10, 10,
                self.cell_size * i + 10, self.cell_size * 3 + 10
            )
            # Horizontal lines
            painter.drawLine(
                10, self.cell_size * i + 10,
                self.cell_size * 3 + 10, self.cell_size * i + 10
            )

        # Draw X's and O's
        if hasattr(self, 'game'):
            for i in range(9):
                row, col = divmod(i, 3)
                x = col * self.cell_size + 10
                y = row * self.cell_size + 10

                if self.game.board[i] == 1: # X
                    painter.setPen(QPen(QColor(220, 50, 50), 4))
                    painter.drawLine(x + 20, y + 20, x + self.cell_size - 20, y + self.cell_size - 20)
                    painter.drawLine(x + self.cell_size - 20, y + 20, x + 20, y + self.cell_size - 20)
                elif self.game.board[i] == 2: # O
                    painter.setPen(QPen(QColor(50, 50, 220), 4))
                    painter.drawEllipse(x + 20, y + 20, self.cell_size - 40, self.cell_size - 40)

        # Draw winner line if exists
        if hasattr(self, 'game') and self.game.winner:
            self.draw_winner_line(painter)

    def draw_winner_line(self, painter):
        """Draw line through winning combination"""
        winning_combinations = [
            [0, 1, 2], [3, 4, 5], [6, 7, 8], # Rows
            [0, 3, 6], [1, 4, 7], [2, 5, 8], # Columns
            [0, 4, 8], [2, 4, 6] # Diagonals
        ]

        painter.setPen(QPen(QColor(0, 200, 0), 6))

        for combo in winning_combinations:
            if (self.game.board[combo[0]] == self.game.board[combo[1]] ==
                    self.game.board[combo[2]] != 0):
                # Calculate positions
                positions = []
                for pos in combo:
                    row, col = divmod(pos, 3)
                    x = col * self.cell_size + self.cell_size // 2 + 10
                    y = row * self.cell_size + self.cell_size // 2 + 10
                    positions.append((x, y))

                painter.drawLine(positions[0][0], positions[0][1],
                                 positions[2][0], positions[2][1])
                break

    def mousePressEvent(self, event):
        if event.button() == Qt.LeftButton:
            x = event.x() - 10
            y = event.y() - 10

            if 0 <= x < self.cell_size * 3 and 0 <= y < self.cell_size * 3:
                col = x // self.cell_size
                row = y // self.cell_size
                position = row * 3 + col

                if hasattr(self, 'on_cell_clicked'):
                    self.on_cell_clicked(position)

class ImitationLearningGame(QMainWindow):
    """Main game window with imitation learning"""

    def __init__(self):
        super().__init__()
        self.game = TicTacToeGame()
        self.ai = TicTacToeAI()
        self.player_symbol = 1 # X
        self.ai_symbol = 2 # O
        self.training_mode = True
        self.ai_turn = False
        self.games_played = 0
        self.ai_wins = 0
        self.player_wins = 0
        self.ties = 0

        self.init_ui()
        self.start_new_game()

    def init_ui(self):
        self.setWindowTitle('Imitation Learning Tic-Tac-Toe')
        self.setGeometry(100, 100, 400, 500)

        # Central widget
        central_widget = QWidget()
        self.setCentralWidget(central_widget)
        layout = QVBoxLayout()

        # Game board
        self.board_widget = GameBoard()
        self.board_widget.on_cell_clicked = self.handle_cell_click
        self.board_widget.game = self.game
        layout.addWidget(self.board_widget)

        # Info panel
        info_layout = QHBoxLayout()

        self.status_label = QLabel("Your turn (X)")
        self.status_label.setFont(QFont('Arial', 14))
        info_layout.addWidget(self.status_label)

        info_layout.addStretch()

        self.stats_label = QLabel("Games: 0 | AI Wins: 0 | Your Wins: 0 | Ties: 0")
        self.stats_label.setFont(QFont('Arial', 10))
        info_layout.addWidget(self.stats_label)

        layout.addLayout(info_layout)

        # Control panel
        control_layout = QHBoxLayout()

        self.train_button = QPushButton("Toggle Training: ON")
        self.train_button.clicked.connect(self.toggle_training)
        control_layout.addWidget(self.train_button)

        self.new_game_button = QPushButton("New Game")
        self.new_game_button.clicked.connect(self.start_new_game)
        control_layout.addWidget(self.new_game_button)

        self.reset_ai_button = QPushButton("Reset AI")
        self.reset_ai_button.clicked.connect(self.reset_ai)
        control_layout.addWidget(self.reset_ai_button)

        layout.addLayout(control_layout)

        # Learning parameters
        param_layout = QHBoxLayout()

        param_layout.addWidget(QLabel("Learning Rate:"))
        self.learning_rate_slider = QSlider(Qt.Horizontal)
        self.learning_rate_slider.setRange(1, 20)
        self.learning_rate_slider.setValue(10)
        self.learning_rate_slider.valueChanged.connect(self.update_learning_rate)
        param_layout.addWidget(self.learning_rate_slider)

        param_layout.addWidget(QLabel("Exploration:"))
        self.exploration_slider = QSlider(Qt.Horizontal)
        self.exploration_slider.setRange(0, 100)
        self.exploration_slider.setValue(30)
        self.exploration_slider.valueChanged.connect(self.update_exploration)
        param_layout.addWidget(self.exploration_slider)

        layout.addLayout(param_layout)

        # Learning info
        self.learning_info = QTextEdit()
        self.learning_info.setMaximumHeight(100)
        self.learning_info.setReadOnly(True)
        layout.addWidget(self.learning_info)

        central_widget.setLayout(layout)

        # Timer for AI moves
        self.ai_timer = QTimer()
        self.ai_timer.timeout.connect(self.make_ai_move)

        self.add_log("AI initialized. Start playing to train the AI!")

    def start_new_game(self):
        self.game.reset()
        self.ai_turn = False # Player starts
        self.status_label.setText("Your turn (X)")
        self.board_widget.update()

        if self.ai_turn:
            self.ai_timer.start(500) # AI moves after 0.5 seconds

    def toggle_training(self):
        self.training_mode = not self.training_mode
        self.train_button.setText(f"Toggle Training: {'ON' if self.training_mode else 'OFF'}")
        self.add_log(f"Training mode {'enabled' if self.training_mode else 'disabled'}")

    def reset_ai(self):
        self.ai = TicTacToeAI()
        self.games_played = 0
        self.ai_wins = 0
        self.player_wins = 0
        self.ties = 0
        self.update_stats()
        self.add_log("AI has been reset. Starting fresh learning!")

    def update_learning_rate(self, value):
        self.ai.learning_rate = value / 100.0
        self.add_log(f"Learning rate set to {self.ai.learning_rate:.2f}")

    def update_exploration(self, value):
        self.ai.epsilon = value / 100.0
        self.add_log(f"Exploration rate set to {self.ai.epsilon:.2f}")

    def handle_cell_click(self, position):
        if self.game.game_over or self.ai_turn:
            return

        if self.game.make_move(position):
            # Record AI's learning from opponent move
            if self.training_mode:
                self.ai.record_move(self.game.get_board_state(), position)

            self.board_widget.update()

            if self.game.game_over:
                self.end_game()
            else:
                # Switch to AI turn
                self.ai_turn = True
                self.status_label.setText("AI thinking...")
                self.ai_timer.start(500) # AI moves after 0.5 seconds

    def make_ai_move(self):
        self.ai_timer.stop()

        if self.game.game_over:
            return

        available_moves = self.game.get_available_moves()
        if not available_moves:
            return

        # Get AI move
        ai_move = self.ai.get_action(self.game.get_board_state(), available_moves)

        if self.game.make_move(ai_move):
            # Record AI's own move for learning
            if self.training_mode:
                self.ai.record_move(self.game.get_board_state(), ai_move)

            self.board_widget.update()

            if self.game.game_over:
                self.end_game()
            else:
                self.ai_turn = False
                self.status_label.setText("Your turn (X)")

    def end_game(self):
        winner = self.game.winner

        # Update statistics
        self.games_played += 1
        if winner == 'X':
            self.player_wins += 1
            result_text = "You win!"
        elif winner == 'O':
            self.ai_wins += 1
            result_text = "AI wins!"
        else:
            self.ties += 1
            result_text = "It's a tie!"

        # AI learns from the game
        if self.training_mode:
            self.ai.learn_from_game(winner)
            self.add_log(f"Game {self.games_played}: {result_text} AI learning updated.")
        else:
            self.add_log(f"Game {self.games_played}: {result_text}")

        self.status_label.setText(result_text)
        self.update_stats()

        # Show end game dialog
        msg = QMessageBox()
        msg.setWindowTitle("Game Over")
        msg.setText(result_text)
        msg.setInformativeText("Do you want to play again?")
        msg.setStandardButtons(QMessageBox.Yes | QMessageBox.No)

        if msg.exec_() == QMessageBox.Yes:
            self.start_new_game()

    def update_stats(self):
        self.stats_label.setText(
            f"Games: {self.games_played} | "
            f"AI Wins: {self.ai_wins} | "
            f"Your Wins: {self.player_wins} | "
            f"Ties: {self.ties}"
        )

    def add_log(self, message):
        self.learning_info.append(f"[Game {self.games_played}] {message}")

def main():
    app = QApplication(sys.argv)
    app.setStyle('Fusion')

    # Set dark theme
    palette = QPalette()
    palette.setColor(QPalette.Window, QColor(53, 53, 53))
    palette.setColor(QPalette.WindowText, Qt.white)
    palette.setColor(QPalette.Base, QColor(25, 25, 25))
    palette.setColor(QPalette.AlternateBase, QColor(53, 53, 53))
    palette.setColor(QPalette.ToolTipBase, Qt.white)
    palette.setColor(QPalette.ToolTipText, Qt.white)
    palette.setColor(QPalette.Text, Qt.white)
    palette.setColor(QPalette.Button, QColor(53, 53, 53))
    palette.setColor(QPalette.ButtonText, Qt.white)
    palette.setColor(QPalette.BrightText, Qt.red)
    palette.setColor(QPalette.Link, QColor(42, 130, 218))
    palette.setColor(QPalette.Highlight, QColor(42, 130, 218))
    palette.setColor(QPalette.HighlightedText, Qt.black)
    app.setPalette(palette)

    game = ImitationLearningGame()
    game.show()
    sys.exit(app.exec_())

if __name__ == '__main__':
    main()
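TicTacToeGame and TicTacToeAI above do not touch the Qt widgets directly, so the table could in principle be pre-trained without the GUI. A rough self-play sketch under that assumption (module name tic_tac_toe_ai, PyQt5 still installed because the module imports it at the top; this loop is not part of the uploaded code and rewards both sides with the same signal, so treat it as illustrative only):

    from tic_tac_toe_ai import TicTacToeAI, TicTacToeGame

    ai = TicTacToeAI()                      # picks up an existing ttt_ai_model.pkl if present
    for _ in range(1000):
        game = TicTacToeGame()
        while not game.game_over:
            move = ai.get_action(game.get_board_state(), game.get_available_moves())
            game.make_move(move)
            ai.record_move(game.get_board_state(), move)
        ai.learn_from_game(game.winner)     # 'O' -> +0.1, 'X' -> -0.1, tie -> +0.05; also re-saves the pickle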
Imitation Learning Tic Tac Toe AI/ttt_ai_model.pkl
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:deb75c4a0df6e020246eee77f4304d89180acc388c925abfa27f4236c94279ec
size 1417366
Imitation Learning Tic Tac Toe AI/ttt_diagnostic.py
ADDED
@@ -0,0 +1,351 @@
| 1 |
+
import pickle
|
| 2 |
+
import numpy as np
|
| 3 |
+
import os
|
| 4 |
+
|
| 5 |
+
def load_ai_model():
|
| 6 |
+
"""Load the AI model from pickle file"""
|
| 7 |
+
filename = "ttt_ai_model.pkl"
|
| 8 |
+
if not os.path.exists(filename):
|
| 9 |
+
print("Model file not found!")
|
| 10 |
+
return None
|
| 11 |
+
|
| 12 |
+
with open(filename, 'rb') as f:
|
| 13 |
+
data = pickle.load(f)
|
| 14 |
+
|
| 15 |
+
return data['model']
|
| 16 |
+
|
| 17 |
+
def decode_state(state_index):
|
| 18 |
+
"""Convert state index to board representation"""
|
| 19 |
+
board = [0] * 9
|
| 20 |
+
temp_index = state_index
|
| 21 |
+
|
| 22 |
+
for i in range(9):
|
| 23 |
+
board[i] = temp_index % 3
|
| 24 |
+
temp_index //= 3
|
| 25 |
+
|
| 26 |
+
return board
|
| 27 |
+
|
| 28 |
+
def display_board(board):
|
| 29 |
+
"""Display Tic-Tac-Toe board"""
|
| 30 |
+
symbols = {0: '.', 1: 'X', 2: 'O'}
|
| 31 |
+
|
| 32 |
+
print("Current board:")
|
| 33 |
+
for row in range(3):
|
| 34 |
+
row_chars = [symbols[board[row*3 + col]] for col in range(3)]
|
| 35 |
+
print(" " + " | ".join(row_chars))
|
| 36 |
+
if row < 2:
|
| 37 |
+
print(" " + "-" * 9)
|
| 38 |
+
|
| 39 |
+
def test_ai_with_common_scenarios(model):
|
| 40 |
+
"""Test AI with common Tic-Tac-Toe scenarios"""
|
| 41 |
+
|
| 42 |
+
print("\n🤖 TESTING AI WITH COMMON SCENARIOS")
|
| 43 |
+
print("="*50)
|
| 44 |
+
|
| 45 |
+
test_cases = [
|
| 46 |
+
# Empty board
|
| 47 |
+
{
|
| 48 |
+
"name": "Empty board - first move",
|
| 49 |
+
"board": [0,0,0,0,0,0,0,0,0],
|
| 50 |
+
"expected": "Center (4) or corners (0,2,6,8)"
|
| 51 |
+
},
|
| 52 |
+
# Center taken by opponent
|
| 53 |
+
{
|
| 54 |
+
"name": "Opponent took center",
|
| 55 |
+
"board": [0,0,0,0,1,0,0,0,0],
|
| 56 |
+
"expected": "A corner (0,2,6,8)"
|
| 57 |
+
},
|
| 58 |
+
# Winning move for AI
|
| 59 |
+
{
|
| 60 |
+
"name": "AI can win in one move",
|
| 61 |
+
"board": [2,1,0, # O X .
|
| 62 |
+
1,2,0, # X O .
|
| 63 |
+
0,0,0], # . . .
|
| 64 |
+
"expected": "Move 8 to complete diagonal"
|
| 65 |
+
},
|
| 66 |
+
# Block opponent's winning move
|
| 67 |
+
{
|
| 68 |
+
"name": "Block opponent's winning move",
|
| 69 |
+
"board": [1,0,0, # X . .
|
| 70 |
+
1,2,0, # X O .
|
| 71 |
+
0,0,0], # . . .
|
| 72 |
+
"expected": "Move 6 to block vertical"
|
| 73 |
+
},
|
| 74 |
+
# Fork opportunity
|
| 75 |
+
{
|
| 76 |
+
"name": "Fork opportunity",
|
| 77 |
+
"board": [2,0,1, # O . X
|
| 78 |
+
0,1,0, # . X .
|
| 79 |
+
0,0,0], # . . .
|
| 80 |
+
"expected": "Move 8 to create fork"
|
| 81 |
+
}
|
| 82 |
+
]
|
| 83 |
+
|
| 84 |
+
for test in test_cases:
|
| 85 |
+
print(f"\n📋 {test['name']}")
|
| 86 |
+
display_board(test['board'])
|
| 87 |
+
|
| 88 |
+
# Convert board to state index
|
| 89 |
+
state_idx = 0
|
| 90 |
+
for i, cell in enumerate(test['board']):
|
| 91 |
+
state_idx += cell * (3 ** i)
|
| 92 |
+
|
| 93 |
+
# Get Q-values for this state
|
| 94 |
+
q_values = model[state_idx]
|
| 95 |
+
|
| 96 |
+
# Get available moves
|
| 97 |
+
available_moves = [i for i, cell in enumerate(test['board']) if cell == 0]
|
| 98 |
+
|
| 99 |
+
if available_moves:
|
| 100 |
+
print("\nAvailable moves and Q-values:")
|
| 101 |
+
for move in available_moves:
|
| 102 |
+
q_val = q_values[move]
|
| 103 |
+
row, col = divmod(move, 3)
|
| 104 |
+
symbol = "⚠️" if q_val > 0 else " "
|
| 105 |
+
print(f"{symbol} Move {move} (row {row}, col {col}): {q_val:.4f}")
|
| 106 |
+
|
| 107 |
+
# AI's recommended move
|
| 108 |
+
best_move = available_moves[np.argmax([q_values[m] for m in available_moves])]
|
| 109 |
+
row, col = divmod(best_move, 3)
|
| 110 |
+
print(f"\n🤖 AI's choice: Move {best_move} (row {row}, col {col})")
|
| 111 |
+
print(f"📋 Expected: {test['expected']}")
|
| 112 |
+
|
| 113 |
+
# Check if AI learned something useful
|
| 114 |
+
best_q = q_values[best_move]
|
| 115 |
+
if best_q > 0:
|
| 116 |
+
print("✅ AI has positive association with this move")
|
| 117 |
+
elif best_q < 0:
|
| 118 |
+
print("❌ AI has negative association with this move (thinks it's bad)")
|
| 119 |
+
else:
|
| 120 |
+
print("➖ AI has no learning for this move")
|
| 121 |
+
else:
|
| 122 |
+
print("No available moves!")
|
| 123 |
+
|
| 124 |
+
def analyze_learning_patterns(model):
|
| 125 |
+
"""Analyze what patterns the AI has learned"""
|
| 126 |
+
|
| 127 |
+
print("\n🔍 ANALYZING LEARNING PATTERNS")
|
| 128 |
+
print("="*50)
|
| 129 |
+
|
| 130 |
+
# Find all states with non-zero Q-values
|
| 131 |
+
non_zero_indices = np.nonzero(np.any(model != 0, axis=1))[0]
|
| 132 |
+
|
| 133 |
+
print(f"Total states with learning: {len(non_zero_indices)}")
|
| 134 |
+
|
| 135 |
+
# Categorize by game phase
|
| 136 |
+
phases = {
|
| 137 |
+
"early": [], # 0-2 moves made
|
| 138 |
+
"mid": [], # 3-5 moves made
|
| 139 |
+
"late": [] # 6-8 moves made
|
| 140 |
+
}
|
| 141 |
+
|
| 142 |
+
for idx in non_zero_indices:
|
| 143 |
+
board = decode_state(idx)
|
| 144 |
+
moves_made = sum(1 for cell in board if cell != 0)
|
| 145 |
+
|
| 146 |
+
if moves_made <= 2:
|
| 147 |
+
phases["early"].append(idx)
|
| 148 |
+
elif moves_made <= 5:
|
| 149 |
+
phases["mid"].append(idx)
|
| 150 |
+
else:
|
| 151 |
+
phases["late"].append(idx)
|
| 152 |
+
|
| 153 |
+
print(f"\nLearning by game phase:")
|
| 154 |
+
print(f" Early game (0-2 moves): {len(phases['early'])} states")
|
| 155 |
+
print(f" Mid game (3-5 moves): {len(phases['mid'])} states")
|
| 156 |
+
print(f" Late game (6-8 moves): {len(phases['late'])} states")
|
| 157 |
+
|
| 158 |
+
# Analyze Q-value distribution
|
| 159 |
+
all_q_values = model[non_zero_indices].flatten()
|
| 160 |
+
non_zero_q = all_q_values[all_q_values != 0]
|
| 161 |
+
|
| 162 |
+
if len(non_zero_q) > 0:
|
| 163 |
+
print(f"\nQ-value analysis:")
|
| 164 |
+
print(f" Total Q-values: {len(non_zero_q)}")
|
| 165 |
+
print(f" Positive Q-values: {np.sum(non_zero_q > 0)}")
|
| 166 |
+
print(f" Negative Q-values: {np.sum(non_zero_q < 0)}")
|
| 167 |
+
print(f" Average Q-value: {np.mean(non_zero_q):.4f}")
|
| 168 |
+
print(f" Most positive: {np.max(non_zero_q):.4f}")
|
| 169 |
+
print(f" Most negative: {np.min(non_zero_q):.4f}")
|
| 170 |
+
|
| 171 |
+
# Show examples of what AI learned
|
| 172 |
+
print("\n📚 Examples of learned states:")
|
| 173 |
+
|
| 174 |
+
# Find states with positive Q-values
|
| 175 |
+
positive_states = []
|
| 176 |
+
for idx in non_zero_indices:
|
| 177 |
+
if np.any(model[idx] > 0):
|
| 178 |
+
positive_states.append(idx)
|
| 179 |
+
|
| 180 |
+
if positive_states:
|
| 181 |
+
print(f"\nFound {len(positive_states)} states with positive associations")
|
| 182 |
+
for i, idx in enumerate(positive_states[:3]): # Show first 3
|
| 183 |
+
board = decode_state(idx)
|
| 184 |
+
print(f"\nExample {i+1}:")
|
| 185 |
+
display_board(board)
|
| 186 |
+
|
| 187 |
+
q_values = model[idx]
|
| 188 |
+
positive_moves = np.where(q_values > 0)[0]
|
| 189 |
+
print("Moves AI thinks are good:")
|
| 190 |
+
for move in positive_moves:
|
| 191 |
+
print(f" Move {move} (row {move//3}, col {move%3}): {q_values[move]:.4f}")
|
| 192 |
+
else:
|
| 193 |
+
print("No positive associations found - AI hasn't learned winning strategies yet")
|
| 194 |
+
|
| 195 |
+
def check_for_specific_patterns(model):
|
| 196 |
+
"""Check if AI has learned specific Tic-Tac-Toe strategies"""
|
| 197 |
+
|
| 198 |
+
print("\n🎯 CHECKING FOR SPECIFIC STRATEGIES")
|
| 199 |
+
print("="*50)
|
| 200 |
+
|
| 201 |
+
strategies = {
|
| 202 |
+
"prefer_center": 0,
|
| 203 |
+
"prefer_corners": 0,
|
| 204 |
+
"prefer_edges": 0,
|
| 205 |
+
"block_opponent": 0,
|
| 206 |
+
"create_fork": 0,
|
| 207 |
+
"avoid_losing": 0
|
| 208 |
+
}
|
| 209 |
+
|
| 210 |
+
# Check common winning/blocking patterns
|
| 211 |
+
patterns_to_check = [
|
| 212 |
+
# Center preference
|
| 213 |
+
([0,0,0,0,0,0,0,0,0], [4], "prefer_center"),
|
| 214 |
+
|
| 215 |
+
# Corner openings
|
| 216 |
+
([0,0,0,0,1,0,0,0,0], [0,2,6,8], "prefer_corners"),
|
| 217 |
+
|
| 218 |
+
# Block vertical
|
| 219 |
+
([1,0,0,1,2,0,0,0,0], [6], "block_opponent"),
|
| 220 |
+
|
| 221 |
+
# Block horizontal
|
| 222 |
+
([1,1,0,0,2,0,0,0,0], [2], "block_opponent"),
|
| 223 |
+
|
| 224 |
+
# Block diagonal
|
| 225 |
+
([1,0,0,0,1,0,0,0,0], [8], "block_opponent"),
|
| 226 |
+
]
|
| 227 |
+
|
| 228 |
+
for board_pattern, good_moves, strategy in patterns_to_check:
|
| 229 |
+
state_idx = 0
|
| 230 |
+
for i, cell in enumerate(board_pattern):
|
| 231 |
+
state_idx += cell * (3 ** i)
|
| 232 |
+
|
| 233 |
+
q_values = model[state_idx]
|
| 234 |
+
available_moves = [i for i, cell in enumerate(board_pattern) if cell == 0]
|
| 235 |
+
|
| 236 |
+
if available_moves:
|
| 237 |
+
# Check if AI prefers any of the good moves
|
| 238 |
+
best_move = available_moves[np.argmax([q_values[m] for m in available_moves])]
|
| 239 |
+
if best_move in good_moves:
|
| 240 |
+
strategies[strategy] += 1
|
| 241 |
+
print(f"✅ AI correctly prefers {strategy.replace('_', ' ')} in this scenario")
|
| 242 |
+
else:
|
| 243 |
+
print(f"❌ AI doesn't recognize {strategy.replace('_', ' ')}")
|
| 244 |
+
|
| 245 |
+
print(f"\nStrategy recognition summary:")
|
| 246 |
+
for strategy, count in strategies.items():
|
| 247 |
+
print(f" {strategy}: {count}/1")
|
| 248 |
+
|
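The pattern checks above rebuild a state index from a raw board with state_idx += cell * (3 ** i), the base-3 encoding that decode_state inverts earlier in this script. A minimal standalone sketch of that round trip (the helper names encode_board and decode_board are illustrative, not part of the script):

def encode_board(board):
    """Board of 9 cells (0 empty, 1 X, 2 O) -> index in [0, 3**9)."""
    state_idx = 0
    for i, cell in enumerate(board):
        state_idx += cell * (3 ** i)
    return state_idx

def decode_board(state_idx):
    """Inverse of encode_board: index -> list of 9 cells."""
    board = []
    for _ in range(9):
        board.append(state_idx % 3)
        state_idx //= 3
    return board

board = [1, 0, 0, 0, 2, 0, 0, 0, 0]   # X in a corner, O in the center
idx = encode_board(board)             # 1*3**0 + 2*3**4 = 163
assert decode_board(idx) == board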
| 249 |
+
def train_ai_offline(model, num_games=1000):
|
| 250 |
+
"""Simulate games to improve the AI offline"""
|
| 251 |
+
|
| 252 |
+
print("\n🎮 SIMULATING OFFLINE TRAINING")
|
| 253 |
+
print("="*50)
|
| 254 |
+
|
| 255 |
+
print(f"Simulating {num_games} games of self-play...")
|
| 256 |
+
|
| 257 |
+
# Simple self-play simulation
|
| 258 |
+
import random
|
| 259 |
+
|
| 260 |
+
# We'll create a simple improvement by adding some basic strategies
|
| 261 |
+
original_non_zero = np.count_nonzero(model)
|
| 262 |
+
|
| 263 |
+
# Add some basic Tic-Tac-Toe knowledge
|
| 264 |
+
# Center is good
|
| 265 |
+
empty_board_idx = 0 # All zeros
|
| 266 |
+
model[empty_board_idx][4] = 0.1 # Center is good
|
| 267 |
+
|
| 268 |
+
# Corners are good when center is taken
|
| 269 |
+
center_taken_idx = 3**4 # Only center is 1
|
| 270 |
+
for corner in [0, 2, 6, 8]:
|
| 271 |
+
model[center_taken_idx][corner] = 0.08
|
| 272 |
+
|
| 273 |
+
# Blocking is good
|
| 274 |
+
# Example: opponent has two in a row
|
| 275 |
+
for col in range(3):  # check each of the three columns
|
| 276 |
+
board = [0] * 9
|
| 277 |
+
board[col] = 1
|
| 278 |
+
board[col + 3] = 1  # opponent has two in this column
|
| 279 |
+
if board[col + 6] == 0:  # the third cell in the column is empty
|
| 280 |
+
state_idx = 0
|
| 281 |
+
for j, cell in enumerate(board):
|
| 282 |
+
state_idx += cell * (3 ** j)
|
| 283 |
+
blocking_move = col + 6
|
| 284 |
+
model[state_idx][blocking_move] = 0.15
|
| 285 |
+
|
| 286 |
+
new_non_zero = np.count_nonzero(model)
|
| 287 |
+
improvement = new_non_zero - original_non_zero
|
| 288 |
+
|
| 289 |
+
print(f"Added {improvement} new learned values")
|
| 290 |
+
print("Basic Tic-Tac-Toe strategies have been added to the AI")
|
| 291 |
+
|
| 292 |
+
return model
|
| 293 |
+
|
| 294 |
+
def save_improved_model(model):
|
| 295 |
+
"""Save the improved model"""
|
| 296 |
+
filename = "ttt_ai_model_improved.pkl"
|
| 297 |
+
|
| 298 |
+
# Load existing data to preserve experience buffer
|
| 299 |
+
original_filename = "ttt_ai_model.pkl"
|
| 300 |
+
if os.path.exists(original_filename):
|
| 301 |
+
with open(original_filename, 'rb') as f:
|
| 302 |
+
data = pickle.load(f)
|
| 303 |
+
else:
|
| 304 |
+
data = {'model': model, 'experience': []}
|
| 305 |
+
|
| 306 |
+
data['model'] = model
|
| 307 |
+
|
| 308 |
+
with open(filename, 'wb') as f:
|
| 309 |
+
pickle.dump(data, f)
|
| 310 |
+
|
| 311 |
+
print(f"\n💾 Improved model saved to '{filename}'")
|
| 312 |
+
|
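A sketch of how the saved file could be consumed afterwards, assuming the same {'model': ..., 'experience': ...} layout written above, with the Q-table indexed as model[state_idx][move]:

import pickle

with open("ttt_ai_model_improved.pkl", "rb") as f:
    data = pickle.load(f)
model = data["model"]

board = [0] * 9                                             # empty board, cells are 0/1/2
state_idx = sum(cell * (3 ** i) for i, cell in enumerate(board))
valid_moves = [i for i, cell in enumerate(board) if cell == 0]
best_move = max(valid_moves, key=lambda m: model[state_idx][m])
print(best_move)   # expected 4 once the center-preference value has been seeded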
| 313 |
+
def main():
|
| 314 |
+
"""Main function to analyze and improve the AI"""
|
| 315 |
+
|
| 316 |
+
print("🤖 TIC-TAC-TOE AI DIAGNOSTIC TOOL")
|
| 317 |
+
print("="*60)
|
| 318 |
+
|
| 319 |
+
# Load the model
|
| 320 |
+
model = load_ai_model()
|
| 321 |
+
if model is None:
|
| 322 |
+
return
|
| 323 |
+
|
| 324 |
+
# Test with common scenarios
|
| 325 |
+
test_ai_with_common_scenarios(model)
|
| 326 |
+
|
| 327 |
+
# Analyze learning patterns
|
| 328 |
+
analyze_learning_patterns(model)
|
| 329 |
+
|
| 330 |
+
# Check for specific strategies
|
| 331 |
+
check_for_specific_patterns(model)
|
| 332 |
+
|
| 333 |
+
# Offer to improve the AI
|
| 334 |
+
print("\n" + "="*60)
|
| 335 |
+
improve = input("\nWould you like to add basic Tic-Tac-Toe knowledge to the AI? (y/n): ").strip().lower()
|
| 336 |
+
|
| 337 |
+
if improve == 'y':
|
| 338 |
+
model = train_ai_offline(model)
|
| 339 |
+
save_improved_model(model)
|
| 340 |
+
print("\n✅ AI has been improved with basic strategies!")
|
| 341 |
+
print("Restart the game and use 'ttt_ai_model_improved.pkl' for better performance")
|
| 342 |
+
else:
|
| 343 |
+
print("\n📝 Recommendations for improving the AI through gameplay:")
|
| 344 |
+
print("1. Play more games against the AI")
|
| 345 |
+
print("2. Let the AI watch you play against itself")
|
| 346 |
+
print("3. Adjust learning rate to 0.2-0.3 for faster learning")
|
| 347 |
+
print("4. Reduce exploration rate to 0.1 once AI starts winning")
|
| 348 |
+
print("5. Play both as X and O to teach both perspectives")
|
| 349 |
+
|
| 350 |
+
if __name__ == "__main__":
|
| 351 |
+
main()
|
Tic Tac Toe RL/app.py
ADDED
|
@@ -0,0 +1,721 @@
|
| 1 |
+
import sys
|
| 2 |
+
import os
|
| 3 |
+
import random
|
| 4 |
+
import numpy as np
|
| 5 |
+
import pandas as pd
|
| 6 |
+
import matplotlib.pyplot as plt
|
| 7 |
+
from collections import deque
|
| 8 |
+
import datetime
|
| 9 |
+
import csv
|
| 10 |
+
import logging
|
| 11 |
+
from PyQt5.QtWidgets import (QApplication, QMainWindow, QWidget, QVBoxLayout,
|
| 12 |
+
QHBoxLayout, QGridLayout, QPushButton, QLabel,
|
| 13 |
+
QTextEdit, QTabWidget, QGroupBox, QSpinBox,
|
| 14 |
+
QDoubleSpinBox, QCheckBox, QProgressBar, QComboBox)
|
| 15 |
+
from PyQt5.QtCore import QTimer, Qt, QThread, pyqtSignal
|
| 16 |
+
from PyQt5.QtGui import QFont, QPalette, QColor
|
| 17 |
+
import torch
|
| 18 |
+
import torch.nn as nn
|
| 19 |
+
import torch.optim as optim
|
| 20 |
+
import torch.nn.functional as F
|
| 21 |
+
|
| 22 |
+
# Configure logging
|
| 23 |
+
logging.basicConfig(
|
| 24 |
+
level=logging.INFO,
|
| 25 |
+
format='%(asctime)s - %(levelname)s - %(message)s',
|
| 26 |
+
handlers=[
|
| 27 |
+
logging.FileHandler('tic_tac_toe_training.log'),
|
| 28 |
+
logging.StreamHandler()
|
| 29 |
+
]
|
| 30 |
+
)
|
| 31 |
+
logger = logging.getLogger(__name__)
|
| 32 |
+
|
| 33 |
+
class DuelingDQN(nn.Module):
|
| 34 |
+
def __init__(self, input_size, output_size, hidden_size=256):
|
| 35 |
+
super(DuelingDQN, self).__init__()
|
| 36 |
+
self.input_size = input_size
|
| 37 |
+
self.output_size = output_size
|
| 38 |
+
|
| 39 |
+
# Feature layer
|
| 40 |
+
self.feature = nn.Sequential(
|
| 41 |
+
nn.Linear(input_size, hidden_size),
|
| 42 |
+
nn.ReLU(),
|
| 43 |
+
nn.Linear(hidden_size, hidden_size),
|
| 44 |
+
nn.ReLU(),
|
| 45 |
+
nn.Linear(hidden_size, hidden_size // 2),
|
| 46 |
+
nn.ReLU()
|
| 47 |
+
)
|
| 48 |
+
|
| 49 |
+
# Value stream
|
| 50 |
+
self.value_stream = nn.Sequential(
|
| 51 |
+
nn.Linear(hidden_size // 2, hidden_size // 4),
|
| 52 |
+
nn.ReLU(),
|
| 53 |
+
nn.Linear(hidden_size // 4, 1)
|
| 54 |
+
)
|
| 55 |
+
|
| 56 |
+
# Advantage stream
|
| 57 |
+
self.advantage_stream = nn.Sequential(
|
| 58 |
+
nn.Linear(hidden_size // 2, hidden_size // 4),
|
| 59 |
+
nn.ReLU(),
|
| 60 |
+
nn.Linear(hidden_size // 4, output_size)
|
| 61 |
+
)
|
| 62 |
+
|
| 63 |
+
def forward(self, state):
|
| 64 |
+
features = self.feature(state)
|
| 65 |
+
value = self.value_stream(features)
|
| 66 |
+
advantage = self.advantage_stream(features)
|
| 67 |
+
|
| 68 |
+
# Combine value and advantage
|
| 69 |
+
q_values = value + (advantage - advantage.mean(dim=1, keepdim=True))
|
| 70 |
+
return q_values
|
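The forward pass combines the two streams as Q(s, a) = V(s) + A(s, a) - mean_a A(s, a); subtracting the mean advantage keeps the value/advantage split identifiable. A tiny NumPy sketch of that aggregation (illustrative numbers, not part of app.py):

import numpy as np

value = np.array([[1.5]])                     # V(s), shape (batch, 1)
advantage = np.array([[0.2, -0.1, 0.5]])      # A(s, a), shape (batch, n_actions)
q = value + (advantage - advantage.mean(axis=1, keepdims=True))
print(q)   # [[1.5 1.2 1.8]] -- each row of Q averages back to V(s)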
| 71 |
+
|
| 72 |
+
class TicTacToeEnvironment:
|
| 73 |
+
def __init__(self):
|
| 74 |
+
self.reset()
|
| 75 |
+
|
| 76 |
+
def reset(self):
|
| 77 |
+
self.board = np.zeros(9, dtype=int) # 0: empty, 1: X, 2: O
|
| 78 |
+
self.current_player = 1 # X starts
|
| 79 |
+
self.done = False
|
| 80 |
+
self.winner = 0
|
| 81 |
+
return self.get_state()
|
| 82 |
+
|
| 83 |
+
def get_state(self):
|
| 84 |
+
# Return board state as one-hot encoded
|
| 85 |
+
state = np.zeros(9 * 3, dtype=np.float32)
|
| 86 |
+
for i in range(9):
|
| 87 |
+
if self.board[i] == 0:
|
| 88 |
+
state[i * 3] = 1.0
|
| 89 |
+
elif self.board[i] == 1:
|
| 90 |
+
state[i * 3 + 1] = 1.0
|
| 91 |
+
else:
|
| 92 |
+
state[i * 3 + 2] = 1.0
|
| 93 |
+
return state
|
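get_state() turns each of the 9 cells into a 3-way one-hot block (empty / X / O), which is why the agent below is built with state_size=27. An equivalent compact sketch of the same encoding (the method above spells it out with if/elif/else):

import numpy as np

board = np.zeros(9, dtype=int)
board[4] = 1                       # X in the center
board[0] = 2                       # O in a corner

state = np.zeros(9 * 3, dtype=np.float32)
for i in range(9):
    state[i * 3 + board[i]] = 1.0  # offset 0 = empty, 1 = X, 2 = O
print(state.reshape(9, 3))         # one row per cell, exactly one 1.0 per row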
| 94 |
+
|
| 95 |
+
def get_valid_moves(self):
|
| 96 |
+
return [i for i in range(9) if self.board[i] == 0]
|
| 97 |
+
|
| 98 |
+
def step(self, action):
|
| 99 |
+
if self.done:
|
| 100 |
+
return self.get_state(), 0, True, {}
|
| 101 |
+
|
| 102 |
+
if self.board[action] != 0:
|
| 103 |
+
return self.get_state(), -5, True, {} # Invalid move penalty
|
| 104 |
+
|
| 105 |
+
# Make move
|
| 106 |
+
self.board[action] = self.current_player
|
| 107 |
+
|
| 108 |
+
# Check for win
|
| 109 |
+
if self.check_win(self.current_player):
|
| 110 |
+
self.done = True
|
| 111 |
+
self.winner = self.current_player
|
| 112 |
+
reward = 10 # Win reward
|
| 113 |
+
# Check for draw
|
| 114 |
+
elif len(self.get_valid_moves()) == 0:
|
| 115 |
+
self.done = True
|
| 116 |
+
reward = 2 # Draw reward
|
| 117 |
+
else:
|
| 118 |
+
reward = 0.1 # Small reward for valid move
|
| 119 |
+
self.current_player = 3 - self.current_player # Switch player (1->2, 2->1)
|
| 120 |
+
|
| 121 |
+
return self.get_state(), reward, self.done, {'winner': self.winner}
|
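step() rewards +10 for the winning move, +2 for filling the board to a draw, +0.1 for any other legal move, and -5 (ending the episode) for an illegal one, switching the player only when the game continues. A short random-rollout sketch of the environment, assuming this file imports cleanly as app (it pulls in torch and PyQt5 at import time):

import random
from app import TicTacToeEnvironment   # assumed import path for this file

env = TicTacToeEnvironment()
state = env.reset()
total_reward = 0.0
while not env.done:
    action = random.choice(env.get_valid_moves())
    state, reward, done, info = env.step(action)
    total_reward += reward
print(env.render())
print(info['winner'], total_reward)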
| 122 |
+
|
| 123 |
+
def check_win(self, player):
|
| 124 |
+
winning_combinations = [
|
| 125 |
+
[0, 1, 2], [3, 4, 5], [6, 7, 8], # Rows
|
| 126 |
+
[0, 3, 6], [1, 4, 7], [2, 5, 8], # Columns
|
| 127 |
+
[0, 4, 8], [2, 4, 6] # Diagonals
|
| 128 |
+
]
|
| 129 |
+
|
| 130 |
+
for combo in winning_combinations:
|
| 131 |
+
if all(self.board[i] == player for i in combo):
|
| 132 |
+
return True
|
| 133 |
+
return False
|
| 134 |
+
|
| 135 |
+
def render(self):
|
| 136 |
+
symbols = {0: ' ', 1: 'X', 2: 'O'}
|
| 137 |
+
board_str = ""
|
| 138 |
+
for i in range(3):
|
| 139 |
+
row = [symbols[self.board[i*3 + j]] for j in range(3)]
|
| 140 |
+
board_str += " " + " | ".join(row) + " \n"
|
| 141 |
+
if i < 2:
|
| 142 |
+
board_str += "-----------\n"
|
| 143 |
+
return board_str
|
| 144 |
+
|
| 145 |
+
class DuelingDQNAgent:
|
| 146 |
+
def __init__(self, state_size, action_size, learning_rate=0.001, gamma=0.99,
|
| 147 |
+
epsilon_start=1.0, epsilon_end=0.01, epsilon_decay=0.999,
|
| 148 |
+
target_update=1000, device='auto'):
|
| 149 |
+
self.state_size = state_size
|
| 150 |
+
self.action_size = action_size
|
| 151 |
+
self.learning_rate = learning_rate
|
| 152 |
+
self.gamma = gamma
|
| 153 |
+
self.epsilon = epsilon_start
|
| 154 |
+
self.epsilon_start = epsilon_start
|
| 155 |
+
self.epsilon_end = epsilon_end
|
| 156 |
+
self.epsilon_decay = epsilon_decay
|
| 157 |
+
self.target_update = target_update
|
| 158 |
+
self.memory = deque(maxlen=50000)
|
| 159 |
+
self.batch_size = 128
|
| 160 |
+
self.learn_step_counter = 0
|
| 161 |
+
|
| 162 |
+
# Device selection with MPS priority
|
| 163 |
+
if device == 'auto':
|
| 164 |
+
if torch.backends.mps.is_available():
|
| 165 |
+
self.device = torch.device("mps")
|
| 166 |
+
logger.info("Using MPS device (Apple Silicon)")
|
| 167 |
+
elif torch.cuda.is_available():
|
| 168 |
+
self.device = torch.device("cuda")
|
| 169 |
+
logger.info("Using CUDA device")
|
| 170 |
+
else:
|
| 171 |
+
self.device = torch.device("cpu")
|
| 172 |
+
logger.info("Using CPU device")
|
| 173 |
+
else:
|
| 174 |
+
self.device = torch.device(device)
|
| 175 |
+
|
| 176 |
+
# Networks
|
| 177 |
+
self.policy_net = DuelingDQN(state_size, action_size).to(self.device)
|
| 178 |
+
self.target_net = DuelingDQN(state_size, action_size).to(self.device)
|
| 179 |
+
self.optimizer = optim.Adam(self.policy_net.parameters(), lr=learning_rate, weight_decay=1e-5)
|
| 180 |
+
|
| 181 |
+
self.update_target_network()
|
| 182 |
+
|
| 183 |
+
def update_target_network(self):
|
| 184 |
+
self.target_net.load_state_dict(self.policy_net.state_dict())
|
| 185 |
+
|
| 186 |
+
def remember(self, state, action, reward, next_state, done):
|
| 187 |
+
self.memory.append((state, action, reward, next_state, done))
|
| 188 |
+
|
| 189 |
+
def act(self, state, valid_moves, training=True):
|
| 190 |
+
if training and random.random() <= self.epsilon:
|
| 191 |
+
return random.choice(valid_moves)
|
| 192 |
+
|
| 193 |
+
state_tensor = torch.FloatTensor(state).unsqueeze(0).to(self.device)
|
| 194 |
+
with torch.no_grad():
|
| 195 |
+
q_values = self.policy_net(state_tensor)
|
| 196 |
+
|
| 197 |
+
# Mask invalid moves
|
| 198 |
+
q_values_np = q_values.cpu().numpy()[0]
|
| 199 |
+
masked_q_values = q_values_np.copy()
|
| 200 |
+
for i in range(self.action_size):
|
| 201 |
+
if i not in valid_moves:
|
| 202 |
+
masked_q_values[i] = -float('inf')
|
| 203 |
+
|
| 204 |
+
return np.argmax(masked_q_values)
|
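act() makes illegal cells unpickable by overwriting their Q-values with -inf before the argmax. An equivalent vectorized sketch of that masking (illustrative values):

import numpy as np

q_values = np.array([0.3, -0.2, 0.8, 0.1, 0.0, 0.5, -0.4, 0.2, 0.9])
valid_moves = [1, 4, 6]                        # only these cells are empty

masked = np.full_like(q_values, -np.inf)
masked[valid_moves] = q_values[valid_moves]    # keep Q only for legal actions
print(int(np.argmax(masked)))                  # 4 -- the best of the legal moves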
| 205 |
+
|
| 206 |
+
def replay(self):
|
| 207 |
+
if len(self.memory) < self.batch_size:
|
| 208 |
+
return 0
|
| 209 |
+
|
| 210 |
+
batch = random.sample(self.memory, self.batch_size)
|
| 211 |
+
states, actions, rewards, next_states, dones = zip(*batch)
|
| 212 |
+
|
| 213 |
+
states = torch.FloatTensor(np.array(states)).to(self.device)
|
| 214 |
+
actions = torch.LongTensor(actions).to(self.device)
|
| 215 |
+
rewards = torch.FloatTensor(rewards).to(self.device)
|
| 216 |
+
next_states = torch.FloatTensor(np.array(next_states)).to(self.device)
|
| 217 |
+
dones = torch.BoolTensor(dones).to(self.device)
|
| 218 |
+
|
| 219 |
+
current_q_values = self.policy_net(states).gather(1, actions.unsqueeze(1))
|
| 220 |
+
|
| 221 |
+
with torch.no_grad():
|
| 222 |
+
next_actions = self.policy_net(next_states).max(1)[1]
|
| 223 |
+
next_q_values = self.target_net(next_states).gather(1, next_actions.unsqueeze(1))
|
| 224 |
+
target_q_values = rewards.unsqueeze(1) + (self.gamma * next_q_values * ~dones.unsqueeze(1))
|
| 225 |
+
|
| 226 |
+
loss = F.smooth_l1_loss(current_q_values, target_q_values)
|
| 227 |
+
|
| 228 |
+
self.optimizer.zero_grad()
|
| 229 |
+
loss.backward()
|
| 230 |
+
# Gradient clipping
|
| 231 |
+
torch.nn.utils.clip_grad_norm_(self.policy_net.parameters(), 1.0)
|
| 232 |
+
self.optimizer.step()
|
| 233 |
+
|
| 234 |
+
# Update target network
|
| 235 |
+
self.learn_step_counter += 1
|
| 236 |
+
if self.learn_step_counter % self.target_update == 0:
|
| 237 |
+
self.update_target_network()
|
| 238 |
+
|
| 239 |
+
# Decay epsilon
|
| 240 |
+
if self.epsilon > self.epsilon_end:
|
| 241 |
+
self.epsilon *= self.epsilon_decay
|
| 242 |
+
|
| 243 |
+
return loss.item()
|
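replay() implements the Double DQN target: the policy network chooses the next action and the target network scores it, so y = r + gamma * Q_target(s', argmax_a Q_policy(s', a)) for non-terminal transitions. A small sketch of that target computation with illustrative numbers:

import torch

gamma = 0.99
rewards = torch.tensor([0.1, 10.0])
dones = torch.tensor([False, True])
q_policy_next = torch.tensor([[0.2, 0.9, 0.1], [0.3, 0.4, 0.5]])   # policy net on s'
q_target_next = torch.tensor([[0.1, 0.7, 0.0], [0.2, 0.6, 0.8]])   # target net on s'

next_actions = q_policy_next.max(1)[1]                                   # tensor([1, 2])
next_q = q_target_next.gather(1, next_actions.unsqueeze(1)).squeeze(1)   # tensor([0.7, 0.8])
targets = rewards + gamma * next_q * (~dones).float()
print(targets)   # tensor([0.7930, 10.0000]) -- the terminal transition keeps only its reward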
| 244 |
+
|
| 245 |
+
def save_model(self, filepath):
|
| 246 |
+
torch.save({
|
| 247 |
+
'policy_net_state_dict': self.policy_net.state_dict(),
|
| 248 |
+
'target_net_state_dict': self.target_net.state_dict(),
|
| 249 |
+
'optimizer_state_dict': self.optimizer.state_dict(),
|
| 250 |
+
'epsilon': self.epsilon,
|
| 251 |
+
'memory': list(self.memory)[-10000:] # Save recent memory
|
| 252 |
+
}, filepath)
|
| 253 |
+
logger.info(f"Model saved to {filepath}")
|
| 254 |
+
|
| 255 |
+
def load_model(self, filepath):
|
| 256 |
+
if os.path.exists(filepath):
|
| 257 |
+
checkpoint = torch.load(filepath, map_location=self.device)
|
| 258 |
+
self.policy_net.load_state_dict(checkpoint['policy_net_state_dict'])
|
| 259 |
+
self.target_net.load_state_dict(checkpoint['target_net_state_dict'])
|
| 260 |
+
self.optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
|
| 261 |
+
self.epsilon = checkpoint['epsilon']
|
| 262 |
+
if 'memory' in checkpoint:
|
| 263 |
+
self.memory = deque(checkpoint['memory'], maxlen=50000)
|
| 264 |
+
logger.info(f"Model loaded from {filepath}")
|
| 265 |
+
return True
|
| 266 |
+
return False
|
| 267 |
+
|
| 268 |
+
class TrainingThread(QThread):
|
| 269 |
+
update_signal = pyqtSignal(dict)
|
| 270 |
+
finished_signal = pyqtSignal()
|
| 271 |
+
|
| 272 |
+
def __init__(self, agent, env, episodes=1000, save_interval=100):
|
| 273 |
+
super().__init__()
|
| 274 |
+
self.agent = agent
|
| 275 |
+
self.env = env
|
| 276 |
+
self.episodes = episodes
|
| 277 |
+
self.save_interval = save_interval
|
| 278 |
+
self.running = True
|
| 279 |
+
self.stats = {
|
| 280 |
+
'episodes': 0,
|
| 281 |
+
'wins': 0,
|
| 282 |
+
'losses': 0,
|
| 283 |
+
'draws': 0,
|
| 284 |
+
'avg_reward': 0,
|
| 285 |
+
'epsilon': agent.epsilon,
|
| 286 |
+
'loss': 0
|
| 287 |
+
}
|
| 288 |
+
self.rewards = []
|
| 289 |
+
self.wins = []
|
| 290 |
+
self.losses = []
|
| 291 |
+
self.draws = []
|
| 292 |
+
|
| 293 |
+
def run(self):
|
| 294 |
+
logger.info(f"Starting training for {self.episodes} episodes")
|
| 295 |
+
|
| 296 |
+
for episode in range(self.episodes):
|
| 297 |
+
if not self.running:
|
| 298 |
+
break
|
| 299 |
+
|
| 300 |
+
state = self.env.reset()
|
| 301 |
+
total_reward = 0
|
| 302 |
+
steps = 0
|
| 303 |
+
|
| 304 |
+
while True:
|
| 305 |
+
valid_moves = self.env.get_valid_moves()
|
| 306 |
+
if not valid_moves:
|
| 307 |
+
break
|
| 308 |
+
|
| 309 |
+
action = self.agent.act(state, valid_moves)
|
| 310 |
+
next_state, reward, done, info = self.env.step(action)
|
| 311 |
+
|
| 312 |
+
self.agent.remember(state, action, reward, next_state, done)
|
| 313 |
+
loss = self.agent.replay()
|
| 314 |
+
|
| 315 |
+
total_reward += reward
|
| 316 |
+
steps += 1
|
| 317 |
+
state = next_state
|
| 318 |
+
|
| 319 |
+
if done:
|
| 320 |
+
# Update statistics
|
| 321 |
+
if info.get('winner') == 1: # Agent win
|
| 322 |
+
self.stats['wins'] += 1
|
| 323 |
+
elif info.get('winner') == 2: # Opponent win
|
| 324 |
+
self.stats['losses'] += 1
|
| 325 |
+
else: # Draw
|
| 326 |
+
self.stats['draws'] += 1
|
| 327 |
+
|
| 328 |
+
self.stats['loss'] = loss if loss else 0
|
| 329 |
+
break
|
| 330 |
+
|
| 331 |
+
# Update statistics
|
| 332 |
+
self.stats['episodes'] = episode + 1
|
| 333 |
+
self.stats['epsilon'] = self.agent.epsilon
|
| 334 |
+
self.rewards.append(total_reward)
|
| 335 |
+
self.stats['avg_reward'] = np.mean(self.rewards[-100:]) if self.rewards else 0
|
| 336 |
+
|
| 337 |
+
# Save data periodically
|
| 338 |
+
if (episode + 1) % self.save_interval == 0:
|
| 339 |
+
self.save_training_data(episode + 1)
|
| 340 |
+
self.agent.save_model(f'models/dueling_dqn_episode_{episode + 1}.pth')
|
| 341 |
+
|
| 342 |
+
# Emit update signal every 10 episodes or at the end
|
| 343 |
+
if (episode + 1) % 10 == 0 or episode == self.episodes - 1:
|
| 344 |
+
self.update_signal.emit(self.stats.copy())
|
| 345 |
+
|
| 346 |
+
self.finished_signal.emit()
|
| 347 |
+
|
| 348 |
+
def stop(self):
|
| 349 |
+
self.running = False
|
| 350 |
+
|
| 351 |
+
def save_training_data(self, episode):
|
| 352 |
+
data = {
|
| 353 |
+
'episode': episode,
|
| 354 |
+
'epsilon': self.agent.epsilon,
|
| 355 |
+
'avg_reward': self.stats['avg_reward'],
|
| 356 |
+
'wins': self.stats['wins'],
|
| 357 |
+
'losses': self.stats['losses'],
|
| 358 |
+
'draws': self.stats['draws'],
|
| 359 |
+
'win_rate': self.stats['wins'] / episode if episode > 0 else 0,
|
| 360 |
+
'timestamp': datetime.datetime.now().isoformat()
|
| 361 |
+
}
|
| 362 |
+
|
| 363 |
+
# Save to CSV
|
| 364 |
+
file_exists = os.path.isfile('training_data.csv')
|
| 365 |
+
with open('training_data.csv', 'a', newline='') as f:
|
| 366 |
+
writer = csv.DictWriter(f, fieldnames=data.keys())
|
| 367 |
+
if not file_exists:
|
| 368 |
+
writer.writeheader()
|
| 369 |
+
writer.writerow(data)
|
| 370 |
+
|
| 371 |
+
class TicTacToeGUI(QMainWindow):
|
| 372 |
+
def __init__(self):
|
| 373 |
+
super().__init__()
|
| 374 |
+
self.env = TicTacToeEnvironment()
|
| 375 |
+
self.agent = DuelingDQNAgent(state_size=27, action_size=9) # 9 positions * 3 states each
|
| 376 |
+
self.training_thread = None
|
| 377 |
+
self.is_training = False
|
| 378 |
+
|
| 379 |
+
# Create models directory
|
| 380 |
+
os.makedirs('models', exist_ok=True)
|
| 381 |
+
|
| 382 |
+
self.init_ui()
|
| 383 |
+
self.update_display()
|
| 384 |
+
|
| 385 |
+
def init_ui(self):
|
| 386 |
+
self.setWindowTitle("Dueling DQN Tic-Tac-Toe Trainer")
|
| 387 |
+
self.setGeometry(100, 100, 1200, 800)
|
| 388 |
+
|
| 389 |
+
# Central widget and main layout
|
| 390 |
+
central_widget = QWidget()
|
| 391 |
+
self.setCentralWidget(central_widget)
|
| 392 |
+
main_layout = QHBoxLayout(central_widget)
|
| 393 |
+
|
| 394 |
+
# Left panel - Game board and controls
|
| 395 |
+
left_panel = QVBoxLayout()
|
| 396 |
+
|
| 397 |
+
# Game board
|
| 398 |
+
board_group = QGroupBox("Tic-Tac-Toe Board")
|
| 399 |
+
board_layout = QGridLayout()
|
| 400 |
+
self.board_buttons = []
|
| 401 |
+
for i in range(9):
|
| 402 |
+
btn = QPushButton('')
|
| 403 |
+
btn.setFixedSize(80, 80)
|
| 404 |
+
btn.setFont(QFont('Arial', 24))
|
| 405 |
+
btn.clicked.connect(lambda checked, pos=i: self.human_move(pos))
|
| 406 |
+
board_layout.addWidget(btn, i // 3, i % 3)
|
| 407 |
+
self.board_buttons.append(btn)
|
| 408 |
+
board_group.setLayout(board_layout)
|
| 409 |
+
left_panel.addWidget(board_group)
|
| 410 |
+
|
| 411 |
+
# Game controls
|
| 412 |
+
controls_group = QGroupBox("Game Controls")
|
| 413 |
+
controls_layout = QVBoxLayout()
|
| 414 |
+
|
| 415 |
+
self.status_label = QLabel("Status: Ready")
|
| 416 |
+
controls_layout.addWidget(self.status_label)
|
| 417 |
+
|
| 418 |
+
control_buttons_layout = QHBoxLayout()
|
| 419 |
+
self.reset_btn = QPushButton("New Game")
|
| 420 |
+
self.reset_btn.clicked.connect(self.reset_game)
|
| 421 |
+
control_buttons_layout.addWidget(self.reset_btn)
|
| 422 |
+
|
| 423 |
+
self.auto_play_btn = QPushButton("Auto Play")
|
| 424 |
+
self.auto_play_btn.clicked.connect(self.auto_play)
|
| 425 |
+
control_buttons_layout.addWidget(self.auto_play_btn)
|
| 426 |
+
|
| 427 |
+
controls_layout.addLayout(control_buttons_layout)
|
| 428 |
+
controls_group.setLayout(controls_layout)
|
| 429 |
+
left_panel.addWidget(controls_group)
|
| 430 |
+
|
| 431 |
+
# Right panel - Training and visualization
|
| 432 |
+
right_panel = QVBoxLayout()
|
| 433 |
+
|
| 434 |
+
# Training controls
|
| 435 |
+
training_group = QGroupBox("Training Controls")
|
| 436 |
+
training_layout = QVBoxLayout()
|
| 437 |
+
|
| 438 |
+
# Training parameters
|
| 439 |
+
params_layout = QGridLayout()
|
| 440 |
+
|
| 441 |
+
params_layout.addWidget(QLabel("Episodes:"), 0, 0)
|
| 442 |
+
self.episodes_spin = QSpinBox()
|
| 443 |
+
self.episodes_spin.setRange(100, 100000)
|
| 444 |
+
self.episodes_spin.setValue(5000)
|
| 445 |
+
params_layout.addWidget(self.episodes_spin, 0, 1)
|
| 446 |
+
|
| 447 |
+
params_layout.addWidget(QLabel("Learning Rate:"), 1, 0)
|
| 448 |
+
self.lr_spin = QDoubleSpinBox()
|
| 449 |
+
self.lr_spin.setRange(0.0001, 0.01)
|
| 450 |
+
self.lr_spin.setValue(0.001)
|
| 451 |
+
self.lr_spin.setSingleStep(0.0001)
|
| 452 |
+
self.lr_spin.setDecimals(4)
|
| 453 |
+
params_layout.addWidget(self.lr_spin, 1, 1)
|
| 454 |
+
|
| 455 |
+
params_layout.addWidget(QLabel("Gamma:"), 2, 0)
|
| 456 |
+
self.gamma_spin = QDoubleSpinBox()
|
| 457 |
+
self.gamma_spin.setRange(0.1, 0.999)
|
| 458 |
+
self.gamma_spin.setValue(0.99)
|
| 459 |
+
self.gamma_spin.setSingleStep(0.001)
|
| 460 |
+
self.gamma_spin.setDecimals(3)
|
| 461 |
+
params_layout.addWidget(self.gamma_spin, 2, 1)
|
| 462 |
+
|
| 463 |
+
params_layout.addWidget(QLabel("Device:"), 3, 0)
|
| 464 |
+
self.device_combo = QComboBox()
|
| 465 |
+
self.device_combo.addItems(['auto', 'cpu', 'mps', 'cuda'])
|
| 466 |
+
self.device_combo.setCurrentText('auto')
|
| 467 |
+
params_layout.addWidget(self.device_combo, 3, 1)
|
| 468 |
+
|
| 469 |
+
training_layout.addLayout(params_layout)
|
| 470 |
+
|
| 471 |
+
# Training buttons
|
| 472 |
+
train_buttons_layout = QHBoxLayout()
|
| 473 |
+
|
| 474 |
+
self.train_btn = QPushButton("Start Training")
|
| 475 |
+
self.train_btn.clicked.connect(self.toggle_training)
|
| 476 |
+
train_buttons_layout.addWidget(self.train_btn)
|
| 477 |
+
|
| 478 |
+
self.load_model_btn = QPushButton("Load Model")
|
| 479 |
+
self.load_model_btn.clicked.connect(self.load_model)
|
| 480 |
+
train_buttons_layout.addWidget(self.load_model_btn)
|
| 481 |
+
|
| 482 |
+
self.save_model_btn = QPushButton("Save Model")
|
| 483 |
+
self.save_model_btn.clicked.connect(self.save_model)
|
| 484 |
+
train_buttons_layout.addWidget(self.save_model_btn)
|
| 485 |
+
|
| 486 |
+
training_layout.addLayout(train_buttons_layout)
|
| 487 |
+
|
| 488 |
+
# Progress bar
|
| 489 |
+
self.progress_bar = QProgressBar()
|
| 490 |
+
training_layout.addWidget(self.progress_bar)
|
| 491 |
+
|
| 492 |
+
training_group.setLayout(training_layout)
|
| 493 |
+
right_panel.addWidget(training_group)
|
| 494 |
+
|
| 495 |
+
# Statistics
|
| 496 |
+
stats_group = QGroupBox("Training Statistics")
|
| 497 |
+
stats_layout = QVBoxLayout()
|
| 498 |
+
|
| 499 |
+
self.stats_text = QTextEdit()
|
| 500 |
+
self.stats_text.setReadOnly(True)
|
| 501 |
+
self.stats_text.setMaximumHeight(150)
|
| 502 |
+
stats_layout.addWidget(self.stats_text)
|
| 503 |
+
|
| 504 |
+
stats_group.setLayout(stats_layout)
|
| 505 |
+
right_panel.addWidget(stats_group)
|
| 506 |
+
|
| 507 |
+
# Log output
|
| 508 |
+
log_group = QGroupBox("Training Log")
|
| 509 |
+
log_layout = QVBoxLayout()
|
| 510 |
+
|
| 511 |
+
self.log_text = QTextEdit()
|
| 512 |
+
self.log_text.setReadOnly(True)
|
| 513 |
+
log_layout.addWidget(self.log_text)
|
| 514 |
+
|
| 515 |
+
log_group.setLayout(log_layout)
|
| 516 |
+
right_panel.addWidget(log_group)
|
| 517 |
+
|
| 518 |
+
# Add panels to main layout
|
| 519 |
+
main_layout.addLayout(left_panel, 1)
|
| 520 |
+
main_layout.addLayout(right_panel, 1)
|
| 521 |
+
|
| 522 |
+
# Timer for auto-play
|
| 523 |
+
self.auto_play_timer = QTimer()
|
| 524 |
+
self.auto_play_timer.timeout.connect(self.auto_play_step)
|
| 525 |
+
|
| 526 |
+
def toggle_training(self):
|
| 527 |
+
if self.is_training:
|
| 528 |
+
self.stop_training()
|
| 529 |
+
else:
|
| 530 |
+
self.start_training()
|
| 531 |
+
|
| 532 |
+
def start_training(self):
|
| 533 |
+
device = self.device_combo.currentText()
|
| 534 |
+
self.agent = DuelingDQNAgent(
|
| 535 |
+
state_size=27,
|
| 536 |
+
action_size=9,
|
| 537 |
+
learning_rate=self.lr_spin.value(),
|
| 538 |
+
gamma=self.gamma_spin.value(),
|
| 539 |
+
device=device
|
| 540 |
+
)
|
| 541 |
+
|
| 542 |
+
self.training_thread = TrainingThread(
|
| 543 |
+
self.agent,
|
| 544 |
+
TicTacToeEnvironment(),
|
| 545 |
+
episodes=self.episodes_spin.value(),
|
| 546 |
+
save_interval=100
|
| 547 |
+
)
|
| 548 |
+
|
| 549 |
+
self.training_thread.update_signal.connect(self.update_training_stats)
|
| 550 |
+
self.training_thread.finished_signal.connect(self.training_finished)
|
| 551 |
+
|
| 552 |
+
self.is_training = True
|
| 553 |
+
self.train_btn.setText("Stop Training")
|
| 554 |
+
self.status_label.setText("Status: Training...")
|
| 555 |
+
self.progress_bar.setRange(0, self.episodes_spin.value())
|
| 556 |
+
|
| 557 |
+
self.training_thread.start()
|
| 558 |
+
|
| 559 |
+
def stop_training(self):
|
| 560 |
+
if self.training_thread:
|
| 561 |
+
self.training_thread.stop()
|
| 562 |
+
self.training_thread.wait()
|
| 563 |
+
self.is_training = False
|
| 564 |
+
self.train_btn.setText("Start Training")
|
| 565 |
+
self.status_label.setText("Status: Training Stopped")
|
| 566 |
+
|
| 567 |
+
def training_finished(self):
|
| 568 |
+
self.is_training = False
|
| 569 |
+
self.train_btn.setText("Start Training")
|
| 570 |
+
self.status_label.setText("Status: Training Completed")
|
| 571 |
+
self.log_message("Training completed!")
|
| 572 |
+
|
| 573 |
+
def update_training_stats(self, stats):
|
| 574 |
+
self.progress_bar.setValue(stats['episodes'])
|
| 575 |
+
|
| 576 |
+
stats_text = f"""
|
| 577 |
+
Episodes: {stats['episodes']}
|
| 578 |
+
Wins: {stats['wins']} | Losses: {stats['losses']} | Draws: {stats['draws']}
|
| 579 |
+
Win Rate: {stats['wins']/stats['episodes']*100:.1f}%
|
| 580 |
+
Average Reward: {stats['avg_reward']:.3f}
|
| 581 |
+
Epsilon: {stats['epsilon']:.4f}
|
| 582 |
+
Current Loss: {stats['loss']:.4f}
|
| 583 |
+
""".strip()
|
| 584 |
+
|
| 585 |
+
self.stats_text.setText(stats_text)
|
| 586 |
+
|
| 587 |
+
def log_message(self, message):
|
| 588 |
+
timestamp = datetime.datetime.now().strftime("%H:%M:%S")
|
| 589 |
+
self.log_text.append(f"[{timestamp}] {message}")
|
| 590 |
+
# Auto-scroll to bottom
|
| 591 |
+
self.log_text.verticalScrollBar().setValue(
|
| 592 |
+
self.log_text.verticalScrollBar().maximum()
|
| 593 |
+
)
|
| 594 |
+
|
| 595 |
+
def reset_game(self):
|
| 596 |
+
self.env.reset()
|
| 597 |
+
self.update_display()
|
| 598 |
+
self.status_label.setText("Status: New Game Started")
|
| 599 |
+
self.auto_play_timer.stop()
|
| 600 |
+
|
| 601 |
+
def human_move(self, position):
|
| 602 |
+
if self.env.done or self.env.current_player != 1:
|
| 603 |
+
return
|
| 604 |
+
|
| 605 |
+
valid_moves = self.env.get_valid_moves()
|
| 606 |
+
if position in valid_moves:
|
| 607 |
+
state, reward, done, info = self.env.step(position)
|
| 608 |
+
self.update_display()
|
| 609 |
+
|
| 610 |
+
if done:
|
| 611 |
+
self.game_over(info)
|
| 612 |
+
else:
|
| 613 |
+
# Agent's turn
|
| 614 |
+
QTimer.singleShot(500, self.agent_move)
|
| 615 |
+
|
| 616 |
+
def agent_move(self):
|
| 617 |
+
if self.env.done or self.env.current_player != 2:
|
| 618 |
+
return
|
| 619 |
+
|
| 620 |
+
state = self.env.get_state()
|
| 621 |
+
valid_moves = self.env.get_valid_moves()
|
| 622 |
+
|
| 623 |
+
if valid_moves:
|
| 624 |
+
action = self.agent.act(state, valid_moves, training=False)
|
| 625 |
+
next_state, reward, done, info = self.env.step(action)
|
| 626 |
+
self.update_display()
|
| 627 |
+
|
| 628 |
+
if done:
|
| 629 |
+
self.game_over(info)
|
| 630 |
+
|
| 631 |
+
def auto_play(self):
|
| 632 |
+
if self.env.done:
|
| 633 |
+
self.reset_game()
|
| 634 |
+
|
| 635 |
+
self.auto_play_timer.start(1000) # 1 second between moves
|
| 636 |
+
self.status_label.setText("Status: Auto-playing...")
|
| 637 |
+
|
| 638 |
+
def auto_play_step(self):
|
| 639 |
+
if self.env.done:
|
| 640 |
+
self.auto_play_timer.stop()
|
| 641 |
+
self.status_label.setText("Status: Game Over - Auto-play")
|
| 642 |
+
return
|
| 643 |
+
|
| 644 |
+
if self.env.current_player == 1:
|
| 645 |
+
# Human player (random move for demo)
|
| 646 |
+
valid_moves = self.env.get_valid_moves()
|
| 647 |
+
if valid_moves:
|
| 648 |
+
action = random.choice(valid_moves)
|
| 649 |
+
self.env.step(action)
|
| 650 |
+
else:
|
| 651 |
+
# Agent player
|
| 652 |
+
state = self.env.get_state()
|
| 653 |
+
valid_moves = self.env.get_valid_moves()
|
| 654 |
+
if valid_moves:
|
| 655 |
+
action = self.agent.act(state, valid_moves, training=False)
|
| 656 |
+
self.env.step(action)
|
| 657 |
+
|
| 658 |
+
self.update_display()
|
| 659 |
+
|
| 660 |
+
if self.env.done:
|
| 661 |
+
self.auto_play_timer.stop()
|
| 662 |
+
self.game_over({'winner': self.env.winner})
|
| 663 |
+
|
| 664 |
+
def game_over(self, info):
|
| 665 |
+
winner = info.get('winner', 0)
|
| 666 |
+
if winner == 1:
|
| 667 |
+
self.status_label.setText("Status: You Win!")
|
| 668 |
+
elif winner == 2:
|
| 669 |
+
self.status_label.setText("Status: AI Wins!")
|
| 670 |
+
else:
|
| 671 |
+
self.status_label.setText("Status: Draw!")
|
| 672 |
+
|
| 673 |
+
def update_display(self):
|
| 674 |
+
symbols = {0: '', 1: 'X', 2: 'O'}
|
| 675 |
+
colors = {0: 'black', 1: 'blue', 2: 'red'}
|
| 676 |
+
|
| 677 |
+
for i in range(9):
|
| 678 |
+
symbol = symbols[self.env.board[i]]
|
| 679 |
+
color = colors[self.env.board[i]]
|
| 680 |
+
self.board_buttons[i].setText(symbol)
|
| 681 |
+
self.board_buttons[i].setStyleSheet(f"color: {color}; font-weight: bold;")
|
| 682 |
+
|
| 683 |
+
def load_model(self):
|
| 684 |
+
try:
|
| 685 |
+
if self.agent.load_model('models/dueling_dqn_latest.pth'):
|
| 686 |
+
self.log_message("Model loaded successfully!")
|
| 687 |
+
self.status_label.setText("Status: Model Loaded")
|
| 688 |
+
else:
|
| 689 |
+
self.log_message("No saved model found!")
|
| 690 |
+
except Exception as e:
|
| 691 |
+
self.log_message(f"Error loading model: {str(e)}")
|
| 692 |
+
|
| 693 |
+
def save_model(self):
|
| 694 |
+
try:
|
| 695 |
+
self.agent.save_model('models/dueling_dqn_latest.pth')
|
| 696 |
+
self.log_message("Model saved successfully!")
|
| 697 |
+
except Exception as e:
|
| 698 |
+
self.log_message(f"Error saving model: {str(e)}")
|
| 699 |
+
|
| 700 |
+
def main():
|
| 701 |
+
# Create necessary directories
|
| 702 |
+
os.makedirs('models', exist_ok=True)
|
| 703 |
+
|
| 704 |
+
app = QApplication(sys.argv)
|
| 705 |
+
|
| 706 |
+
# Set application style
|
| 707 |
+
app.setStyle('Fusion')
|
| 708 |
+
|
| 709 |
+
# Create and show main window
|
| 710 |
+
window = TicTacToeGUI()
|
| 711 |
+
window.show()
|
| 712 |
+
|
| 713 |
+
# Log startup message
|
| 714 |
+
window.log_message("Dueling DQN Tic-Tac-Toe Application Started")
|
| 715 |
+
window.log_message(f"Using PyTorch {torch.__version__}")
|
| 716 |
+
window.log_message(f"Available devices: CPU: True, CUDA: {torch.cuda.is_available()}, MPS: {torch.backends.mps.is_available()}")
|
| 717 |
+
|
| 718 |
+
sys.exit(app.exec_())
|
| 719 |
+
|
| 720 |
+
if __name__ == '__main__':
|
| 721 |
+
main()
|
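The same agent can be trained without the Qt front end; a minimal headless sketch that mirrors TrainingThread.run, assuming this file imports cleanly as app (the output filename is illustrative):

import os
from app import DuelingDQNAgent, TicTacToeEnvironment

os.makedirs('models', exist_ok=True)
env = TicTacToeEnvironment()
agent = DuelingDQNAgent(state_size=27, action_size=9)

for episode in range(200):                       # short run, for illustration only
    state = env.reset()
    while not env.done:
        valid_moves = env.get_valid_moves()
        action = agent.act(state, valid_moves)   # epsilon-greedy self-play for both marks
        next_state, reward, done, info = env.step(action)
        agent.remember(state, action, reward, next_state, done)
        agent.replay()
        state = next_state

agent.save_model('models/dueling_dqn_headless.pth')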
Tic Tac Toe RL/eval_models.py
ADDED
|
@@ -0,0 +1,464 @@
|
| 1 |
+
# eval_models.py
|
| 2 |
+
import os
|
| 3 |
+
import glob
|
| 4 |
+
import numpy as np
|
| 5 |
+
import torch
|
| 6 |
+
import matplotlib.pyplot as plt
|
| 7 |
+
import pandas as pd
|
| 8 |
+
from collections import deque
|
| 9 |
+
import logging
|
| 10 |
+
|
| 11 |
+
# Configure logging
|
| 12 |
+
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
|
| 13 |
+
logger = logging.getLogger(__name__)
|
| 14 |
+
|
| 15 |
+
class DuelingDQN(torch.nn.Module):
|
| 16 |
+
def __init__(self, input_size, output_size, hidden_size=256):
|
| 17 |
+
super(DuelingDQN, self).__init__()
|
| 18 |
+
self.input_size = input_size
|
| 19 |
+
self.output_size = output_size
|
| 20 |
+
|
| 21 |
+
# Feature layer
|
| 22 |
+
self.feature = torch.nn.Sequential(
|
| 23 |
+
torch.nn.Linear(input_size, hidden_size),
|
| 24 |
+
torch.nn.ReLU(),
|
| 25 |
+
torch.nn.Linear(hidden_size, hidden_size),
|
| 26 |
+
torch.nn.ReLU(),
|
| 27 |
+
torch.nn.Linear(hidden_size, hidden_size // 2),
|
| 28 |
+
torch.nn.ReLU()
|
| 29 |
+
)
|
| 30 |
+
|
| 31 |
+
# Value stream
|
| 32 |
+
self.value_stream = torch.nn.Sequential(
|
| 33 |
+
torch.nn.Linear(hidden_size // 2, hidden_size // 4),
|
| 34 |
+
torch.nn.ReLU(),
|
| 35 |
+
torch.nn.Linear(hidden_size // 4, 1)
|
| 36 |
+
)
|
| 37 |
+
|
| 38 |
+
# Advantage stream
|
| 39 |
+
self.advantage_stream = torch.nn.Sequential(
|
| 40 |
+
torch.nn.Linear(hidden_size // 2, hidden_size // 4),
|
| 41 |
+
torch.nn.ReLU(),
|
| 42 |
+
torch.nn.Linear(hidden_size // 4, output_size)
|
| 43 |
+
)
|
| 44 |
+
|
| 45 |
+
def forward(self, state):
|
| 46 |
+
features = self.feature(state)
|
| 47 |
+
value = self.value_stream(features)
|
| 48 |
+
advantage = self.advantage_stream(features)
|
| 49 |
+
q_values = value + (advantage - advantage.mean(dim=1, keepdim=True))
|
| 50 |
+
return q_values
|
| 51 |
+
|
| 52 |
+
class TicTacToeEnvironment:
|
| 53 |
+
def __init__(self):
|
| 54 |
+
self.reset()
|
| 55 |
+
|
| 56 |
+
def reset(self):
|
| 57 |
+
self.board = np.zeros(9, dtype=int)
|
| 58 |
+
self.current_player = 1
|
| 59 |
+
self.done = False
|
| 60 |
+
self.winner = 0
|
| 61 |
+
return self.get_state()
|
| 62 |
+
|
| 63 |
+
def get_state(self):
|
| 64 |
+
state = np.zeros(9 * 3, dtype=np.float32)
|
| 65 |
+
for i in range(9):
|
| 66 |
+
if self.board[i] == 0:
|
| 67 |
+
state[i * 3] = 1.0
|
| 68 |
+
elif self.board[i] == 1:
|
| 69 |
+
state[i * 3 + 1] = 1.0
|
| 70 |
+
else:
|
| 71 |
+
state[i * 3 + 2] = 1.0
|
| 72 |
+
return state
|
| 73 |
+
|
| 74 |
+
def get_valid_moves(self):
|
| 75 |
+
return [i for i in range(9) if self.board[i] == 0]
|
| 76 |
+
|
| 77 |
+
def step(self, action):
|
| 78 |
+
if self.done:
|
| 79 |
+
return self.get_state(), 0, True, {}
|
| 80 |
+
|
| 81 |
+
if self.board[action] != 0:
|
| 82 |
+
return self.get_state(), -5, True, {}
|
| 83 |
+
|
| 84 |
+
self.board[action] = self.current_player
|
| 85 |
+
|
| 86 |
+
if self.check_win(self.current_player):
|
| 87 |
+
self.done = True
|
| 88 |
+
self.winner = self.current_player
|
| 89 |
+
reward = 10
|
| 90 |
+
elif len(self.get_valid_moves()) == 0:
|
| 91 |
+
self.done = True
|
| 92 |
+
reward = 2
|
| 93 |
+
else:
|
| 94 |
+
reward = 0.1
|
| 95 |
+
self.current_player = 3 - self.current_player
|
| 96 |
+
|
| 97 |
+
return self.get_state(), reward, self.done, {'winner': self.winner}
|
| 98 |
+
|
| 99 |
+
def check_win(self, player):
|
| 100 |
+
winning_combinations = [
|
| 101 |
+
[0, 1, 2], [3, 4, 5], [6, 7, 8],
|
| 102 |
+
[0, 3, 6], [1, 4, 7], [2, 5, 8],
|
| 103 |
+
[0, 4, 8], [2, 4, 6]
|
| 104 |
+
]
|
| 105 |
+
|
| 106 |
+
for combo in winning_combinations:
|
| 107 |
+
if all(self.board[i] == player for i in combo):
|
| 108 |
+
return True
|
| 109 |
+
return False
|
| 110 |
+
|
| 111 |
+
def render(self):
|
| 112 |
+
symbols = {0: ' ', 1: 'X', 2: 'O'}
|
| 113 |
+
board_str = "\n"
|
| 114 |
+
for i in range(3):
|
| 115 |
+
row = [symbols[self.board[i*3 + j]] for j in range(3)]
|
| 116 |
+
board_str += " " + " | ".join(row) + " \n"
|
| 117 |
+
if i < 2:
|
| 118 |
+
board_str += "-----------\n"
|
| 119 |
+
return board_str
|
| 120 |
+
|
| 121 |
+
class ModelEvaluator:
|
| 122 |
+
def __init__(self, models_dir='models'):
|
| 123 |
+
self.models_dir = models_dir
|
| 124 |
+
self.device = torch.device("mps" if torch.backends.mps.is_available() else
|
| 125 |
+
"cuda" if torch.cuda.is_available() else "cpu")
|
| 126 |
+
logger.info(f"Using device: {self.device}")
|
| 127 |
+
|
| 128 |
+
def load_model(self, model_path):
|
| 129 |
+
"""Load model with compatibility for different PyTorch versions"""
|
| 130 |
+
try:
|
| 131 |
+
# Try with weights_only=True first (PyTorch 2.6+)
|
| 132 |
+
checkpoint = torch.load(model_path, map_location=self.device, weights_only=True)
|
| 133 |
+
except Exception:
|
| 134 |
+
try:
|
| 135 |
+
# Fallback to weights_only=False
|
| 136 |
+
checkpoint = torch.load(model_path, map_location=self.device, weights_only=False)
|
| 137 |
+
except Exception as e:
|
| 138 |
+
logger.error(f"Failed to load model {model_path}: {e}")
|
| 139 |
+
return None
|
| 140 |
+
|
| 141 |
+
state_size = 27
|
| 142 |
+
action_size = 9
|
| 143 |
+
model = DuelingDQN(state_size, action_size).to(self.device)
|
| 144 |
+
model.load_state_dict(checkpoint['policy_net_state_dict'])
|
| 145 |
+
model.eval()
|
| 146 |
+
|
| 147 |
+
return model
|
| 148 |
+
|
| 149 |
+
def evaluate_model(self, model, num_games=100, agent_player=1):
|
| 150 |
+
"""Evaluate model performance against random opponent"""
|
| 151 |
+
env = TicTacToeEnvironment()
|
| 152 |
+
wins = 0
|
| 153 |
+
losses = 0
|
| 154 |
+
draws = 0
|
| 155 |
+
total_reward = 0
|
| 156 |
+
|
| 157 |
+
for game in range(num_games):
|
| 158 |
+
state = env.reset()
|
| 159 |
+
game_reward = 0
|
| 160 |
+
steps = 0
|
| 161 |
+
|
| 162 |
+
while not env.done:
|
| 163 |
+
valid_moves = env.get_valid_moves()
|
| 164 |
+
|
| 165 |
+
if env.current_player == agent_player:
|
| 166 |
+
# Agent's turn
|
| 167 |
+
state_tensor = torch.FloatTensor(state).unsqueeze(0).to(self.device)
|
| 168 |
+
with torch.no_grad():
|
| 169 |
+
q_values = model(state_tensor)
|
| 170 |
+
|
| 171 |
+
# Mask invalid moves
|
| 172 |
+
q_values_np = q_values.cpu().numpy()[0]
|
| 173 |
+
for i in range(9):
|
| 174 |
+
if i not in valid_moves:
|
| 175 |
+
q_values_np[i] = -float('inf')
|
| 176 |
+
|
| 177 |
+
action = np.argmax(q_values_np)
|
| 178 |
+
else:
|
| 179 |
+
# Random opponent
|
| 180 |
+
action = np.random.choice(valid_moves)
|
| 181 |
+
|
| 182 |
+
state, reward, done, info = env.step(action)
|
| 183 |
+
|
| 184 |
+
if env.current_player == agent_player:
|
| 185 |
+
game_reward += reward
|
| 186 |
+
|
| 187 |
+
steps += 1
|
| 188 |
+
|
| 189 |
+
# Determine game outcome from agent's perspective
|
| 190 |
+
if info['winner'] == agent_player:
|
| 191 |
+
wins += 1
|
| 192 |
+
elif info['winner'] == 0:
|
| 193 |
+
draws += 1
|
| 194 |
+
else:
|
| 195 |
+
losses += 1
|
| 196 |
+
|
| 197 |
+
total_reward += game_reward
|
| 198 |
+
|
| 199 |
+
win_rate = wins / num_games
|
| 200 |
+
avg_reward = total_reward / num_games
|
| 201 |
+
|
| 202 |
+
return {
|
| 203 |
+
'wins': wins,
|
| 204 |
+
'losses': losses,
|
| 205 |
+
'draws': draws,
|
| 206 |
+
'win_rate': win_rate,
|
| 207 |
+
'avg_reward': avg_reward
|
| 208 |
+
}
|
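A sketch of scoring a single checkpoint with this evaluator, assuming the file imports as eval_models and the checkpoint listed in the results CSV is present on disk:

from eval_models import ModelEvaluator

evaluator = ModelEvaluator(models_dir='models')
model = evaluator.load_model('models/dueling_dqn_episode_100.pth')
if model is not None:
    stats = evaluator.evaluate_model(model, num_games=100, agent_player=1)
    print(f"win rate as X: {stats['win_rate']:.1%} "
          f"({stats['wins']}W / {stats['losses']}L / {stats['draws']}D)")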
| 209 |
+
|
| 210 |
+
def play_interactive_game(self, model):
|
| 211 |
+
"""Play an interactive game against the model"""
|
| 212 |
+
env = TicTacToeEnvironment()
|
| 213 |
+
|
| 214 |
+
print("\n🎮 Interactive Game Mode")
|
| 215 |
+
print("You are 'X', AI is 'O'")
|
| 216 |
+
print("Enter moves as numbers 0-8 (left to right, top to bottom):")
|
| 217 |
+
print("0 | 1 | 2")
|
| 218 |
+
print("---------")
|
| 219 |
+
print("3 | 4 | 5")
|
| 220 |
+
print("---------")
|
| 221 |
+
print("6 | 7 | 8")
|
| 222 |
+
|
| 223 |
+
while not env.done:
|
| 224 |
+
print(env.render())
|
| 225 |
+
|
| 226 |
+
if env.current_player == 1: # Human turn
|
| 227 |
+
valid_moves = env.get_valid_moves()
|
| 228 |
+
print(f"Your turn. Valid moves: {valid_moves}")
|
| 229 |
+
|
| 230 |
+
try:
|
| 231 |
+
move = int(input("Enter your move (0-8): "))
|
| 232 |
+
if move not in valid_moves:
|
| 233 |
+
print("Invalid move! Try again.")
|
| 234 |
+
continue
|
| 235 |
+
except ValueError:
|
| 236 |
+
print("Please enter a number between 0-8")
|
| 237 |
+
continue
|
| 238 |
+
|
| 239 |
+
env.step(move)
|
| 240 |
+
|
| 241 |
+
else: # AI turn
|
| 242 |
+
print("AI is thinking...")
|
| 243 |
+
state = env.get_state()
|
| 244 |
+
valid_moves = env.get_valid_moves()
|
| 245 |
+
|
| 246 |
+
state_tensor = torch.FloatTensor(state).unsqueeze(0).to(self.device)
|
| 247 |
+
with torch.no_grad():
|
| 248 |
+
q_values = model(state_tensor)
|
| 249 |
+
|
| 250 |
+
# Mask invalid moves
|
| 251 |
+
q_values_np = q_values.cpu().numpy()[0]
|
| 252 |
+
for i in range(9):
|
| 253 |
+
if i not in valid_moves:
|
| 254 |
+
q_values_np[i] = -float('inf')
|
| 255 |
+
|
| 256 |
+
action = np.argmax(q_values_np)
|
| 257 |
+
env.step(action)
|
| 258 |
+
print(f"AI played move: {action}")
|
| 259 |
+
|
| 260 |
+
print(env.render())
|
| 261 |
+
if env.winner == 1:
|
| 262 |
+
print("🎉 You win!")
|
| 263 |
+
elif env.winner == 2:
|
| 264 |
+
print("🤖 AI wins!")
|
| 265 |
+
else:
|
| 266 |
+
print("🤝 It's a draw!")
|
| 267 |
+
|
| 268 |
+
def run_comprehensive_evaluation(self, num_games_per_model=50):
|
| 269 |
+
"""Evaluate all models in the models directory"""
|
| 270 |
+
model_files = glob.glob(os.path.join(self.models_dir, "*.pth"))
|
| 271 |
+
model_files.sort() # Sort by name to maintain episode order
|
| 272 |
+
|
| 273 |
+
if not model_files:
|
| 274 |
+
logger.error("No model files found in models directory!")
|
| 275 |
+
return
|
| 276 |
+
|
| 277 |
+
results = []
|
| 278 |
+
|
| 279 |
+
print(f"\n🔍 Evaluating {len(model_files)} models with {num_games_per_model} games each...")
|
| 280 |
+
|
| 281 |
+
for model_file in model_files:
|
| 282 |
+
model_name = os.path.basename(model_file)
|
| 283 |
+
print(f"\nEvaluating: {model_name}")
|
| 284 |
+
|
| 285 |
+
model = self.load_model(model_file)
|
| 286 |
+
if model is None:
|
| 287 |
+
continue
|
| 288 |
+
|
| 289 |
+
# Evaluate as both player 1 and player 2
|
| 290 |
+
stats_p1 = self.evaluate_model(model, num_games_per_model, agent_player=1)
|
| 291 |
+
stats_p2 = self.evaluate_model(model, num_games_per_model, agent_player=2)
|
| 292 |
+
|
| 293 |
+
# Extract episode number from filename
|
| 294 |
+
episode_num = self._extract_episode_number(model_name)
|
| 295 |
+
|
| 296 |
+
result = {
|
| 297 |
+
'model_file': model_name,
|
| 298 |
+
'episode': episode_num,
|
| 299 |
+
'win_rate_p1': stats_p1['win_rate'],
|
| 300 |
+
'win_rate_p2': stats_p2['win_rate'],
|
| 301 |
+
'avg_win_rate': (stats_p1['win_rate'] + stats_p2['win_rate']) / 2,
|
| 302 |
+
'wins_p1': stats_p1['wins'],
|
| 303 |
+
'losses_p1': stats_p1['losses'],
|
| 304 |
+
'draws_p1': stats_p1['draws'],
|
| 305 |
+
'wins_p2': stats_p2['wins'],
|
| 306 |
+
'losses_p2': stats_p2['losses'],
|
| 307 |
+
'draws_p2': stats_p2['draws'],
|
| 308 |
+
'avg_reward_p1': stats_p1['avg_reward'],
|
| 309 |
+
'avg_reward_p2': stats_p2['avg_reward']
|
| 310 |
+
}
|
| 311 |
+
|
| 312 |
+
results.append(result)
|
| 313 |
+
|
| 314 |
+
print(f" As Player 1: Win Rate: {stats_p1['win_rate']:.1%}")
|
| 315 |
+
print(f" As Player 2: Win Rate: {stats_p2['win_rate']:.1%}")
|
| 316 |
+
print(f" Average Win Rate: {result['avg_win_rate']:.1%}")
|
| 317 |
+
|
| 318 |
+
# Save results to CSV
|
| 319 |
+
self._save_results_to_csv(results)
|
| 320 |
+
|
| 321 |
+
# Create visualization
|
| 322 |
+
self._create_visualizations(results)
|
| 323 |
+
|
| 324 |
+
# Find and test the best model
|
| 325 |
+
best_model_info = max(results, key=lambda x: x['avg_win_rate'])
|
| 326 |
+
best_model_path = os.path.join(self.models_dir, best_model_info['model_file'])
|
| 327 |
+
best_model = self.load_model(best_model_path)
|
| 328 |
+
|
| 329 |
+
print(f"\n🏆 Best Model: {best_model_info['model_file']}")
|
| 330 |
+
print(f" Average Win Rate: {best_model_info['avg_win_rate']:.1%}")
|
| 331 |
+
|
| 332 |
+
# Interactive game with best model
|
| 333 |
+
while True:
|
| 334 |
+
play = input("\nWould you like to play against the best model? (y/n): ").lower().strip()
|
| 335 |
+
if play == 'y':
|
| 336 |
+
self.play_interactive_game(best_model)
|
| 337 |
+
|
| 338 |
+
play_again = input("\nPlay again? (y/n): ").lower().strip()
|
| 339 |
+
if play_again != 'y':
|
| 340 |
+
break
|
| 341 |
+
else:
|
| 342 |
+
break
|
| 343 |
+
|
| 344 |
+
def _extract_episode_number(self, filename):
|
| 345 |
+
"""Extract episode number from filename"""
|
| 346 |
+
import re
|
| 347 |
+
match = re.search(r'episode_(\d+)', filename)
|
| 348 |
+
return int(match.group(1)) if match else 0
|
| 349 |
+
|
| 350 |
+
def _save_results_to_csv(self, results):
|
| 351 |
+
"""Save evaluation results to CSV"""
|
| 352 |
+
df = pd.DataFrame(results)
|
| 353 |
+
csv_path = 'model_evaluation_results.csv'
|
| 354 |
+
df.to_csv(csv_path, index=False)
|
| 355 |
+
print(f"\n📊 Results saved to: {csv_path}")
|
| 356 |
+
|
| 357 |
+
# Print summary statistics
|
| 358 |
+
print(f"\n📈 Summary Statistics:")
|
| 359 |
+
print(f" Models evaluated: {len(results)}")
|
| 360 |
+
print(f" Best win rate: {df['avg_win_rate'].max():.1%}")
|
| 361 |
+
print(f" Worst win rate: {df['avg_win_rate'].min():.1%}")
|
| 362 |
+
print(f" Average win rate: {df['avg_win_rate'].mean():.1%}")
|
| 363 |
+
|
| 364 |
+
def _create_visualizations(self, results):
|
| 365 |
+
"""Create visualization plots for model performance"""
|
| 366 |
+
episodes = [r['episode'] for r in results]
|
| 367 |
+
win_rates_p1 = [r['win_rate_p1'] for r in results]
|
| 368 |
+
win_rates_p2 = [r['win_rate_p2'] for r in results]
|
| 369 |
+
avg_win_rates = [r['avg_win_rate'] for r in results]
|
| 370 |
+
|
| 371 |
+
plt.figure(figsize=(12, 8))
|
| 372 |
+        plt.subplot(2, 2, 1)
+        plt.plot(episodes, win_rates_p1, 'b-', label='As Player 1', alpha=0.7)
+        plt.plot(episodes, win_rates_p2, 'r-', label='As Player 2', alpha=0.7)
+        plt.plot(episodes, avg_win_rates, 'g-', label='Average', linewidth=2)
+        plt.xlabel('Training Episode')
+        plt.ylabel('Win Rate')
+        plt.title('Model Performance vs Random Opponent')
+        plt.legend()
+        plt.grid(True, alpha=0.3)
+
+        plt.subplot(2, 2, 2)
+        plt.scatter(episodes, avg_win_rates, c=avg_win_rates, cmap='viridis', alpha=0.6)
+        plt.colorbar(label='Win Rate')
+        plt.xlabel('Training Episode')
+        plt.ylabel('Average Win Rate')
+        plt.title('Learning Progress')
+        plt.grid(True, alpha=0.3)
+
+        plt.subplot(2, 2, 3)
+        outcomes_p1 = np.array([(r['wins_p1'], r['losses_p1'], r['draws_p1']) for r in results])
+        outcomes_p1 = outcomes_p1 / outcomes_p1.sum(axis=1, keepdims=True)
+        plt.stackplot(episodes, outcomes_p1.T, labels=['Wins', 'Losses', 'Draws'], alpha=0.7)
+        plt.xlabel('Training Episode')
+        plt.ylabel('Proportion')
+        plt.title('Outcomes as Player 1')
+        plt.legend()
+        plt.grid(True, alpha=0.3)
+
+        plt.subplot(2, 2, 4)
+        outcomes_p2 = np.array([(r['wins_p2'], r['losses_p2'], r['draws_p2']) for r in results])
+        outcomes_p2 = outcomes_p2 / outcomes_p2.sum(axis=1, keepdims=True)
+        plt.stackplot(episodes, outcomes_p2.T, labels=['Wins', 'Losses', 'Draws'], alpha=0.7)
+        plt.xlabel('Training Episode')
+        plt.ylabel('Proportion')
+        plt.title('Outcomes as Player 2')
+        plt.legend()
+        plt.grid(True, alpha=0.3)
+
+        plt.tight_layout()
+        plt.savefig('model_performance_analysis.png', dpi=300, bbox_inches='tight')
+        print("📈 Visualization saved as: model_performance_analysis.png")
+
+def main():
+    evaluator = ModelEvaluator()
+
+    print("=" * 60)
+    print("🤖 Dueling DQN Tic-Tac-Toe Model Evaluator")
+    print("=" * 60)
+
+    while True:
+        print("\nOptions:")
+        print("1. Comprehensive evaluation of all models")
+        print("2. Interactive game with specific model")
+        print("3. Exit")
+
+        choice = input("\nEnter your choice (1-3): ").strip()
+
+        if choice == '1':
+            num_games = input("Enter number of games per model (default 50): ").strip()
+            num_games = int(num_games) if num_games.isdigit() else 50
+            evaluator.run_comprehensive_evaluation(num_games)
+
+        elif choice == '2':
+            model_files = glob.glob("models/*.pth")
+            if not model_files:
+                print("No model files found in models directory!")
+                continue
+
+            print("\nAvailable models:")
+            for i, model_file in enumerate(model_files, 1):
+                print(f"{i}. {os.path.basename(model_file)}")
+
+            try:
+                model_choice = int(input(f"\nSelect model (1-{len(model_files)}): ")) - 1
+                if 0 <= model_choice < len(model_files):
+                    model = evaluator.load_model(model_files[model_choice])
+                    if model:
+                        evaluator.play_interactive_game(model)
+                else:
+                    print("Invalid selection!")
+            except ValueError:
+                print("Please enter a valid number!")
+
+        elif choice == '3':
+            print("Goodbye!")
+            break
+
+        else:
+            print("Invalid choice! Please enter 1, 2, or 3.")
+
+if __name__ == '__main__':
+    main()
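For reference, a minimal non-interactive sketch of driving the evaluator above from another script instead of the menu loop. It uses only names that appear in eval_models.py (`ModelEvaluator`, `run_comprehensive_evaluation`); treating the file as an importable module named `eval_models` and running it from the `Tic Tac Toe RL` directory are assumptions, not something the upload specifies.

```python
# Hedged sketch: batch evaluation without the interactive menu.
# Assumes eval_models.py is importable from the current directory and that
# the models/ folder holds the .pth checkpoints (fetched via Git LFS).
from eval_models import ModelEvaluator

if __name__ == "__main__":
    evaluator = ModelEvaluator()
    # Same call that menu option 1 makes; 50 games per model is the
    # script's own default.
    evaluator.run_comprehensive_evaluation(50)
```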
Tic Tac Toe RL/model_evaluation_results.csv
ADDED
@@ -0,0 +1,52 @@
+model_file,episode,win_rate_p1,win_rate_p2,avg_win_rate,wins_p1,losses_p1,draws_p1,wins_p2,losses_p2,draws_p2,avg_reward_p1,avg_reward_p2
+dueling_dqn_episode_100.pth,100,0.76,0.44,0.6,38,10,2,22,23,5,7.982000000000002,4.752000000000004
+dueling_dqn_episode_1000.pth,1000,0.58,0.14,0.36,29,17,4,7,30,13,6.278000000000002,1.76
+dueling_dqn_episode_1100.pth,1100,0.52,0.12,0.32,26,12,12,6,35,9,6.004000000000003,1.5460000000000003
+dueling_dqn_episode_1200.pth,1200,0.48,0.22,0.35,24,17,9,11,29,10,5.488000000000003,2.5500000000000007
+dueling_dqn_episode_1300.pth,1300,0.64,0.16,0.4,32,12,6,8,34,8,6.958000000000001,1.9580000000000015
+dueling_dqn_episode_1400.pth,1400,0.52,0.24,0.38,26,11,13,12,33,5,6.072,2.7560000000000016
+dueling_dqn_episode_1500.pth,1500,0.56,0.28,0.42000000000000004,28,14,8,14,30,6,6.238000000000002,3.150000000000003
+dueling_dqn_episode_1600.pth,1600,0.4,0.14,0.27,20,27,3,7,39,4,4.438000000000004,1.7460000000000002
+dueling_dqn_episode_1700.pth,1700,0.36,0.16,0.26,18,26,6,8,39,3,4.160000000000003,1.942000000000001
+dueling_dqn_episode_1800.pth,1800,0.34,0.04,0.19,17,28,5,2,46,2,3.912000000000004,0.7399999999999993
+dueling_dqn_episode_1900.pth,1900,0.38,0.06,0.22,19,26,5,3,43,4,4.320000000000003,0.938
+dueling_dqn_episode_200.pth,200,0.7,0.32,0.51,35,8,7,16,24,10,7.602,3.542000000000002
+dueling_dqn_episode_2000.pth,2000,0.44,0.12,0.28,22,24,4,6,40,4,4.888000000000004,1.5539999999999998
+dueling_dqn_episode_2100.pth,2100,0.32,0.26,0.29000000000000004,16,32,2,13,33,4,3.5820000000000034,2.9460000000000006
+dueling_dqn_episode_2200.pth,2200,0.36,0.14,0.25,18,30,2,7,38,5,3.992000000000003,1.734
+dueling_dqn_episode_2300.pth,2300,0.52,0.14,0.33,26,20,4,7,36,7,5.682,1.743999999999999
+dueling_dqn_episode_2400.pth,2400,0.56,0.22,0.39,28,18,4,11,36,3,6.100000000000001,2.5480000000000014
+dueling_dqn_episode_2500.pth,2500,0.26,0.14,0.2,13,25,12,7,37,6,3.4080000000000026,1.7520000000000002
+dueling_dqn_episode_2600.pth,2600,0.46,0.18,0.32,23,21,6,9,36,5,5.174000000000003,2.134000000000001
+dueling_dqn_episode_2700.pth,2700,0.48,0.2,0.33999999999999997,24,15,11,10,37,3,5.586000000000001,2.3320000000000003
+dueling_dqn_episode_2800.pth,2800,0.3,0.14,0.22,15,22,13,7,42,1,3.8620000000000014,1.7440000000000004
+dueling_dqn_episode_2900.pth,2900,0.4,0.12,0.26,20,23,7,6,42,2,4.600000000000003,1.534
+dueling_dqn_episode_300.pth,300,0.6,0.4,0.5,30,17,3,20,27,3,6.379999999999999,4.330000000000002
+dueling_dqn_episode_3000.pth,3000,0.4,0.18,0.29000000000000004,20,23,7,9,40,1,4.610000000000003,2.1280000000000006
+dueling_dqn_episode_3100.pth,3100,0.48,0.16,0.32,24,20,6,8,40,2,5.3720000000000026,1.9460000000000008
+dueling_dqn_episode_3200.pth,3200,0.24,0.14,0.19,12,26,12,7,42,1,3.2040000000000015,1.7420000000000002
+dueling_dqn_episode_3300.pth,3300,0.34,0.22,0.28,17,27,6,11,37,2,3.960000000000002,2.5380000000000007
+dueling_dqn_episode_3400.pth,3400,0.52,0.1,0.31,26,15,9,5,43,2,5.8980000000000015,1.3279999999999992
+dueling_dqn_episode_3500.pth,3500,0.44,0.2,0.32,22,24,4,10,37,3,4.890000000000004,2.3420000000000005
+dueling_dqn_episode_3600.pth,3600,0.42,0.16,0.29,21,19,10,8,42,0,4.946000000000002,1.9320000000000008
+dueling_dqn_episode_3700.pth,3700,0.28,0.32,0.30000000000000004,14,26,10,16,34,0,3.534000000000002,3.5420000000000016
+dueling_dqn_episode_3800.pth,3800,0.42,0.08,0.25,21,23,6,4,46,0,4.760000000000004,1.1299999999999992
+dueling_dqn_episode_3900.pth,3900,0.36,0.2,0.28,18,24,8,10,37,3,4.250000000000002,2.3280000000000003
+dueling_dqn_episode_400.pth,400,0.54,0.32,0.43000000000000005,27,19,4,16,27,7,5.882000000000002,3.5400000000000036
+dueling_dqn_episode_4000.pth,4000,0.46,0.08,0.27,23,17,10,4,40,6,5.3480000000000025,1.1399999999999995
+dueling_dqn_episode_4100.pth,4100,0.48,0.22,0.35,24,19,7,11,36,3,5.424000000000002,2.5400000000000005
+dueling_dqn_episode_4200.pth,4200,0.44,0.22,0.33,22,18,10,11,31,8,5.130000000000003,2.564000000000001
+dueling_dqn_episode_4300.pth,4300,0.42,0.16,0.29,21,21,8,8,36,6,4.852000000000003,1.9479999999999993
+dueling_dqn_episode_4400.pth,4400,0.64,0.24,0.44,32,9,9,12,30,8,7.092,2.7540000000000013
+dueling_dqn_episode_4500.pth,4500,0.62,0.24,0.43,31,11,8,12,33,5,6.85,2.7500000000000004
+dueling_dqn_episode_4600.pth,4600,0.54,0.26,0.4,27,16,7,13,32,5,6.016000000000003,2.9560000000000013
+dueling_dqn_episode_4700.pth,4700,0.62,0.22,0.42,31,13,6,11,32,7,6.7760000000000025,2.5400000000000005
+dueling_dqn_episode_4800.pth,4800,0.56,0.3,0.43000000000000005,28,16,6,15,30,5,6.172000000000004,3.352000000000002
+dueling_dqn_episode_4900.pth,4900,0.44,0.44,0.44,22,24,4,22,23,5,4.8720000000000026,4.7520000000000024
+dueling_dqn_episode_500.pth,500,0.6,0.16,0.38,30,14,6,8,32,10,6.560000000000003,1.9440000000000008
+dueling_dqn_episode_5000.pth,5000,0.54,0.3,0.42000000000000004,27,18,5,15,26,9,5.928000000000003,3.354000000000002
+dueling_dqn_episode_600.pth,600,0.5,0.16,0.33,25,22,3,8,39,3,5.422000000000002,1.9399999999999995
+dueling_dqn_episode_700.pth,700,0.52,0.12,0.32,26,16,8,6,34,10,5.846000000000003,1.56
+dueling_dqn_episode_800.pth,800,0.42,0.06,0.24,21,23,6,3,37,10,4.752000000000003,0.9339999999999997
+dueling_dqn_episode_900.pth,900,0.4,0.12,0.26,20,22,8,6,39,5,4.646000000000005,1.5459999999999996
+dueling_dqn_latest.pth,0,0.44,0.22,0.33,22,14,14,11,33,6,5.320000000000004,2.5620000000000016
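The CSV just added is the raw output of the comprehensive evaluation: one row per checkpoint, plus a `dueling_dqn_latest.pth` row recorded with episode 0. Below is a small sketch for reloading and summarizing it; pandas and matplotlib are assumed to be installed, and only the column names visible in the header row are relied on.

```python
# Hedged sketch: reload model_evaluation_results.csv and compare checkpoints.
import pandas as pd
import matplotlib.pyplot as plt

df = pd.read_csv("model_evaluation_results.csv")
df = df[df["episode"] > 0].sort_values("episode")  # skip the 'latest' row (episode 0)

best = df.loc[df["avg_win_rate"].idxmax()]
print(f"Best checkpoint: {best['model_file']} "
      f"(avg win rate {best['avg_win_rate']:.2f} at episode {int(best['episode'])})")

plt.plot(df["episode"], df["avg_win_rate"], marker="o")
plt.xlabel("Training Episode")
plt.ylabel("Average Win Rate vs Random")
plt.title("Checkpoint comparison")
plt.tight_layout()
plt.show()
```

Read this way, the table suggests the earliest checkpoints (episodes 100-300) score highest against the random opponent, while later checkpoints mostly stay between a 0.19 and 0.44 average win rate; model_performance_analysis.png below plots the same data.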
Tic Tac Toe RL/model_performance_analysis.png
ADDED
Git LFS Details
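The PNG above and every `.pth` checkpoint below are stored through Git LFS, so the diff shows only pointer stanzas (version / oid / size) rather than the binary weights. A hedged sketch for fetching the real files and peeking inside one checkpoint follows; the layout of the saved object depends on how the training code serializes it, so the snippet only reports what it finds.

```python
# Hedged sketch: after `git lfs install && git lfs pull`, the .pth files
# contain real tensors instead of pointer text and can be opened with torch.
# (On newer PyTorch you may need torch.load(..., weights_only=False) if the
# checkpoint stores more than plain tensors.)
import torch

path = "Tic Tac Toe RL/models/dueling_dqn_episode_100.pth"
checkpoint = torch.load(path, map_location="cpu")  # CPU is enough for inspection

if isinstance(checkpoint, dict):
    # Could be a raw state_dict or a wrapper dict; which one applies here is
    # an assumption, so just list the top-level keys and match them to app.py.
    print("Top-level keys:", list(checkpoint.keys()))
else:
    print("Loaded object of type:", type(checkpoint))
```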
Tic Tac Toe RL/models/dueling_dqn_episode_100.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a7889831812ac7b14064856f34ee5ab01ee87dee643e92ac7f69c17fa90edd81
+size 2197905
Tic Tac Toe RL/models/dueling_dqn_episode_1000.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fa605d7d49dad36efe8306934fa7113b4b228e7cce90ed8f4b6435f6e941dc2a
+size 4454493
Tic Tac Toe RL/models/dueling_dqn_episode_1100.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d803d2395adece1d6400332108be84ba422cb12a84ebcb2215a676945f5a215e
+size 4644317
Tic Tac Toe RL/models/dueling_dqn_episode_1200.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a88625a6137c3739c1e82e59ecf81d8f28b86658b0f6b32deb7ceffd29cc3db2
+size 4851101
Tic Tac Toe RL/models/dueling_dqn_episode_1300.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9384217041ff8518fe68cba9eb04021149c964cb1add5f9de10b66efd1c701c2
+size 5065565
Tic Tac Toe RL/models/dueling_dqn_episode_1400.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7f74e2a0d223abc6bf0960cc34192a764a11d3e902dfcb286affb7d1ea3214ab
+size 5130525
Tic Tac Toe RL/models/dueling_dqn_episode_1500.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f29380fab72c71b4ee476e6a314125cb9bb451c397d20b04376fa98cd09afb5d
+size 5146717
Tic Tac Toe RL/models/dueling_dqn_episode_1600.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1f7eb75fd7b88b7cbb4adec1e0071928dfd67ea69cba879fa24ce40e6e3bac39
+size 5152861
Tic Tac Toe RL/models/dueling_dqn_episode_1700.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b418935e207e9eaa70bb1779ecc165ba65ffcec8142b77abdb724ff5abc96653
+size 5156765
Tic Tac Toe RL/models/dueling_dqn_episode_1800.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:50edd4d7243009e1d5c1e813ad87efc34bc7c01cc5165b81390639e58fd2d8ac
+size 5153437
Tic Tac Toe RL/models/dueling_dqn_episode_1900.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:be548035d7be186486838d2265b1c3d95f8fa2b11eda0a8f8c8642223fe5ee48
+size 5152733
Tic Tac Toe RL/models/dueling_dqn_episode_200.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:19050edfa22a03f7e0443f695500c1a68307aadf58f2199438ef95a289dc2eec
+size 2417233
Tic Tac Toe RL/models/dueling_dqn_episode_2000.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ec3e4ed4d208f5a75da8e58c70cb83464d9037719a358bebea76b69786d928b9
+size 5152221
Tic Tac Toe RL/models/dueling_dqn_episode_2100.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6be461c61433f8516fcf60b1f9b91a81f9492698b76c831e5dec7748e7fb0914
+size 5152157
Tic Tac Toe RL/models/dueling_dqn_episode_2200.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1f61d3fb28f2e81314bebb4375afc14a71b5861cabc5876160e41dc0f581b4aa
+size 5154077
Tic Tac Toe RL/models/dueling_dqn_episode_2300.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ddae05e38057f556a29cb233e37ace40274cb3a03b7e0b29319be844cf38870c
+size 5149469
Tic Tac Toe RL/models/dueling_dqn_episode_2400.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b4109bcc122f27be544e91d46efb9ac0d2f9769c2ce7ac6c38689672f42deacc
+size 5150621
Tic Tac Toe RL/models/dueling_dqn_episode_2500.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8d8807d7202152b6b65a158ca4385529eccb34943657b1349d1fe7b4ff332c3f
+size 5153629
Tic Tac Toe RL/models/dueling_dqn_episode_2600.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:11b052c2ff7adc88fd510639db8e4a96eeaab48fcf2ea5b24b7343532766f1c1
+size 5150877
Tic Tac Toe RL/models/dueling_dqn_episode_2700.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:015cd27b690ab04191663a5ceb67e29ace9d36b3c65204360dc38c3acd14f531
+size 5150173
Tic Tac Toe RL/models/dueling_dqn_episode_2800.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:91b10e1a94413e0c6fe284d706b5a735c7480025285a80906c2f08c46b9b72bc
+size 5149789
Tic Tac Toe RL/models/dueling_dqn_episode_2900.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3cba87fbbd59e713bc4e6d217762ea09ee2c3b9c06b2d09f1ceb40efe513e459
+size 5146397
Tic Tac Toe RL/models/dueling_dqn_episode_300.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:766075d03432a67b90d07f5bae5aae62741318d78717c1f5191a3563ff3801a8
+size 2659281
Tic Tac Toe RL/models/dueling_dqn_episode_3000.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:40f78912717adff486ef96d9f410e696fdce3af785a3907ddad83f458db60be5
+size 5145501
Tic Tac Toe RL/models/dueling_dqn_episode_3100.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:216edbd69bf3c21d246e4a82566b3a9eea7e9b2a989a81413079da9404dd6a50
+size 5144285
Tic Tac Toe RL/models/dueling_dqn_episode_3200.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b7e0e306d3ed614aa648a650e174e3baad4dcfeb7f0e3855cb4bdc8942eebeab
+size 5145629
Tic Tac Toe RL/models/dueling_dqn_episode_3300.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8b1f1f8404693a759e62b1f985bd5a7b7f2c81ade12935ab95f5d0ce07ed0675
+size 5149661
Tic Tac Toe RL/models/dueling_dqn_episode_3400.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e925beaad67ac5a006ee6f74a7db596640c1217ac1a540e311e80ca50798427c
+size 5151965
Tic Tac Toe RL/models/dueling_dqn_episode_3500.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:213d2e33f51609a0b0442065eafb77c4316280f78c059bce99c55aec2645c193
+size 5151581
Tic Tac Toe RL/models/dueling_dqn_episode_3600.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bae1cb761c8b82b183f2b057dc06006b58e5e08763d106b4d2fc91b7eee66916
+size 5144733
Tic Tac Toe RL/models/dueling_dqn_episode_3700.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:85721bf70830cce1615d227ad69238e6b045104b68c8c4c2105e6509d112223d
+size 5136349