TroglodyteDerivations committed
Commit 661308e · verified · 1 Parent(s): 86134e9

Upload 73 files

This view is limited to 50 files because the commit contains too many changes. See the raw diff for the full list.
Files changed (50)
  1. .gitattributes +5 -0
  2. Imitation Learning Tic Tac Toe AI 2/analyze_ttt_model.py +377 -0
  3. Imitation Learning Tic Tac Toe AI 2/create_pretrained_ai.py +129 -0
  4. Imitation Learning Tic Tac Toe AI 2/improved_game.py +529 -0
  5. Imitation Learning Tic Tac Toe AI 2/requirements.txt +2 -0
  6. Imitation Learning Tic Tac Toe AI 2/ttt_ai_model_improved.pkl +3 -0
  7. Imitation Learning Tic Tac Toe AI 2/ttt_diagnostic.py +351 -0
  8. Imitation Learning Tic Tac Toe AI/Screenshot 2025-12-02 at 12.30.57 PM.png +3 -0
  9. Imitation Learning Tic Tac Toe AI/Screenshot 2025-12-02 at 12.33.32 PM.png +3 -0
  10. Imitation Learning Tic Tac Toe AI/Screenshot 2025-12-02 at 12.33.43 PM.png +3 -0
  11. Imitation Learning Tic Tac Toe AI/analyze_ttt_model.py +377 -0
  12. Imitation Learning Tic Tac Toe AI/requirements.txt +2 -0
  13. Imitation Learning Tic Tac Toe AI/tic_tac_toe_ai.py +483 -0
  14. Imitation Learning Tic Tac Toe AI/ttt_ai_model.pkl +3 -0
  15. Imitation Learning Tic Tac Toe AI/ttt_diagnostic.py +351 -0
  16. Tic Tac Toe RL/app.py +721 -0
  17. Tic Tac Toe RL/eval_models.py +464 -0
  18. Tic Tac Toe RL/model_evaluation_results.csv +52 -0
  19. Tic Tac Toe RL/model_performance_analysis.png +3 -0
  20. Tic Tac Toe RL/models/dueling_dqn_episode_100.pth +3 -0
  21. Tic Tac Toe RL/models/dueling_dqn_episode_1000.pth +3 -0
  22. Tic Tac Toe RL/models/dueling_dqn_episode_1100.pth +3 -0
  23. Tic Tac Toe RL/models/dueling_dqn_episode_1200.pth +3 -0
  24. Tic Tac Toe RL/models/dueling_dqn_episode_1300.pth +3 -0
  25. Tic Tac Toe RL/models/dueling_dqn_episode_1400.pth +3 -0
  26. Tic Tac Toe RL/models/dueling_dqn_episode_1500.pth +3 -0
  27. Tic Tac Toe RL/models/dueling_dqn_episode_1600.pth +3 -0
  28. Tic Tac Toe RL/models/dueling_dqn_episode_1700.pth +3 -0
  29. Tic Tac Toe RL/models/dueling_dqn_episode_1800.pth +3 -0
  30. Tic Tac Toe RL/models/dueling_dqn_episode_1900.pth +3 -0
  31. Tic Tac Toe RL/models/dueling_dqn_episode_200.pth +3 -0
  32. Tic Tac Toe RL/models/dueling_dqn_episode_2000.pth +3 -0
  33. Tic Tac Toe RL/models/dueling_dqn_episode_2100.pth +3 -0
  34. Tic Tac Toe RL/models/dueling_dqn_episode_2200.pth +3 -0
  35. Tic Tac Toe RL/models/dueling_dqn_episode_2300.pth +3 -0
  36. Tic Tac Toe RL/models/dueling_dqn_episode_2400.pth +3 -0
  37. Tic Tac Toe RL/models/dueling_dqn_episode_2500.pth +3 -0
  38. Tic Tac Toe RL/models/dueling_dqn_episode_2600.pth +3 -0
  39. Tic Tac Toe RL/models/dueling_dqn_episode_2700.pth +3 -0
  40. Tic Tac Toe RL/models/dueling_dqn_episode_2800.pth +3 -0
  41. Tic Tac Toe RL/models/dueling_dqn_episode_2900.pth +3 -0
  42. Tic Tac Toe RL/models/dueling_dqn_episode_300.pth +3 -0
  43. Tic Tac Toe RL/models/dueling_dqn_episode_3000.pth +3 -0
  44. Tic Tac Toe RL/models/dueling_dqn_episode_3100.pth +3 -0
  45. Tic Tac Toe RL/models/dueling_dqn_episode_3200.pth +3 -0
  46. Tic Tac Toe RL/models/dueling_dqn_episode_3300.pth +3 -0
  47. Tic Tac Toe RL/models/dueling_dqn_episode_3400.pth +3 -0
  48. Tic Tac Toe RL/models/dueling_dqn_episode_3500.pth +3 -0
  49. Tic Tac Toe RL/models/dueling_dqn_episode_3600.pth +3 -0
  50. Tic Tac Toe RL/models/dueling_dqn_episode_3700.pth +3 -0
.gitattributes CHANGED
@@ -33,3 +33,8 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ Imitation[[:space:]]Learning[[:space:]]Tic[[:space:]]Tac[[:space:]]Toe[[:space:]]AI/Screenshot[[:space:]]2025-12-02[[:space:]]at[[:space:]]12.30.57 PM.png filter=lfs diff=lfs merge=lfs -text
+ Imitation[[:space:]]Learning[[:space:]]Tic[[:space:]]Tac[[:space:]]Toe[[:space:]]AI/Screenshot[[:space:]]2025-12-02[[:space:]]at[[:space:]]12.33.32 PM.png filter=lfs diff=lfs merge=lfs -text
+ Imitation[[:space:]]Learning[[:space:]]Tic[[:space:]]Tac[[:space:]]Toe[[:space:]]AI/Screenshot[[:space:]]2025-12-02[[:space:]]at[[:space:]]12.33.43 PM.png filter=lfs diff=lfs merge=lfs -text
+ output.mp4 filter=lfs diff=lfs merge=lfs -text
+ Tic[[:space:]]Tac[[:space:]]Toe[[:space:]]RL/model_performance_analysis.png filter=lfs diff=lfs merge=lfs -text
Imitation Learning Tic Tac Toe AI 2/analyze_ttt_model.py ADDED
@@ -0,0 +1,377 @@
+ import pickle
+ import numpy as np
+ import os
+ from collections import deque
+ import json
+
+ def load_pickle_file(filename="ttt_ai_model.pkl"):
+     """
+     Load and analyze the Tic-Tac-Toe AI model pickle file.
+
+     Args:
+         filename (str): Path to the pickle file
+
+     Returns:
+         dict: The loaded data or None if file doesn't exist
+     """
+
+     if not os.path.exists(filename):
+         print(f"❌ File '{filename}' not found!")
+         print("Possible reasons:")
+         print("1. The game hasn't been played yet")
+         print("2. The file was saved with a different name")
+         print("3. The file is in a different directory")
+         return None
+
+     try:
+         print(f"📂 Opening '{filename}'...")
+
+         # Load the pickle file
+         with open(filename, 'rb') as f:
+             data = pickle.load(f)
+
+         print("✅ File loaded successfully!")
+         print("\n" + "="*60)
+
+         return data
+
+     except Exception as e:
+         print(f"❌ Error loading pickle file: {e}")
+         print(f"Error type: {type(e).__name__}")
+         return None
+
+ def analyze_model(data):
+     """
+     Analyze and display information about the AI model.
+
+     Args:
+         data (dict): The loaded pickle data
+     """
+
+     if not data:
+         print("No data to analyze")
+         return
+
+     print("📊 MODEL ANALYSIS")
+     print("="*60)
+
+     # Check what keys are available
+     print(f"Keys in data: {list(data.keys())}")
+
+     # Analyze model matrix if present
+     if 'model' in data:
+         model = data['model']
+         print(f"\n🤖 AI Model Information:")
+         print(f"   Shape: {model.shape}")
+         print(f"   Size: {model.size:,} elements")
+         print(f"   Data type: {model.dtype}")
+
+         # Calculate some statistics
+         print(f"\n📈 Model Statistics:")
+         print(f"   Non-zero entries: {np.count_nonzero(model):,}")
+         print(f"   Zero entries: {np.sum(model == 0):,}")
+         print(f"   Sparsity: {(np.sum(model == 0) / model.size) * 100:.2f}%")
+
+         # Get min, max, mean values
+         if model.size > 0:
+             flat_model = model.flatten()
+             non_zero_values = flat_model[flat_model != 0]
+
+             if len(non_zero_values) > 0:
+                 print(f"   Min value (non-zero): {non_zero_values.min():.6f}")
+                 print(f"   Max value: {flat_model.max():.6f}")
+                 print(f"   Mean value (non-zero): {non_zero_values.mean():.6f}")
+                 print(f"   Std dev (non-zero): {non_zero_values.std():.6f}")
+
+                 # Count of positive vs negative values
+                 positive = np.sum(flat_model > 0)
+                 negative = np.sum(flat_model < 0)
+                 print(f"   Positive values: {positive:,}")
+                 print(f"   Negative values: {negative:,}")
+
+     # Analyze experience replay if present
+     if 'experience' in data:
+         experience = data['experience']
+         print(f"\n🎮 Experience Replay Buffer:")
+         print(f"   Number of experiences: {len(experience):,}")
+
+         if experience:
+             # Show first few experiences
+             print(f"   Sample experience (first):")
+             if hasattr(experience[0], '__len__'):
+                 print(f"   Length: {len(experience[0])}")
+                 if len(experience[0]) > 0:
+                     print(f"   First element type: {type(experience[0][0])}")
+
+     # Check for other data
+     for key in data.keys():
+         if key not in ['model', 'experience']:
+             value = data[key]
+             print(f"\n🔍 {key}:")
+             print(f"   Type: {type(value)}")
+             if isinstance(value, (list, tuple, deque)):
+                 print(f"   Length: {len(value)}")
+             elif isinstance(value, dict):
+                 print(f"   Keys: {list(value.keys())[:5]}..." if len(value) > 5 else f"   Keys: {list(value.keys())}")
+             elif isinstance(value, np.ndarray):
+                 print(f"   Shape: {value.shape}")
+
+ def examine_specific_states(model, num_states=5):
+     """
+     Examine specific state-action values in the model.
+
+     Args:
+         model (np.ndarray): The AI model
+         num_states (int): Number of states to examine
+     """
+
+     print(f"\n🔬 Examining {num_states} specific states:")
+     print("-"*40)
+
+     # Find states with non-zero values
+     non_zero_indices = np.nonzero(np.any(model != 0, axis=1))[0]
+
+     if len(non_zero_indices) > 0:
+         print(f"Found {len(non_zero_indices):,} states with learned values")
+
+         # Sample some states to examine
+         if len(non_zero_indices) > num_states:
+             sample_indices = np.random.choice(non_zero_indices, num_states, replace=False)
+         else:
+             sample_indices = non_zero_indices
+
+         for i, state_idx in enumerate(sample_indices):
+             q_values = model[state_idx]
+             non_zero_q = q_values[q_values != 0]
+
+             if len(non_zero_q) > 0:
+                 print(f"\nState {i+1} (Index {state_idx}):")
+                 print(f"   Non-zero Q-values: {len(non_zero_q)}")
+                 print(f"   Actions with values:")
+                 for action in np.where(q_values != 0)[0]:
+                     print(f"   Action {action}: {q_values[action]:.4f}")
+     else:
+         print("No states with learned values found yet.")
+
+ def decode_state(state_index):
+     """
+     Convert a state index back to a board representation.
+     This assumes the same encoding used in the game.
+
+     Args:
+         state_index (int): The encoded state index
+
+     Returns:
+         list: Board representation (0=empty, 1=X, 2=O)
+     """
+     board = [0] * 9
+     temp_index = state_index
+
+     for i in range(9):
+         board[i] = temp_index % 3
+         temp_index //= 3
+
+     return board
+
+ def display_board(board):
+     """
+     Display a Tic-Tac-Toe board in human-readable format.
+
+     Args:
+         board (list): Board representation
+     """
+     symbols = {0: '.', 1: 'X', 2: 'O'}
+
+     print("Board state:")
+     for row in range(3):
+         row_chars = [symbols[board[row*3 + col]] for col in range(3)]
+         print("  " + " | ".join(row_chars))
+         if row < 2:
+             print("  " + "-" * 9)
+
+ def explore_model_interactively(model):
+     """
+     Interactive exploration of the model.
+
+     Args:
+         model (np.ndarray): The AI model
+     """
+
+     print("\n🎯 INTERACTIVE EXPLORATION")
+     print("="*60)
+
+     while True:
+         print("\nOptions:")
+         print("1. Look up a specific state")
+         print("2. Find states with highest Q-values")
+         print("3. Find best action for a given state")
+         print("4. Exit exploration")
+
+         choice = input("\nEnter your choice (1-4): ").strip()
+
+         if choice == '1':
+             try:
+                 state_idx = int(input("Enter state index (0-19682): "))
+                 if 0 <= state_idx < model.shape[0]:
+                     board = decode_state(state_idx)
+                     display_board(board)
+
+                     q_values = model[state_idx]
+                     print(f"\nQ-values for state {state_idx}:")
+                     for action in range(9):
+                         if q_values[action] != 0:
+                             print(f"   Action {action} (row {action//3}, col {action%3}): {q_values[action]:.4f}")
+
+                     # Show best action
+                     best_action = np.argmax(q_values)
+                     print(f"\nBest action: {best_action} (row {best_action//3}, col {best_action%3})")
+                 else:
+                     print("Invalid state index!")
+             except ValueError:
+                 print("Please enter a valid number!")
+
+         elif choice == '2':
+             try:
+                 num_states = int(input("How many top states? (1-100): "))
+                 num_states = max(1, min(100, num_states))
+
+                 # Find states with maximum Q-values
+                 max_q_per_state = np.max(model, axis=1)
+                 top_indices = np.argsort(max_q_per_state)[-num_states:][::-1]
+
+                 print(f"\nTop {num_states} states with highest Q-values:")
+                 for i, idx in enumerate(top_indices[:10]):  # Show first 10
+                     max_q = max_q_per_state[idx]
+                     if max_q > 0:
+                         board = decode_state(idx)
+                         print(f"\n{i+1}. State {idx} (max Q: {max_q:.4f})")
+                         display_board(board)
+             except ValueError:
+                 print("Please enter a valid number!")
+
+         elif choice == '3':
+             # Create a board manually
+             print("\nEnter board state (9 numbers, 0=empty, 1=X, 2=O)")
+             print("Example: 0 0 1 0 2 0 0 0 0")
+
+             try:
+                 board_input = input("Board: ").strip()
+                 if len(board_input) == 0:
+                     # Use default example
+                     board = [0, 0, 1, 0, 2, 0, 0, 0, 0]
+                 else:
+                     board = [int(x) for x in board_input.split()]
+
+                 if len(board) != 9 or any(x not in [0, 1, 2] for x in board):
+                     raise ValueError("Invalid board")
+
+                 display_board(board)
+
+                 # Convert to state index
+                 state_idx = 0
+                 for i, cell in enumerate(board):
+                     state_idx += cell * (3 ** i)
+
+                 q_values = model[state_idx]
+
+                 # Only show available moves
+                 available_moves = [i for i, cell in enumerate(board) if cell == 0]
+
+                 print("\nAvailable moves and their Q-values:")
+                 for move in available_moves:
+                     q_val = q_values[move]
+                     row, col = divmod(move, 3)
+                     print(f"   Move {move} (row {row}, col {col}): {q_val:.4f}")
+
+                 if available_moves:
+                     best_move = available_moves[np.argmax([q_values[m] for m in available_moves])]
+                     row, col = divmod(best_move, 3)
+                     print(f"\nRecommended move: {best_move} (row {row}, col {col})")
+                 else:
+                     print("No available moves!")
+
+             except Exception as e:
+                 print(f"Error: {e}")
+
+         elif choice == '4':
+             print("Exiting interactive exploration.")
+             break
+
+         else:
+             print("Invalid choice!")
+
+ def save_model_summary(data, filename="model_summary.json"):
+     """
+     Save a summary of the model to a JSON file.
+
+     Args:
+         data (dict): The loaded pickle data
+         filename (str): Output JSON filename
+     """
+     if not data:
+         return
+
+     summary = {}
+
+     if 'model' in data:
+         model = data['model']
+         summary['model'] = {
+             'shape': model.shape,
+             'size': int(model.size),
+             'non_zero_entries': int(np.count_nonzero(model)),
+             'sparsity': float((np.sum(model == 0) / model.size) * 100)
+         }
+
+     if 'experience' in data:
+         experience = data['experience']
+         summary['experience'] = {
+             'count': len(experience)
+         }
+
+     try:
+         with open(filename, 'w') as f:
+             json.dump(summary, f, indent=2)
+         print(f"\n💾 Model summary saved to '{filename}'")
+     except Exception as e:
+         print(f"Error saving summary: {e}")
+
+ def main():
+     """
+     Main function to load and analyze the pickle file.
+     """
+     print("🔍 Tic-Tac-Toe AI Model Analyzer")
+     print("="*60)
+
+     # Try to load the pickle file
+     filename = "ttt_ai_model.pkl"
+     data = load_pickle_file(filename)
+
+     if data:
+         # Analyze the model
+         analyze_model(data)
+
+         # If model exists, do more detailed analysis
+         if 'model' in data:
+             # Examine specific states
+             examine_specific_states(data['model'])
+
+             # Interactive exploration
+             explore = input("\nWould you like to explore the model interactively? (y/n): ").strip().lower()
+             if explore == 'y':
+                 explore_model_interactively(data['model'])
+
+             # Save summary
+             save = input("\nWould you like to save a summary? (y/n): ").strip().lower()
+             if save == 'y':
+                 save_model_summary(data)
+
+         # Additional file info
+         print("\n📄 File Information:")
+         print(f"   File size: {os.path.getsize(filename):,} bytes")
+         print(f"   Last modified: {os.path.getmtime(filename):.0f}")
+
+     print("\n" + "="*60)
+     print("Analysis complete!")
+
+ if __name__ == "__main__":
+     main()
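
Note: the script assumes the board is encoded in base 3, with cell 0 as the least-significant digit, which is why valid state indices run from 0 to 3**9 - 1 = 19682. A minimal round-trip sketch of that encoding (a standalone illustration mirroring decode_state above, not part of the commit):

    def encode_state(board):
        # Base-3 encoding: cell i contributes board[i] * 3**i
        return sum(cell * (3 ** i) for i, cell in enumerate(board))

    def decode_state(state_index):
        # Inverse: peel off base-3 digits, least-significant cell first
        board = [0] * 9
        for i in range(9):
            board[i] = state_index % 3
            state_index //= 3
        return board

    board = [0, 0, 1, 0, 2, 0, 0, 0, 0]   # X at cell 2, O at cell 4
    idx = encode_state(board)              # 1*3**2 + 2*3**4 = 9 + 162 = 171
    assert decode_state(idx) == board
    assert encode_state([2] * 9) == 3 ** 9 - 1  # = 19682, the maximum index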
Imitation Learning Tic Tac Toe AI 2/create_pretrained_ai.py ADDED
@@ -0,0 +1,129 @@
+ import pickle
+ import numpy as np
+ import random
+
+ def create_basic_ai_knowledge():
+     """Create a basic Tic-Tac-Toe AI with fundamental strategies"""
+
+     q_table = {}
+
+     # 1. Empty board - prefer center and corners
+     empty_board = (0,0,0,0,0,0,0,0,0)
+     q_values = [0.0] * 9
+     q_values[4] = 0.8  # Center is best
+     q_values[0] = q_values[2] = q_values[6] = q_values[8] = 0.7  # Corners are good
+     q_values[1] = q_values[3] = q_values[5] = q_values[7] = 0.5  # Edges are okay
+     q_table[empty_board] = q_values
+
+     # 2. Opponent in center - take corners
+     center_taken = (0,0,0,0,1,0,0,0,0)  # X in center
+     q_values = [0.0] * 9
+     q_values[0] = q_values[2] = q_values[6] = q_values[8] = 0.9  # Corners are best
+     q_table[center_taken] = q_values
+
+     # 3. Winning moves - very high value
+     # Example: two O's in a row/col/diag
+     winning_patterns = [
+         # Horizontal
+         (2,2,0,0,0,0,0,0,0),  # Need position 2
+         (0,2,2,0,0,0,0,0,0),  # Need position 0
+         (2,0,2,0,0,0,0,0,0),  # Need position 1
+
+         # Vertical
+         (2,0,0,2,0,0,0,0,0),  # Need position 6
+         (0,2,0,0,2,0,0,0,0),  # Need position 7
+         (0,0,2,0,0,2,0,0,0),  # Need position 8
+
+         # Diagonal
+         (2,0,0,0,2,0,0,0,0),  # Need position 8
+         (0,0,2,0,2,0,0,0,0),  # Need position 6
+     ]
+
+     for board in winning_patterns:
+         q_values = [0.0] * 9
+         # Find empty spot that completes the line
+         for i in range(9):
+             if board[i] == 0:
+                 # Check if this completes three in a row
+                 test_board = list(board)
+                 test_board[i] = 2
+
+                 # Check if this is a winning move
+                 winning = False
+                 lines = [
+                     [0,1,2], [3,4,5], [6,7,8],  # Rows
+                     [0,3,6], [1,4,7], [2,5,8],  # Columns
+                     [0,4,8], [2,4,6]            # Diagonals
+                 ]
+
+                 for line in lines:
+                     if (test_board[line[0]] == test_board[line[1]] ==
+                             test_board[line[2]] == 2):
+                         winning = True
+                         break
+
+                 if winning:
+                     q_values[i] = 1.0  # Very high value for winning move
+
+         q_table[board] = q_values
+
+     # 4. Blocking moves - high value
+     blocking_patterns = [
+         # Block horizontal
+         (1,1,0,0,0,0,0,0,0),  # Block at 2
+         (0,1,1,0,0,0,0,0,0),  # Block at 0
+         (1,0,1,0,0,0,0,0,0),  # Block at 1
+
+         # Block vertical
+         (1,0,0,1,0,0,0,0,0),  # Block at 6
+         (0,1,0,0,1,0,0,0,0),  # Block at 7
+         (0,0,1,0,0,1,0,0,0),  # Block at 8
+     ]
+
+     for board in blocking_patterns:
+         q_values = [0.0] * 9
+         # Find blocking move
+         for i in range(9):
+             if board[i] == 0:
+                 # Check if this blocks opponent
+                 test_board = list(board)
+                 test_board[i] = 1  # Temporarily place opponent's piece
+
+                 # Check if opponent would win
+                 opponent_wins = False
+                 lines = [
+                     [0,1,2], [3,4,5], [6,7,8],
+                     [0,3,6], [1,4,7], [2,5,8],
+                     [0,4,8], [2,4,6]
+                 ]
+
+                 for line in lines:
+                     if (test_board[line[0]] == test_board[line[1]] ==
+                             test_board[line[2]] == 1):
+                         opponent_wins = True
+                         break
+
+                 if opponent_wins:
+                     q_values[i] = 0.9  # High value for blocking
+
+         q_table[board] = q_values
+
+     # Save the pre-trained AI
+     data = {
+         'q_table': q_table,
+         'training_history': [],
+         'player_symbol': 2
+     }
+
+     with open('ttt_ai_pretrained.pkl', 'wb') as f:
+         pickle.dump(data, f)
+
+     print(f"Created pre-trained AI with {len(q_table)} board states")
+     print("Basic strategies included:")
+     print("1. Prefer center and corners")
+     print("2. Take corners when opponent has center")
+     print("3. Recognize winning moves")
+     print("4. Recognize blocking moves")
+
+ if __name__ == '__main__':
+     create_basic_ai_knowledge()
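
A quick way to sanity-check the generated file is to load it back and look up the empty-board entry; a minimal sketch (assumes ttt_ai_pretrained.pkl was just created by the script above):

    import pickle

    with open('ttt_ai_pretrained.pkl', 'rb') as f:
        data = pickle.load(f)

    q_table = data['q_table']
    q_values = q_table[(0,) * 9]  # board keys are 9-tuples

    # Highest-valued move on an empty board should be the center (index 4, value 0.8)
    best_move = max(range(9), key=lambda m: q_values[m])
    print(best_move, q_values[best_move])  # expected: 4 0.8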
Imitation Learning Tic Tac Toe AI 2/improved_game.py ADDED
@@ -0,0 +1,529 @@
+ import sys
+ import numpy as np
+ import random
+ from collections import defaultdict
+ import pickle
+ import os
+ from PyQt5.QtWidgets import *
+ from PyQt5.QtCore import *
+ from PyQt5.QtGui import *
+
+ class TicTacToeAI:
+     """Improved AI with proper imitation learning"""
+
+     def __init__(self, player_symbol=2):  # Default is O
+         self.q_table = defaultdict(lambda: [0.0] * 9)  # State -> [Q-values for 9 moves]
+         self.learning_rate = 0.3  # Increased for faster learning
+         self.exploration_rate = 0.3  # Start with exploration
+         self.discount_factor = 0.9
+         self.player_symbol = player_symbol
+         self.last_state = None
+         self.last_action = None
+         self.training_history = []
+
+         self.load_model()
+
+     def board_to_key(self, board):
+         """Convert board to hashable key"""
+         return tuple(board)
+
+     def get_available_moves(self, board):
+         """Get list of available positions"""
+         return [i for i, cell in enumerate(board) if cell == 0]
+
+     def choose_action(self, board, available_moves):
+         """Choose action using epsilon-greedy policy"""
+         board_key = self.board_to_key(board)
+
+         # Exploration: random move
+         if random.random() < self.exploration_rate:
+             action = random.choice(available_moves)
+         # Exploitation: best known move
+         else:
+             q_values = self.q_table[board_key]
+             # Filter to available moves
+             available_q = [(q_values[move], move) for move in available_moves]
+             # Choose move with highest Q-value
+             action = max(available_q, key=lambda x: x[0])[1]
+
+         # Store for learning
+         self.last_state = board_key
+         self.last_action = action
+
+         return action
+
+     def learn(self, reward, next_board, game_over):
+         """Q-learning update"""
+         if self.last_state is None or self.last_action is None:
+             return
+
+         board_key = self.last_state
+         action = self.last_action
+
+         # Current Q-value
+         current_q = self.q_table[board_key][action]
+
+         if game_over:
+             # Terminal state, no future rewards
+             future_q = 0
+         else:
+             # Estimate future reward
+             next_key = self.board_to_key(next_board)
+             next_available = self.get_available_moves(next_board)
+             if next_available:
+                 future_q = max(self.q_table[next_key][move] for move in next_available)
+             else:
+                 future_q = 0
+
+         # Q-learning update
+         new_q = current_q + self.learning_rate * (reward + self.discount_factor * future_q - current_q)
+         self.q_table[board_key][action] = new_q
+
+         # Record for analysis
+         self.training_history.append({
+             'state': board_key,
+             'action': action,
+             'reward': reward,
+             'new_q': new_q
+         })
+
+         # Clear for next move
+         self.last_state = None
+         self.last_action = None
+
+     def learn_from_observation(self, board_before, action, board_after, winner):
+         """Learn by observing human player moves"""
+         board_key = self.board_to_key(board_before)
+
+         # Determine reward based on game outcome
+         if winner == self.player_symbol:  # AI's symbol won
+             reward = 1.0
+         elif winner == 3 - self.player_symbol:  # Opponent won
+             reward = -1.0
+         elif winner is None:  # Draw
+             reward = 0.1
+         else:
+             reward = 0
+
+         # Update Q-value
+         current_q = self.q_table[board_key][action]
+         new_q = current_q + self.learning_rate * (reward - current_q)
+         self.q_table[board_key][action] = new_q
+
+     def save_model(self):
+         """Save Q-table to file"""
+         try:
+             # Convert defaultdict to regular dict for pickling
+             q_table_dict = dict(self.q_table)
+             data = {
+                 'q_table': q_table_dict,
+                 'training_history': self.training_history[-1000:],  # Keep last 1000
+                 'player_symbol': self.player_symbol
+             }
+             with open('ttt_ai_improved.pkl', 'wb') as f:
+                 pickle.dump(data, f)
+             print(f"Model saved with {len(q_table_dict)} states")
+         except Exception as e:
+             print(f"Error saving model: {e}")
+
+     def load_model(self):
+         """Load Q-table from file"""
+         filename = 'ttt_ai_improved.pkl'
+         if os.path.exists(filename):
+             try:
+                 with open(filename, 'rb') as f:
+                     data = pickle.load(f)
+                 self.q_table = defaultdict(lambda: [0.0] * 9, data.get('q_table', {}))
+                 self.training_history = data.get('training_history', [])
+                 self.player_symbol = data.get('player_symbol', 2)
+                 print(f"Model loaded with {len(self.q_table)} states")
+             except Exception as e:
+                 print(f"Error loading model: {e}")
+
+ class TicTacToeGame:
+     """Game logic - unchanged"""
+
+     def __init__(self):
+         self.reset()
+
+     def reset(self):
+         self.board = [0] * 9
+         self.current_player = 1
+         self.winner = None
+         self.game_over = False
+         self.moves = 0
+
+     def make_move(self, position):
+         if self.board[position] != 0 or self.game_over:
+             return False
+
+         self.board[position] = self.current_player
+         self.moves += 1
+
+         self.winner = self.check_winner()
+         if self.winner or self.moves == 9:
+             self.game_over = True
+         else:
+             self.current_player = 3 - self.current_player
+
+         return True
+
+     def check_winner(self):
+         winning_combinations = [
+             [0, 1, 2], [3, 4, 5], [6, 7, 8],
+             [0, 3, 6], [1, 4, 7], [2, 5, 8],
+             [0, 4, 8], [2, 4, 6]
+         ]
+
+         for combo in winning_combinations:
+             if (self.board[combo[0]] == self.board[combo[1]] ==
+                     self.board[combo[2]] != 0):
+                 return 'X' if self.board[combo[0]] == 1 else 'O'
+
+         return None
+
+     def get_board_state(self):
+         return self.board.copy()
+
+ class ImprovedGame(QMainWindow):
+     """Improved game with working imitation learning"""
+
+     def __init__(self):
+         super().__init__()
+         self.game = TicTacToeGame()
+         self.ai = TicTacToeAI(player_symbol=2)  # AI plays as O
+
+         # Training parameters
+         self.training_mode = True
+         self.observation_mode = True  # Learn from human moves
+         self.games_played = 0
+         self.ai_wins = 0
+         self.human_wins = 0
+         self.ties = 0
+
+         self.init_ui()
+         self.start_new_game()
+
+     def init_ui(self):
+         self.setWindowTitle('Improved Imitation Learning Tic-Tac-Toe')
+         self.setGeometry(100, 100, 450, 600)
+
+         central_widget = QWidget()
+         self.setCentralWidget(central_widget)
+         layout = QVBoxLayout()
+
+         # Game board
+         self.board_widget = self.create_board()
+         layout.addWidget(self.board_widget)
+
+         # Status
+         status_layout = QHBoxLayout()
+         self.status_label = QLabel("Your turn (X)")
+         self.status_label.setFont(QFont('Arial', 14))
+         status_layout.addWidget(self.status_label)
+
+         self.stats_label = QLabel("Games: 0 | AI: 0 | You: 0 | Ties: 0")
+         status_layout.addWidget(self.stats_label)
+         layout.addLayout(status_layout)
+
+         # Training controls
+         controls = QHBoxLayout()
+
+         self.train_btn = QPushButton("Training: ON")
+         self.train_btn.clicked.connect(self.toggle_training)
+         controls.addWidget(self.train_btn)
+
+         self.observe_btn = QPushButton("Learn from You: ON")
+         self.observe_btn.clicked.connect(self.toggle_observation)
+         controls.addWidget(self.observe_btn)
+
+         self.new_game_btn = QPushButton("New Game")
+         self.new_game_btn.clicked.connect(self.start_new_game)
+         controls.addWidget(self.new_game_btn)
+
+         self.save_btn = QPushButton("Save AI")
+         self.save_btn.clicked.connect(self.save_ai)
+         controls.addWidget(self.save_btn)
+
+         layout.addLayout(controls)
+
+         # Learning parameters
+         params = QGridLayout()
+
+         params.addWidget(QLabel("Learning Rate:"), 0, 0)
+         self.lr_slider = QSlider(Qt.Horizontal)
+         self.lr_slider.setRange(1, 50)
+         self.lr_slider.setValue(int(self.ai.learning_rate * 100))
+         self.lr_slider.valueChanged.connect(self.update_learning_rate)
+         params.addWidget(self.lr_slider, 0, 1)
+
+         params.addWidget(QLabel("Exploration:"), 1, 0)
+         self.exp_slider = QSlider(Qt.Horizontal)
+         self.exp_slider.setRange(0, 100)
+         self.exp_slider.setValue(int(self.ai.exploration_rate * 100))
+         self.exp_slider.valueChanged.connect(self.update_exploration)
+         params.addWidget(self.exp_slider, 1, 1)
+
+         layout.addLayout(params)
+
+         # Learning log
+         self.log_text = QTextEdit()
+         self.log_text.setMaximumHeight(150)
+         self.log_text.setReadOnly(True)
+         layout.addWidget(self.log_text)
+
+         central_widget.setLayout(layout)
+
+         # AI move timer
+         self.ai_timer = QTimer()
+         self.ai_timer.timeout.connect(self.ai_move)
+
+         self.log("AI initialized. Play as X to train the AI!")
+         self.log(f"AI knows {len(self.ai.q_table)} board states")
+
+     def create_board(self):
+         widget = QWidget()
+         grid = QGridLayout()
+         grid.setSpacing(5)
+
+         self.buttons = []
+         for i in range(9):
+             btn = QPushButton('')
+             btn.setFixedSize(100, 100)
+             btn.setFont(QFont('Arial', 24))
+             btn.clicked.connect(lambda checked, pos=i: self.human_move(pos))
+
+             row, col = divmod(i, 3)
+             grid.addWidget(btn, row, col)
+             self.buttons.append(btn)
+
+         widget.setLayout(grid)
+         return widget
+
+     def update_board(self):
+         """Update button display from game state"""
+         symbols = {0: '', 1: 'X', 2: 'O'}
+         colors = {0: 'black', 1: 'red', 2: 'blue'}
+
+         for i, btn in enumerate(self.buttons):
+             symbol = symbols[self.game.board[i]]
+             color = colors[self.game.board[i]]
+             btn.setText(symbol)
+             btn.setStyleSheet(f"color: {color}; font-weight: bold;")
+
+     def human_move(self, position):
+         """Handle human player move"""
+         if self.game.game_over or self.game.current_player != 1:
+             return
+
+         # Record board before move for learning
+         board_before = self.game.get_board_state()
+
+         if self.game.make_move(position):
+             self.update_board()
+
+             # If learning from observation is enabled
+             if self.training_mode and self.observation_mode:
+                 # The AI learns from the human move
+                 self.ai.learn_from_observation(
+                     board_before,
+                     position,
+                     self.game.get_board_state(),
+                     None  # Game not over yet
+                 )
+                 self.log(f"AI observed your move at {position}")
+
+             if self.game.game_over:
+                 self.end_game()
+             else:
+                 # AI's turn
+                 self.status_label.setText("AI thinking...")
+                 self.ai_timer.start(300)  # Shorter delay
+
+     def ai_move(self):
+         """Handle AI player move"""
+         self.ai_timer.stop()
+
+         if self.game.game_over or self.game.current_player != 2:
+             return
+
+         # Get available moves
+         available_moves = [i for i, cell in enumerate(self.game.board) if cell == 0]
+
+         if available_moves:
+             # Choose action
+             action = self.ai.choose_action(self.game.board, available_moves)
+
+             # Record state before move for Q-learning
+             board_before = self.game.get_board_state()
+
+             if self.game.make_move(action):
+                 self.update_board()
+
+                 # Q-learning update
+                 if self.training_mode:
+                     # Determine reward
+                     if self.game.game_over:
+                         if self.game.winner == 'O':
+                             reward = 1.0  # AI won
+                         elif self.game.winner == 'X':
+                             reward = -1.0  # AI lost
+                         else:
+                             reward = 0.1  # Draw
+                     else:
+                         reward = 0  # Intermediate move
+
+                     # Update Q-values
+                     self.ai.learn(reward, self.game.get_board_state(), self.game.game_over)
+
+                 if self.game.game_over:
+                     self.end_game()
+                 else:
+                     self.status_label.setText("Your turn (X)")
+                     self.log(f"AI moved to {action}")
+
+     def end_game(self):
+         """Handle game end"""
+         winner = self.game.winner
+
+         # Update statistics
+         self.games_played += 1
+         if winner == 'X':
+             self.human_wins += 1
+             result = "You win!"
+         elif winner == 'O':
+             self.ai_wins += 1
+             result = "AI wins!"
+             # Strong positive reinforcement for winning
+             if self.training_mode:
+                 self.log("AI won! Giving strong positive reward")
+         else:
+             self.ties += 1
+             result = "It's a tie!"
+
+         self.update_stats()
+
+         # Final Q-learning update for the last move
+         if self.training_mode and winner is not None:
+             # Determine final reward for AI
+             final_reward = 1.0 if winner == 'O' else -1.0 if winner == 'X' else 0.1
+             self.ai.learn(final_reward, self.game.board, True)
+
+             # Also learn from the complete game if observation mode is on
+             if self.observation_mode:
+                 self.log(f"AI learned from {result}")
+
+         # Update status
+         self.status_label.setText(result)
+
+         # Highlight winning cells
+         if winner:
+             self.highlight_winner()
+
+         # Offer new game
+         QTimer.singleShot(1000, self.offer_new_game)
+
+     def highlight_winner(self):
+         """Highlight winning combination"""
+         winning_combinations = [
+             [0, 1, 2], [3, 4, 5], [6, 7, 8],
+             [0, 3, 6], [1, 4, 7], [2, 5, 8],
+             [0, 4, 8], [2, 4, 6]
+         ]
+
+         for combo in winning_combinations:
+             if (self.game.board[combo[0]] == self.game.board[combo[1]] ==
+                     self.game.board[combo[2]] != 0):
+                 for pos in combo:
+                     self.buttons[pos].setStyleSheet(
+                         "background-color: lightgreen; font-weight: bold;"
+                     )
+                 break
+
+     def offer_new_game(self):
+         """Ask if player wants to play again"""
+         msg = QMessageBox()
+         msg.setWindowTitle("Game Over")
+         msg.setText(f"{self.status_label.text()}")
+         msg.setInformativeText("Play again?")
+         msg.setStandardButtons(QMessageBox.Yes | QMessageBox.No)
+
+         if msg.exec_() == QMessageBox.Yes:
+             self.start_new_game()
+
+     def start_new_game(self):
+         """Start a new game"""
+         self.game.reset()
+         self.update_board()
+
+         # Reset button colors
+         for btn in self.buttons:
+             btn.setStyleSheet("")
+
+         # Always let human start
+         self.status_label.setText("Your turn (X)")
+
+         # Gradually reduce exploration
+         if self.games_played > 20:
+             self.ai.exploration_rate = max(0.1, self.ai.exploration_rate * 0.95)
+             self.exp_slider.setValue(int(self.ai.exploration_rate * 100))
+
+         self.log(f"New game started (Game {self.games_played + 1})")
+         self.log(f"AI exploration: {self.ai.exploration_rate:.2f}")
+
+     def toggle_training(self):
+         """Toggle training mode"""
+         self.training_mode = not self.training_mode
+         self.train_btn.setText(f"Training: {'ON' if self.training_mode else 'OFF'}")
+         self.log(f"Training mode {'enabled' if self.training_mode else 'disabled'}")
+
+     def toggle_observation(self):
+         """Toggle learning from human moves"""
+         self.observation_mode = not self.observation_mode
+         self.observe_btn.setText(f"Learn from You: {'ON' if self.observation_mode else 'OFF'}")
+         self.log(f"Learning from your moves {'enabled' if self.observation_mode else 'disabled'}")
+
+     def update_learning_rate(self, value):
+         """Update learning rate"""
+         self.ai.learning_rate = value / 100.0
+         self.log(f"Learning rate: {self.ai.learning_rate:.2f}")
+
+     def update_exploration(self, value):
+         """Update exploration rate"""
+         self.ai.exploration_rate = value / 100.0
+         self.log(f"Exploration rate: {self.ai.exploration_rate:.2f}")
+
+     def update_stats(self):
+         """Update statistics display"""
+         self.stats_label.setText(
+             f"Games: {self.games_played} | "
+             f"AI: {self.ai_wins} | "
+             f"You: {self.human_wins} | "
+             f"Ties: {self.ties}"
+         )
+
+     def save_ai(self):
+         """Save AI model"""
+         self.ai.save_model()
+         self.log(f"AI model saved! Knows {len(self.ai.q_table)} states")
+
+     def log(self, message):
+         """Add message to log"""
+         self.log_text.append(f"[Game {self.games_played}] {message}")
+
+ def main():
+     app = QApplication(sys.argv)
+     app.setStyle('Fusion')
+
+     # Set a nice theme
+     palette = QPalette()
+     palette.setColor(QPalette.Window, QColor(240, 240, 240))
+     palette.setColor(QPalette.WindowText, Qt.black)
+     app.setPalette(palette)
+
+     game = ImprovedGame()
+     game.show()
+     sys.exit(app.exec_())
+
+ if __name__ == '__main__':
+     main()
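
The learn() method above implements the standard tabular Q-learning rule, Q(s,a) <- Q(s,a) + alpha * (r + gamma * max_a' Q(s',a') - Q(s,a)). A worked one-step example with the class defaults (alpha = 0.3, gamma = 0.9), independent of the GUI:

    # One Q-learning step by hand, matching TicTacToeAI.learn()
    alpha, gamma = 0.3, 0.9      # learning_rate, discount_factor defaults
    current_q = 0.0              # Q(s, a) for a previously unseen state-action pair
    reward = 1.0                 # the AI just won the game
    future_q = 0.0               # terminal state, so no future value

    new_q = current_q + alpha * (reward + gamma * future_q - current_q)
    print(new_q)  # 0.3 -- a win nudges the move's value up by alpha * reward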
Imitation Learning Tic Tac Toe AI 2/requirements.txt ADDED
@@ -0,0 +1,2 @@
+ PyQt5
+ numpy
Imitation Learning Tic Tac Toe AI 2/ttt_ai_model_improved.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b0b251454a01165321853d8e32658ef4a96b694bdb74eed00f4060d7cd331743
+ size 1417366
Imitation Learning Tic Tac Toe AI 2/ttt_diagnostic.py ADDED
@@ -0,0 +1,351 @@
+ import pickle
+ import numpy as np
+ import os
+
+ def load_ai_model():
+     """Load the AI model from pickle file"""
+     filename = "ttt_ai_model.pkl"
+     if not os.path.exists(filename):
+         print("Model file not found!")
+         return None
+
+     with open(filename, 'rb') as f:
+         data = pickle.load(f)
+
+     return data['model']
+
+ def decode_state(state_index):
+     """Convert state index to board representation"""
+     board = [0] * 9
+     temp_index = state_index
+
+     for i in range(9):
+         board[i] = temp_index % 3
+         temp_index //= 3
+
+     return board
+
+ def display_board(board):
+     """Display Tic-Tac-Toe board"""
+     symbols = {0: '.', 1: 'X', 2: 'O'}
+
+     print("Current board:")
+     for row in range(3):
+         row_chars = [symbols[board[row*3 + col]] for col in range(3)]
+         print("  " + " | ".join(row_chars))
+         if row < 2:
+             print("  " + "-" * 9)
+
+ def test_ai_with_common_scenarios(model):
+     """Test AI with common Tic-Tac-Toe scenarios"""
+
+     print("\n🤖 TESTING AI WITH COMMON SCENARIOS")
+     print("="*50)
+
+     test_cases = [
+         # Empty board
+         {
+             "name": "Empty board - first move",
+             "board": [0,0,0,0,0,0,0,0,0],
+             "expected": "Center (4) or corners (0,2,6,8)"
+         },
+         # Center taken by opponent
+         {
+             "name": "Opponent took center",
+             "board": [0,0,0,0,1,0,0,0,0],
+             "expected": "A corner (0,2,6,8)"
+         },
+         # Winning move for AI
+         {
+             "name": "AI can win in one move",
+             "board": [2,1,0,   # O X .
+                       1,2,0,   # X O .
+                       0,0,0],  # . . .
+             "expected": "Move 8 to complete diagonal"
+         },
+         # Block opponent's winning move
+         {
+             "name": "Block opponent's winning move",
+             "board": [1,0,0,   # X . .
+                       1,2,0,   # X O .
+                       0,0,0],  # . . .
+             "expected": "Move 6 to block vertical"
+         },
+         # Fork opportunity
+         {
+             "name": "Fork opportunity",
+             "board": [2,0,1,   # O . X
+                       0,1,0,   # . X .
+                       0,0,0],  # . . .
+             "expected": "Move 8 to create fork"
+         }
+     ]
+
+     for test in test_cases:
+         print(f"\n📋 {test['name']}")
+         display_board(test['board'])
+
+         # Convert board to state index
+         state_idx = 0
+         for i, cell in enumerate(test['board']):
+             state_idx += cell * (3 ** i)
+
+         # Get Q-values for this state
+         q_values = model[state_idx]
+
+         # Get available moves
+         available_moves = [i for i, cell in enumerate(test['board']) if cell == 0]
+
+         if available_moves:
+             print("\nAvailable moves and Q-values:")
+             for move in available_moves:
+                 q_val = q_values[move]
+                 row, col = divmod(move, 3)
+                 symbol = "⚠️" if q_val > 0 else "  "
+                 print(f"{symbol} Move {move} (row {row}, col {col}): {q_val:.4f}")
+
+             # AI's recommended move
+             best_move = available_moves[np.argmax([q_values[m] for m in available_moves])]
+             row, col = divmod(best_move, 3)
+             print(f"\n🤖 AI's choice: Move {best_move} (row {row}, col {col})")
+             print(f"📋 Expected: {test['expected']}")
+
+             # Check if AI learned something useful
+             best_q = q_values[best_move]
+             if best_q > 0:
+                 print("✅ AI has positive association with this move")
+             elif best_q < 0:
+                 print("❌ AI has negative association with this move (thinks it's bad)")
+             else:
+                 print("➖ AI has no learning for this move")
+         else:
+             print("No available moves!")
+
+ def analyze_learning_patterns(model):
+     """Analyze what patterns the AI has learned"""
+
+     print("\n🔍 ANALYZING LEARNING PATTERNS")
+     print("="*50)
+
+     # Find all states with non-zero Q-values
+     non_zero_indices = np.nonzero(np.any(model != 0, axis=1))[0]
+
+     print(f"Total states with learning: {len(non_zero_indices)}")
+
+     # Categorize by game phase
+     phases = {
+         "early": [],  # 0-2 moves made
+         "mid": [],    # 3-5 moves made
+         "late": []    # 6-8 moves made
+     }
+
+     for idx in non_zero_indices:
+         board = decode_state(idx)
+         moves_made = sum(1 for cell in board if cell != 0)
+
+         if moves_made <= 2:
+             phases["early"].append(idx)
+         elif moves_made <= 5:
+             phases["mid"].append(idx)
+         else:
+             phases["late"].append(idx)
+
+     print(f"\nLearning by game phase:")
+     print(f"   Early game (0-2 moves): {len(phases['early'])} states")
+     print(f"   Mid game (3-5 moves): {len(phases['mid'])} states")
+     print(f"   Late game (6-8 moves): {len(phases['late'])} states")
+
+     # Analyze Q-value distribution
+     all_q_values = model[non_zero_indices].flatten()
+     non_zero_q = all_q_values[all_q_values != 0]
+
+     if len(non_zero_q) > 0:
+         print(f"\nQ-value analysis:")
+         print(f"   Total Q-values: {len(non_zero_q)}")
+         print(f"   Positive Q-values: {np.sum(non_zero_q > 0)}")
+         print(f"   Negative Q-values: {np.sum(non_zero_q < 0)}")
+         print(f"   Average Q-value: {np.mean(non_zero_q):.4f}")
+         print(f"   Most positive: {np.max(non_zero_q):.4f}")
+         print(f"   Most negative: {np.min(non_zero_q):.4f}")
+
+     # Show examples of what AI learned
+     print("\n📚 Examples of learned states:")
+
+     # Find states with positive Q-values
+     positive_states = []
+     for idx in non_zero_indices:
+         if np.any(model[idx] > 0):
+             positive_states.append(idx)
+
+     if positive_states:
+         print(f"\nFound {len(positive_states)} states with positive associations")
+         for i, idx in enumerate(positive_states[:3]):  # Show first 3
+             board = decode_state(idx)
+             print(f"\nExample {i+1}:")
+             display_board(board)
+
+             q_values = model[idx]
+             positive_moves = np.where(q_values > 0)[0]
+             print("Moves AI thinks are good:")
+             for move in positive_moves:
+                 print(f"   Move {move} (row {move//3}, col {move%3}): {q_values[move]:.4f}")
+     else:
+         print("No positive associations found - AI hasn't learned winning strategies yet")
+
+ def check_for_specific_patterns(model):
+     """Check if AI has learned specific Tic-Tac-Toe strategies"""
+
+     print("\n🎯 CHECKING FOR SPECIFIC STRATEGIES")
+     print("="*50)
+
+     strategies = {
+         "prefer_center": 0,
+         "prefer_corners": 0,
+         "prefer_edges": 0,
+         "block_opponent": 0,
+         "create_fork": 0,
+         "avoid_losing": 0
+     }
+
+     # Check common winning/blocking patterns
+     patterns_to_check = [
+         # Center preference
+         ([0,0,0,0,0,0,0,0,0], [4], "prefer_center"),
+
+         # Corner openings
+         ([0,0,0,0,1,0,0,0,0], [0,2,6,8], "prefer_corners"),
+
+         # Block vertical
+         ([1,0,0,1,2,0,0,0,0], [6], "block_opponent"),
+
+         # Block horizontal
+         ([1,1,0,0,2,0,0,0,0], [2], "block_opponent"),
+
+         # Block diagonal
+         ([1,0,0,0,1,0,0,0,0], [8], "block_opponent"),
+     ]
+
+     for board_pattern, good_moves, strategy in patterns_to_check:
+         state_idx = 0
+         for i, cell in enumerate(board_pattern):
+             state_idx += cell * (3 ** i)
+
+         q_values = model[state_idx]
+         available_moves = [i for i, cell in enumerate(board_pattern) if cell == 0]
+
+         if available_moves:
+             # Check if AI prefers any of the good moves
+             best_move = available_moves[np.argmax([q_values[m] for m in available_moves])]
+             if best_move in good_moves:
+                 strategies[strategy] += 1
+                 print(f"✅ AI correctly prefers {strategy.replace('_', ' ')} in this scenario")
+             else:
+                 print(f"❌ AI doesn't recognize {strategy.replace('_', ' ')}")
+
+     print(f"\nStrategy recognition summary:")
+     for strategy, count in strategies.items():
+         print(f"   {strategy}: {count}/1")
+
+ def train_ai_offline(model, num_games=1000):
+     """Simulate games to improve the AI offline"""
+
+     print("\n🎮 SIMULATING OFFLINE TRAINING")
+     print("="*50)
+
+     print(f"Simulating {num_games} games of self-play...")
+
+     # Simple self-play simulation
+     import random
+
+     # We'll create a simple improvement by adding some basic strategies
+     original_non_zero = np.count_nonzero(model)
+
+     # Add some basic Tic-Tac-Toe knowledge
+     # Center is good
+     empty_board_idx = 0  # All zeros
+     model[empty_board_idx][4] = 0.1  # Center is good
+
+     # Corners are good when center is taken
+     center_taken_idx = 3**4  # Only center is 1
+     for corner in [0, 2, 6, 8]:
+         model[center_taken_idx][corner] = 0.08
+
+     # Blocking is good
+     # Example: opponent has two in a row
+     for i in range(9):
+         board = [0] * 9
+         board[i] = 1
+         board[(i+3)%9] = 1
+         if board[6] == 0:  # Check if third in column is empty
+             state_idx = 0
+             for j, cell in enumerate(board):
+                 state_idx += cell * (3 ** j)
+             blocking_move = 6
+             model[state_idx][blocking_move] = 0.15
+
+     new_non_zero = np.count_nonzero(model)
+     improvement = new_non_zero - original_non_zero
+
+     print(f"Added {improvement} new learned values")
+     print("Basic Tic-Tac-Toe strategies have been added to the AI")
+
+     return model
+
+ def save_improved_model(model):
+     """Save the improved model"""
+     filename = "ttt_ai_model_improved.pkl"
+
+     # Load existing data to preserve experience buffer
+     original_filename = "ttt_ai_model.pkl"
+     if os.path.exists(original_filename):
+         with open(original_filename, 'rb') as f:
+             data = pickle.load(f)
+     else:
+         data = {'model': model, 'experience': []}
+
+     data['model'] = model
+
+     with open(filename, 'wb') as f:
+         pickle.dump(data, f)
+
+     print(f"\n💾 Improved model saved to '{filename}'")
+
+ def main():
+     """Main function to analyze and improve the AI"""
+
+     print("🤖 TIC-TAC-TOE AI DIAGNOSTIC TOOL")
+     print("="*60)
+
+     # Load the model
+     model = load_ai_model()
+     if model is None:
+         return
+
+     # Test with common scenarios
+     test_ai_with_common_scenarios(model)
+
+     # Analyze learning patterns
+     analyze_learning_patterns(model)
+
+     # Check for specific strategies
+     check_for_specific_patterns(model)
+
+     # Offer to improve the AI
+     print("\n" + "="*60)
+     improve = input("\nWould you like to add basic Tic-Tac-Toe knowledge to the AI? (y/n): ").strip().lower()
+
+     if improve == 'y':
+         model = train_ai_offline(model)
+         save_improved_model(model)
+         print("\n✅ AI has been improved with basic strategies!")
+         print("Restart the game and use 'ttt_ai_model_improved.pkl' for better performance")
+     else:
+         print("\n📝 Recommendations for improving the AI through gameplay:")
+         print("1. Play more games against the AI")
+         print("2. Let the AI watch you play against itself")
+         print("3. Adjust learning rate to 0.2-0.3 for faster learning")
+         print("4. Reduce exploration rate to 0.1 once AI starts winning")
+         print("5. Play both as X and O to teach both perspectives")
+
+ if __name__ == "__main__":
+     main()
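
Note that train_ai_offline() above seeds a handful of handcrafted Q-values rather than actually simulating the advertised self-play. A minimal sketch of what genuine random self-play over the same 3**9 x 9 model array could look like, using a Monte-Carlo style discounted-return update (an illustration under those assumptions, not the author's method):

    import random
    import numpy as np

    LINES = [[0,1,2],[3,4,5],[6,7,8],[0,3,6],[1,4,7],[2,5,8],[0,4,8],[2,4,6]]

    def winner(board):
        # Return 1 or 2 if that player has three in a row, else None
        for a, b, c in LINES:
            if board[a] == board[b] == board[c] != 0:
                return board[a]
        return None

    def encode(board):
        # Same base-3 state index used throughout these scripts
        return sum(cell * (3 ** i) for i, cell in enumerate(board))

    def self_play(model, num_games=1000, alpha=0.3, gamma=0.9):
        for _ in range(num_games):
            board = [0] * 9
            history = []  # (state_idx, move) pairs for player 2's moves
            player = 1
            while True:
                moves = [i for i, c in enumerate(board) if c == 0]
                move = random.choice(moves)
                if player == 2:
                    history.append((encode(board), move))
                board[move] = player
                w = winner(board)
                if w or all(c != 0 for c in board):
                    reward = 1.0 if w == 2 else -1.0 if w == 1 else 0.1
                    # Credit each of player 2's moves, discounted back from the end
                    for k, (s, a) in enumerate(reversed(history)):
                        target = reward * (gamma ** k)
                        model[s][a] += alpha * (target - model[s][a])
                    break
                player = 3 - player
        return model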
Imitation Learning Tic Tac Toe AI/Screenshot 2025-12-02 at 12.30.57 PM.png ADDED

Git LFS Details

  • SHA256: 16cb0eda2c9bb0fbaa3badbf822812e0b9ca26c07eeb451596ea1d93886c5e19
  • Pointer size: 132 Bytes
  • Size of remote file: 2.32 MB
Imitation Learning Tic Tac Toe AI/Screenshot 2025-12-02 at 12.33.32 PM.png ADDED

Git LFS Details

  • SHA256: 8778d70b8bfb2df36d4d9106d82c943fe542d4390385ccbbf63f88f1ee876fb3
  • Pointer size: 132 Bytes
  • Size of remote file: 2.25 MB
Imitation Learning Tic Tac Toe AI/Screenshot 2025-12-02 at 12.33.43 PM.png ADDED

Git LFS Details

  • SHA256: e13da2cf84d7c25c069e0a210819184e09bb3ff6f48841ad86163ef0c459c6d9
  • Pointer size: 132 Bytes
  • Size of remote file: 2.26 MB
Imitation Learning Tic Tac Toe AI/analyze_ttt_model.py ADDED
@@ -0,0 +1,377 @@
1
+ import pickle
2
+ import numpy as np
3
+ import os
4
+ from collections import deque
5
+ import json
6
+
7
+ def load_pickle_file(filename="ttt_ai_model.pkl"):
8
+ """
9
+ Load and analyze the Tic-Tac-Toe AI model pickle file.
10
+
11
+ Args:
12
+ filename (str): Path to the pickle file
13
+
14
+ Returns:
15
+ dict: The loaded data or None if file doesn't exist
16
+ """
17
+
18
+ if not os.path.exists(filename):
19
+ print(f"❌ File '{filename}' not found!")
20
+ print("Possible reasons:")
21
+ print("1. The game hasn't been played yet")
22
+ print("2. The file was saved with a different name")
23
+ print("3. The file is in a different directory")
24
+ return None
25
+
26
+ try:
27
+ print(f"📂 Opening '{filename}'...")
28
+
29
+ # Load the pickle file
30
+ with open(filename, 'rb') as f:
31
+ data = pickle.load(f)
32
+
33
+ print("✅ File loaded successfully!")
34
+ print("\n" + "="*60)
35
+
36
+ return data
37
+
38
+ except Exception as e:
39
+ print(f"❌ Error loading pickle file: {e}")
40
+ print(f"Error type: {type(e).__name__}")
41
+ return None
42
+
43
+ def analyze_model(data):
44
+ """
45
+ Analyze and display information about the AI model.
46
+
47
+ Args:
48
+ data (dict): The loaded pickle data
49
+ """
50
+
51
+ if not data:
52
+ print("No data to analyze")
53
+ return
54
+
55
+ print("📊 MODEL ANALYSIS")
56
+ print("="*60)
57
+
58
+ # Check what keys are available
59
+ print(f"Keys in data: {list(data.keys())}")
60
+
61
+ # Analyze model matrix if present
62
+ if 'model' in data:
63
+ model = data['model']
64
+ print(f"\n🤖 AI Model Information:")
65
+ print(f" Shape: {model.shape}")
66
+ print(f" Size: {model.size:,} elements")
67
+ print(f" Data type: {model.dtype}")
68
+
69
+ # Calculate some statistics
70
+ print(f"\n📈 Model Statistics:")
71
+ print(f" Non-zero entries: {np.count_nonzero(model):,}")
72
+ print(f" Zero entries: {np.sum(model == 0):,}")
73
+ print(f" Sparsity: {(np.sum(model == 0) / model.size) * 100:.2f}%")
74
+
75
+ # Get min, max, mean values
76
+ if model.size > 0:
77
+ flat_model = model.flatten()
78
+ non_zero_values = flat_model[flat_model != 0]
79
+
80
+ if len(non_zero_values) > 0:
81
+ print(f" Min value (non-zero): {non_zero_values.min():.6f}")
82
+ print(f" Max value: {flat_model.max():.6f}")
83
+ print(f" Mean value (non-zero): {non_zero_values.mean():.6f}")
84
+ print(f" Std dev (non-zero): {non_zero_values.std():.6f}")
85
+
86
+ # Count of positive vs negative values
87
+ positive = np.sum(flat_model > 0)
88
+ negative = np.sum(flat_model < 0)
89
+ print(f" Positive values: {positive:,}")
90
+ print(f" Negative values: {negative:,}")
91
+
92
+ # Analyze experience replay if present
93
+ if 'experience' in data:
94
+ experience = data['experience']
95
+ print(f"\n🎮 Experience Replay Buffer:")
96
+ print(f" Number of experiences: {len(experience):,}")
97
+
98
+ if experience:
99
+ # Show first few experiences
100
+ print(f" Sample experience (first):")
101
+ if hasattr(experience[0], '__len__'):
102
+ print(f" Length: {len(experience[0])}")
103
+ if len(experience[0]) > 0:
104
+ print(f" First element type: {type(experience[0][0])}")
105
+
106
+ # Check for other data
107
+ for key in data.keys():
108
+ if key not in ['model', 'experience']:
109
+ value = data[key]
110
+ print(f"\n🔍 {key}:")
111
+ print(f" Type: {type(value)}")
112
+ if isinstance(value, (list, tuple, deque)):
113
+ print(f" Length: {len(value)}")
114
+ elif isinstance(value, dict):
115
+ print(f" Keys: {list(value.keys())[:5]}..." if len(value) > 5 else f" Keys: {list(value.keys())}")
116
+ elif isinstance(value, np.ndarray):
117
+ print(f" Shape: {value.shape}")
118
+
119
+ def examine_specific_states(model, num_states=5):
120
+ """
121
+ Examine specific state-action values in the model.
122
+
123
+ Args:
124
+ model (np.ndarray): The AI model
125
+ num_states (int): Number of states to examine
126
+ """
127
+
128
+ print(f"\n🔬 Examining {num_states} specific states:")
129
+ print("-"*40)
130
+
131
+ # Find states with non-zero values
132
+ non_zero_indices = np.nonzero(np.any(model != 0, axis=1))[0]
133
+
134
+ if len(non_zero_indices) > 0:
135
+ print(f"Found {len(non_zero_indices):,} states with learned values")
136
+
137
+ # Sample some states to examine
138
+ if len(non_zero_indices) > num_states:
139
+ sample_indices = np.random.choice(non_zero_indices, num_states, replace=False)
140
+ else:
141
+ sample_indices = non_zero_indices
142
+
143
+ for i, state_idx in enumerate(sample_indices):
144
+ q_values = model[state_idx]
145
+ non_zero_q = q_values[q_values != 0]
146
+
147
+ if len(non_zero_q) > 0:
148
+ print(f"\nState {i+1} (Index {state_idx}):")
149
+ print(f" Non-zero Q-values: {len(non_zero_q)}")
150
+ print(f" Actions with values:")
151
+ for action in np.where(q_values != 0)[0]:
152
+ print(f" Action {action}: {q_values[action]:.4f}")
153
+ else:
154
+ print("No states with learned values found yet.")
155
+
156
+ def decode_state(state_index):
157
+ """
158
+ Convert a state index back to a board representation.
159
+ This assumes the same encoding used in the game.
160
+
161
+ Args:
162
+ state_index (int): The encoded state index
163
+
164
+ Returns:
165
+ list: Board representation (0=empty, 1=X, 2=O)
166
+ """
167
+ board = [0] * 9
168
+ temp_index = state_index
169
+
170
+ for i in range(9):
171
+ board[i] = temp_index % 3
172
+ temp_index //= 3
173
+
174
+ return board
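+ # Example: index 19 has base-3 digits 1,0,2,0,... (least significant
+ # digit first), i.e. X at cell 0 and O at cell 2, exactly inverting
+ # the game's cell * 3**i encoding.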
175
+
176
+ def display_board(board):
177
+ """
178
+ Display a Tic-Tac-Toe board in human-readable format.
179
+
180
+ Args:
181
+ board (list): Board representation
182
+ """
183
+ symbols = {0: '.', 1: 'X', 2: 'O'}
184
+
185
+ print("Board state:")
186
+ for row in range(3):
187
+ row_chars = [symbols[board[row*3 + col]] for col in range(3)]
188
+ print(" " + " | ".join(row_chars))
189
+ if row < 2:
190
+ print(" " + "-" * 9)
191
+
192
+ def explore_model_interactively(model):
193
+ """
194
+ Interactive exploration of the model.
195
+
196
+ Args:
197
+ model (np.ndarray): The AI model
198
+ """
199
+
200
+ print("\n🎯 INTERACTIVE EXPLORATION")
201
+ print("="*60)
202
+
203
+ while True:
204
+ print("\nOptions:")
205
+ print("1. Look up a specific state")
206
+ print("2. Find states with highest Q-values")
207
+ print("3. Find best action for a given state")
208
+ print("4. Exit exploration")
209
+
210
+ choice = input("\nEnter your choice (1-4): ").strip()
211
+
212
+ if choice == '1':
213
+ try:
214
+ state_idx = int(input("Enter state index (0-19682): "))
215
+ if 0 <= state_idx < model.shape[0]:
216
+ board = decode_state(state_idx)
217
+ display_board(board)
218
+
219
+ q_values = model[state_idx]
220
+ print(f"\nQ-values for state {state_idx}:")
221
+ for action in range(9):
222
+ if q_values[action] != 0:
223
+ print(f" Action {action} (row {action//3}, col {action%3}): {q_values[action]:.4f}")
224
+
225
+ # Show best action
226
+ best_action = np.argmax(q_values)
227
+ print(f"\nBest action: {best_action} (row {best_action//3}, col {best_action%3})")
228
+ else:
229
+ print("Invalid state index!")
230
+ except ValueError:
231
+ print("Please enter a valid number!")
232
+
233
+ elif choice == '2':
234
+ try:
235
+ num_states = int(input("How many top states? (1-100): "))
236
+ num_states = max(1, min(100, num_states))
237
+
238
+ # Find states with maximum Q-values
239
+ max_q_per_state = np.max(model, axis=1)
240
+ top_indices = np.argsort(max_q_per_state)[-num_states:][::-1]
241
+
242
+ print(f"\nTop {num_states} states with highest Q-values:")
243
+ for i, idx in enumerate(top_indices[:10]): # Show first 10
244
+ max_q = max_q_per_state[idx]
245
+ if max_q > 0:
246
+ board = decode_state(idx)
247
+ print(f"\n{i+1}. State {idx} (max Q: {max_q:.4f})")
248
+ display_board(board)
249
+ except ValueError:
250
+ print("Please enter a valid number!")
251
+
252
+ elif choice == '3':
253
+ # Create a board manually
254
+ print("\nEnter board state (9 numbers, 0=empty, 1=X, 2=O)")
255
+ print("Example: 0 0 1 0 2 0 0 0 0")
256
+
257
+ try:
258
+ board_input = input("Board: ").strip()
259
+ if len(board_input) == 0:
260
+ # Use default example
261
+ board = [0, 0, 1, 0, 2, 0, 0, 0, 0]
262
+ else:
263
+ board = [int(x) for x in board_input.split()]
264
+
265
+ if len(board) != 9 or any(x not in [0, 1, 2] for x in board):
266
+ raise ValueError("Invalid board")
267
+
268
+ display_board(board)
269
+
270
+ # Convert to state index
271
+ state_idx = 0
272
+ for i, cell in enumerate(board):
273
+ state_idx += cell * (3 ** i)
274
+
275
+ q_values = model[state_idx]
276
+
277
+ # Only show available moves
278
+ available_moves = [i for i, cell in enumerate(board) if cell == 0]
279
+
280
+ print("\nAvailable moves and their Q-values:")
281
+ for move in available_moves:
282
+ q_val = q_values[move]
283
+ row, col = divmod(move, 3)
284
+ print(f" Move {move} (row {row}, col {col}): {q_val:.4f}")
285
+
286
+ if available_moves:
287
+ best_move = available_moves[np.argmax([q_values[m] for m in available_moves])]
288
+ row, col = divmod(best_move, 3)
289
+ print(f"\nRecommended move: {best_move} (row {row}, col {col})")
290
+ else:
291
+ print("No available moves!")
292
+
293
+ except Exception as e:
294
+ print(f"Error: {e}")
295
+
296
+ elif choice == '4':
297
+ print("Exiting interactive exploration.")
298
+ break
299
+
300
+ else:
301
+ print("Invalid choice!")
302
+
303
+ def save_model_summary(data, filename="model_summary.json"):
304
+ """
305
+ Save a summary of the model to a JSON file.
306
+
307
+ Args:
308
+ data (dict): The loaded pickle data
309
+ filename (str): Output JSON filename
310
+ """
311
+ if not data:
312
+ return
313
+
314
+ summary = {}
315
+
316
+ if 'model' in data:
317
+ model = data['model']
318
+ summary['model'] = {
319
+ 'shape': model.shape,
320
+ 'size': int(model.size),
321
+ 'non_zero_entries': int(np.count_nonzero(model)),
322
+ 'sparsity': float((np.sum(model == 0) / model.size) * 100)
323
+ }
324
+
325
+ if 'experience' in data:
326
+ experience = data['experience']
327
+ summary['experience'] = {
328
+ 'count': len(experience)
329
+ }
330
+
331
+ try:
332
+ with open(filename, 'w') as f:
333
+ json.dump(summary, f, indent=2)
334
+ print(f"\n💾 Model summary saved to '{filename}'")
335
+ except Exception as e:
336
+ print(f"Error saving summary: {e}")
337
+
338
+ def main():
339
+ """
340
+ Main function to load and analyze the pickle file.
341
+ """
342
+ print("🔍 Tic-Tac-Toe AI Model Analyzer")
343
+ print("="*60)
344
+
345
+ # Try to load the pickle file
346
+ filename = "ttt_ai_model.pkl"
347
+ data = load_pickle_file(filename)
348
+
349
+ if data:
350
+ # Analyze the model
351
+ analyze_model(data)
352
+
353
+ # If model exists, do more detailed analysis
354
+ if 'model' in data:
355
+ # Examine specific states
356
+ examine_specific_states(data['model'])
357
+
358
+ # Interactive exploration
359
+ explore = input("\nWould you like to explore the model interactively? (y/n): ").strip().lower()
360
+ if explore == 'y':
361
+ explore_model_interactively(data['model'])
362
+
363
+ # Save summary
364
+ save = input("\nWould you like to save a summary? (y/n): ").strip().lower()
365
+ if save == 'y':
366
+ save_model_summary(data)
367
+
368
+ # Additional file info
369
+ print("\n📄 File Information:")
370
+ print(f" File size: {os.path.getsize(filename):,} bytes")
371
+ print(f" Last modified: {os.path.getmtime(filename):.0f}")
372
+
373
+ print("\n" + "="*60)
374
+ print("Analysis complete!")
375
+
376
+ if __name__ == "__main__":
377
+ main()
Imitation Learning Tic Tac Toe AI/requirements.txt ADDED
@@ -0,0 +1,2 @@
1
+ PyQt5
2
+ numpy
Imitation Learning Tic Tac Toe AI/tic_tac_toe_ai.py ADDED
@@ -0,0 +1,483 @@
1
+ import sys
2
+ import numpy as np
3
+ import random
4
+ from collections import deque
5
+ import pickle
6
+ import os
7
+ from PyQt5.QtWidgets import *
8
+ from PyQt5.QtCore import *
9
+ from PyQt5.QtGui import *
10
+
11
+ class TicTacToeAI:
12
+ """AI that learns by imitating human player moves"""
13
+
14
+ def __init__(self):
15
+ self.experience = deque(maxlen=10000)
16
+ self.state_history = []
17
+ self.move_history = []
18
+ self.model = self.create_model()
19
+ self.learning_rate = 0.1
20
+ self.epsilon = 0.3 # Exploration rate
21
+ self.load_data()
22
+
23
+ def create_model(self):
24
+ """Simple Q-learning style model"""
25
+ # State representation: 9 cells (0=empty, 1=X, 2=O)
26
+ # Action: 9 possible moves
27
+ return np.zeros((3**9, 9)) # Simplified representation
28
+
29
+ def state_to_index(self, board):
30
+ """Convert board state to a unique index"""
31
+ index = 0
32
+ for i, cell in enumerate(board):
33
+ index += cell * (3 ** i)
34
+ return index
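+ # Worked example of the encoding above: for board [1,0,2,0,0,0,0,0,0]
+ # (X at cell 0, O at cell 2), index = 1*3**0 + 2*3**2 = 19. With 9
+ # cells of 3 values each there are 3**9 = 19683 possible indices,
+ # matching the model's first dimension.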
35
+
36
+ def get_action(self, board, available_moves):
37
+ """Choose an action based on current policy"""
38
+ # Random exploration
39
+ if random.random() < self.epsilon:
40
+ return random.choice(available_moves)
41
+
42
+ # Exploitation: choose best learned move
43
+ state_idx = self.state_to_index(board)
44
+ q_values = self.model[state_idx]
45
+
46
+ # Filter available moves and choose best
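+ # Occupied cells are masked with -inf, so the argmax below can only
+ # return a legal, still-empty cell index (0-8)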
47
+ available_q_values = [q_values[move] if move in available_moves else -float('inf')
48
+ for move in range(9)]
49
+ return np.argmax(available_q_values)
50
+
51
+ def record_move(self, board, move):
52
+ """Record state-action pair for learning"""
53
+ self.state_history.append(board.copy())
54
+ self.move_history.append(move)
55
+
56
+ def learn_from_game(self, winner):
57
+ """Learn from the completed game"""
58
+ if not self.state_history:
59
+ return
60
+
61
+ reward = 0.1 if winner == 'O' else -0.1 if winner == 'X' else 0.05
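+ # The AI plays O: an O win reinforces every recorded move (+0.1),
+ # an X win penalizes them (-0.1), and a draw earns a small +0.05.
+ # All moves in a game share the same terminal reward (a simple
+ # Monte-Carlo-style update with no discounting).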
62
+
63
+ for state, move in zip(self.state_history, self.move_history):
64
+ state_idx = self.state_to_index(state)
65
+ self.model[state_idx][move] += self.learning_rate * reward
66
+
67
+ # Clear history for next game
68
+ self.state_history = []
69
+ self.move_history = []
70
+
71
+ self.save_data()
72
+
73
+ def save_data(self):
74
+ """Save learned model"""
75
+ try:
76
+ data = {
77
+ 'model': self.model,
78
+ 'experience': list(self.experience)
79
+ }
80
+ with open('ttt_ai_model.pkl', 'wb') as f:
81
+ pickle.dump(data, f)
82
+ except Exception:
83
+ pass # Best-effort save; ignore I/O errors so the game keeps running
84
+
85
+ def load_data(self):
86
+ """Load saved model"""
87
+ if os.path.exists('ttt_ai_model.pkl'):
88
+ try:
89
+ with open('ttt_ai_model.pkl', 'rb') as f:
90
+ data = pickle.load(f)
91
+ self.model = data.get('model', self.model)
92
+ self.experience = deque(data.get('experience', []), maxlen=10000)
93
+ except Exception:
94
+ pass # Corrupt or incompatible file; fall back to a fresh model
95
+
96
+ class TicTacToeGame:
97
+ """Game logic"""
98
+
99
+ def __init__(self):
100
+ self.reset()
101
+
102
+ def reset(self):
103
+ self.board = [0] * 9 # 0=empty, 1=X, 2=O
104
+ self.current_player = 1 # X starts
105
+ self.winner = None
106
+ self.game_over = False
107
+ self.moves = 0
108
+
109
+ def make_move(self, position):
110
+ """Make a move at given position"""
111
+ if self.board[position] != 0 or self.game_over:
112
+ return False
113
+
114
+ self.board[position] = self.current_player
115
+ self.moves += 1
116
+
117
+ # Check for winner
118
+ self.winner = self.check_winner()
119
+ if self.winner or self.moves == 9:
120
+ self.game_over = True
121
+ else:
122
+ # Switch player
123
+ self.current_player = 3 - self.current_player # Switches between 1 and 2
124
+
125
+ return True
126
+
127
+ def check_winner(self):
128
+ """Check if there's a winner"""
129
+ winning_combinations = [
130
+ [0, 1, 2], [3, 4, 5], [6, 7, 8], # Rows
131
+ [0, 3, 6], [1, 4, 7], [2, 5, 8], # Columns
132
+ [0, 4, 8], [2, 4, 6] # Diagonals
133
+ ]
134
+
135
+ for combo in winning_combinations:
136
+ if (self.board[combo[0]] == self.board[combo[1]] ==
137
+ self.board[combo[2]] != 0):
138
+ return 'X' if self.board[combo[0]] == 1 else 'O'
139
+
140
+ return None
141
+
142
+ def get_available_moves(self):
143
+ """Get list of available positions"""
144
+ return [i for i, cell in enumerate(self.board) if cell == 0]
145
+
146
+ def get_board_state(self):
147
+ """Return copy of board"""
148
+ return self.board.copy()
149
+
150
+ class GameBoard(QWidget):
151
+ """Tic-Tac-Toe game board widget"""
152
+
153
+ def __init__(self):
154
+ super().__init__()
155
+ self.cell_size = 100
156
+ self.setFixedSize(self.cell_size * 3 + 20, self.cell_size * 3 + 20)
157
+
158
+ def paintEvent(self, event):
159
+ painter = QPainter(self)
160
+ painter.setRenderHint(QPainter.Antialiasing)
161
+
162
+ # Draw background
163
+ painter.fillRect(self.rect(), QColor(240, 240, 240))
164
+
165
+ # Draw grid
166
+ painter.setPen(QPen(QColor(0, 0, 0), 3))
167
+ for i in range(1, 3):
168
+ # Vertical lines
169
+ painter.drawLine(
170
+ self.cell_size * i + 10, 10,
171
+ self.cell_size * i + 10, self.cell_size * 3 + 10
172
+ )
173
+ # Horizontal lines
174
+ painter.drawLine(
175
+ 10, self.cell_size * i + 10,
176
+ self.cell_size * 3 + 10, self.cell_size * i + 10
177
+ )
178
+
179
+ # Draw X's and O's
180
+ if hasattr(self, 'game'):
181
+ for i in range(9):
182
+ row, col = divmod(i, 3)
183
+ x = col * self.cell_size + 10
184
+ y = row * self.cell_size + 10
185
+
186
+ if self.game.board[i] == 1: # X
187
+ painter.setPen(QPen(QColor(220, 50, 50), 4))
188
+ painter.drawLine(x + 20, y + 20, x + self.cell_size - 20, y + self.cell_size - 20)
189
+ painter.drawLine(x + self.cell_size - 20, y + 20, x + 20, y + self.cell_size - 20)
190
+ elif self.game.board[i] == 2: # O
191
+ painter.setPen(QPen(QColor(50, 50, 220), 4))
192
+ painter.drawEllipse(x + 20, y + 20, self.cell_size - 40, self.cell_size - 40)
193
+
194
+ # Draw winner line if exists
195
+ if hasattr(self, 'game') and self.game.winner:
196
+ self.draw_winner_line(painter)
197
+
198
+ def draw_winner_line(self, painter):
199
+ """Draw line through winning combination"""
200
+ winning_combinations = [
201
+ [0, 1, 2], [3, 4, 5], [6, 7, 8], # Rows
202
+ [0, 3, 6], [1, 4, 7], [2, 5, 8], # Columns
203
+ [0, 4, 8], [2, 4, 6] # Diagonals
204
+ ]
205
+
206
+ painter.setPen(QPen(QColor(0, 200, 0), 6))
207
+
208
+ for combo in winning_combinations:
209
+ if (self.game.board[combo[0]] == self.game.board[combo[1]] ==
210
+ self.game.board[combo[2]] != 0):
211
+ # Calculate positions
212
+ positions = []
213
+ for pos in combo:
214
+ row, col = divmod(pos, 3)
215
+ x = col * self.cell_size + self.cell_size // 2 + 10
216
+ y = row * self.cell_size + self.cell_size // 2 + 10
217
+ positions.append((x, y))
218
+
219
+ painter.drawLine(positions[0][0], positions[0][1],
220
+ positions[2][0], positions[2][1])
221
+ break
222
+
223
+ def mousePressEvent(self, event):
224
+ if event.button() == Qt.LeftButton:
225
+ x = event.x() - 10
226
+ y = event.y() - 10
227
+
228
+ if 0 <= x < self.cell_size * 3 and 0 <= y < self.cell_size * 3:
229
+ col = x // self.cell_size
230
+ row = y // self.cell_size
231
+ position = row * 3 + col
232
+
233
+ if hasattr(self, 'on_cell_clicked'):
234
+ self.on_cell_clicked(position)
235
+
236
+ class ImitationLearningGame(QMainWindow):
237
+ """Main game window with imitation learning"""
238
+
239
+ def __init__(self):
240
+ super().__init__()
241
+ self.game = TicTacToeGame()
242
+ self.ai = TicTacToeAI()
243
+ self.player_symbol = 1 # X
244
+ self.ai_symbol = 2 # O
245
+ self.training_mode = True
246
+ self.ai_turn = False
247
+ self.games_played = 0
248
+ self.ai_wins = 0
249
+ self.player_wins = 0
250
+ self.ties = 0
251
+
252
+ self.init_ui()
253
+ self.start_new_game()
254
+
255
+ def init_ui(self):
256
+ self.setWindowTitle('Imitation Learning Tic-Tac-Toe')
257
+ self.setGeometry(100, 100, 400, 500)
258
+
259
+ # Central widget
260
+ central_widget = QWidget()
261
+ self.setCentralWidget(central_widget)
262
+ layout = QVBoxLayout()
263
+
264
+ # Game board
265
+ self.board_widget = GameBoard()
266
+ self.board_widget.on_cell_clicked = self.handle_cell_click
267
+ self.board_widget.game = self.game
268
+ layout.addWidget(self.board_widget)
269
+
270
+ # Info panel
271
+ info_layout = QHBoxLayout()
272
+
273
+ self.status_label = QLabel("Your turn (X)")
274
+ self.status_label.setFont(QFont('Arial', 14))
275
+ info_layout.addWidget(self.status_label)
276
+
277
+ info_layout.addStretch()
278
+
279
+ self.stats_label = QLabel("Games: 0 | AI Wins: 0 | Your Wins: 0 | Ties: 0")
280
+ self.stats_label.setFont(QFont('Arial', 10))
281
+ info_layout.addWidget(self.stats_label)
282
+
283
+ layout.addLayout(info_layout)
284
+
285
+ # Control panel
286
+ control_layout = QHBoxLayout()
287
+
288
+ self.train_button = QPushButton("Toggle Training: ON")
289
+ self.train_button.clicked.connect(self.toggle_training)
290
+ control_layout.addWidget(self.train_button)
291
+
292
+ self.new_game_button = QPushButton("New Game")
293
+ self.new_game_button.clicked.connect(self.start_new_game)
294
+ control_layout.addWidget(self.new_game_button)
295
+
296
+ self.reset_ai_button = QPushButton("Reset AI")
297
+ self.reset_ai_button.clicked.connect(self.reset_ai)
298
+ control_layout.addWidget(self.reset_ai_button)
299
+
300
+ layout.addLayout(control_layout)
301
+
302
+ # Learning parameters
303
+ param_layout = QHBoxLayout()
304
+
305
+ param_layout.addWidget(QLabel("Learning Rate:"))
306
+ self.learning_rate_slider = QSlider(Qt.Horizontal)
307
+ self.learning_rate_slider.setRange(1, 20)
308
+ self.learning_rate_slider.setValue(10)
309
+ self.learning_rate_slider.valueChanged.connect(self.update_learning_rate)
310
+ param_layout.addWidget(self.learning_rate_slider)
311
+
312
+ param_layout.addWidget(QLabel("Exploration:"))
313
+ self.exploration_slider = QSlider(Qt.Horizontal)
314
+ self.exploration_slider.setRange(0, 100)
315
+ self.exploration_slider.setValue(30)
316
+ self.exploration_slider.valueChanged.connect(self.update_exploration)
317
+ param_layout.addWidget(self.exploration_slider)
318
+
319
+ layout.addLayout(param_layout)
320
+
321
+ # Learning info
322
+ self.learning_info = QTextEdit()
323
+ self.learning_info.setMaximumHeight(100)
324
+ self.learning_info.setReadOnly(True)
325
+ layout.addWidget(self.learning_info)
326
+
327
+ central_widget.setLayout(layout)
328
+
329
+ # Timer for AI moves
330
+ self.ai_timer = QTimer()
331
+ self.ai_timer.timeout.connect(self.make_ai_move)
332
+
333
+ self.add_log("AI initialized. Start playing to train the AI!")
334
+
335
+ def start_new_game(self):
336
+ self.game.reset()
337
+ self.ai_turn = False # Player starts
338
+ self.status_label.setText("Your turn (X)")
339
+ self.board_widget.update()
340
+
341
+ # (No AI opening move needed: the human always starts as X, so
342
+ # ai_turn is False at this point.)
343
+
344
+ def toggle_training(self):
345
+ self.training_mode = not self.training_mode
346
+ self.train_button.setText(f"Toggle Training: {'ON' if self.training_mode else 'OFF'}")
347
+ self.add_log(f"Training mode {'enabled' if self.training_mode else 'disabled'}")
348
+
349
+ def reset_ai(self):
350
+ self.ai = TicTacToeAI()
351
+ self.games_played = 0
352
+ self.ai_wins = 0
353
+ self.player_wins = 0
354
+ self.ties = 0
355
+ self.update_stats()
356
+ self.add_log("AI has been reset. Starting fresh learning!")
357
+
358
+ def update_learning_rate(self, value):
359
+ self.ai.learning_rate = value / 100.0
360
+ self.add_log(f"Learning rate set to {self.ai.learning_rate:.2f}")
361
+
362
+ def update_exploration(self, value):
363
+ self.ai.epsilon = value / 100.0
364
+ self.add_log(f"Exploration rate set to {self.ai.epsilon:.2f}")
365
+
366
+ def handle_cell_click(self, position):
367
+ if self.game.game_over or self.ai_turn:
368
+ return
369
+
370
+ # Capture the board before the move: learning must pair the pre-move
+ # state with the chosen action (Q(s, a), not Q(s', a))
+ board_before = self.game.get_board_state()
+ if self.game.make_move(position):
371
+ # Record the human's move for the AI to imitate
372
+ if self.training_mode:
373
+ self.ai.record_move(board_before, position)
374
+
375
+ self.board_widget.update()
376
+
377
+ if self.game.game_over:
378
+ self.end_game()
379
+ else:
380
+ # Switch to AI turn
381
+ self.ai_turn = True
382
+ self.status_label.setText("AI thinking...")
383
+ self.ai_timer.start(500) # AI moves after 0.5 seconds
384
+
385
+ def make_ai_move(self):
386
+ self.ai_timer.stop()
387
+
388
+ if self.game.game_over:
389
+ return
390
+
391
+ available_moves = self.game.get_available_moves()
392
+ if not available_moves:
393
+ return
394
+
395
+ # Get AI move from the pre-move board (also kept for recording below)
396
+ board_before = self.game.get_board_state()
+ ai_move = self.ai.get_action(board_before, available_moves)
397
+
398
+ if self.game.make_move(ai_move):
399
+ # Record AI's own move against the pre-move state
400
+ if self.training_mode:
401
+ self.ai.record_move(board_before, ai_move)
402
+
403
+ self.board_widget.update()
404
+
405
+ if self.game.game_over:
406
+ self.end_game()
407
+ else:
408
+ self.ai_turn = False
409
+ self.status_label.setText("Your turn (X)")
410
+
411
+ def end_game(self):
412
+ winner = self.game.winner
413
+
414
+ # Update statistics
415
+ self.games_played += 1
416
+ if winner == 'X':
417
+ self.player_wins += 1
418
+ result_text = "You win!"
419
+ elif winner == 'O':
420
+ self.ai_wins += 1
421
+ result_text = "AI wins!"
422
+ else:
423
+ self.ties += 1
424
+ result_text = "It's a tie!"
425
+
426
+ # AI learns from the game
427
+ if self.training_mode:
428
+ self.ai.learn_from_game(winner)
429
+ self.add_log(f"Game {self.games_played}: {result_text} AI learning updated.")
430
+ else:
431
+ self.add_log(f"Game {self.games_played}: {result_text}")
432
+
433
+ self.status_label.setText(result_text)
434
+ self.update_stats()
435
+
436
+ # Show end game dialog
437
+ msg = QMessageBox()
438
+ msg.setWindowTitle("Game Over")
439
+ msg.setText(result_text)
440
+ msg.setInformativeText("Do you want to play again?")
441
+ msg.setStandardButtons(QMessageBox.Yes | QMessageBox.No)
442
+
443
+ if msg.exec_() == QMessageBox.Yes:
444
+ self.start_new_game()
445
+
446
+ def update_stats(self):
447
+ self.stats_label.setText(
448
+ f"Games: {self.games_played} | "
449
+ f"AI Wins: {self.ai_wins} | "
450
+ f"Your Wins: {self.player_wins} | "
451
+ f"Ties: {self.ties}"
452
+ )
453
+
454
+ def add_log(self, message):
455
+ self.learning_info.append(f"[Game {self.games_played}] {message}")
456
+
457
+ def main():
458
+ app = QApplication(sys.argv)
459
+ app.setStyle('Fusion')
460
+
461
+ # Set dark theme
462
+ palette = QPalette()
463
+ palette.setColor(QPalette.Window, QColor(53, 53, 53))
464
+ palette.setColor(QPalette.WindowText, Qt.white)
465
+ palette.setColor(QPalette.Base, QColor(25, 25, 25))
466
+ palette.setColor(QPalette.AlternateBase, QColor(53, 53, 53))
467
+ palette.setColor(QPalette.ToolTipBase, Qt.white)
468
+ palette.setColor(QPalette.ToolTipText, Qt.white)
469
+ palette.setColor(QPalette.Text, Qt.white)
470
+ palette.setColor(QPalette.Button, QColor(53, 53, 53))
471
+ palette.setColor(QPalette.ButtonText, Qt.white)
472
+ palette.setColor(QPalette.BrightText, Qt.red)
473
+ palette.setColor(QPalette.Link, QColor(42, 130, 218))
474
+ palette.setColor(QPalette.Highlight, QColor(42, 130, 218))
475
+ palette.setColor(QPalette.HighlightedText, Qt.black)
476
+ app.setPalette(palette)
477
+
478
+ game = ImitationLearningGame()
479
+ game.show()
480
+ sys.exit(app.exec_())
481
+
482
+ if __name__ == '__main__':
483
+ main()
Imitation Learning Tic Tac Toe AI/ttt_ai_model.pkl ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:deb75c4a0df6e020246eee77f4304d89180acc388c925abfa27f4236c94279ec
3
+ size 1417366
Imitation Learning Tic Tac Toe AI/ttt_diagnostic.py ADDED
@@ -0,0 +1,351 @@
1
+ import pickle
2
+ import numpy as np
3
+ import os
4
+
5
+ def load_ai_model():
6
+ """Load the AI model from pickle file"""
7
+ filename = "ttt_ai_model.pkl"
8
+ if not os.path.exists(filename):
9
+ print("Model file not found!")
10
+ return None
11
+
12
+ with open(filename, 'rb') as f:
13
+ data = pickle.load(f)
14
+
15
+ return data['model']
16
+
17
+ def decode_state(state_index):
18
+ """Convert state index to board representation"""
19
+ board = [0] * 9
20
+ temp_index = state_index
21
+
22
+ for i in range(9):
23
+ board[i] = temp_index % 3
24
+ temp_index //= 3
25
+
26
+ return board
27
+
28
+ def display_board(board):
29
+ """Display Tic-Tac-Toe board"""
30
+ symbols = {0: '.', 1: 'X', 2: 'O'}
31
+
32
+ print("Current board:")
33
+ for row in range(3):
34
+ row_chars = [symbols[board[row*3 + col]] for col in range(3)]
35
+ print(" " + " | ".join(row_chars))
36
+ if row < 2:
37
+ print(" " + "-" * 9)
38
+
39
+ def test_ai_with_common_scenarios(model):
40
+ """Test AI with common Tic-Tac-Toe scenarios"""
41
+
42
+ print("\n🤖 TESTING AI WITH COMMON SCENARIOS")
43
+ print("="*50)
44
+
45
+ test_cases = [
46
+ # Empty board
47
+ {
48
+ "name": "Empty board - first move",
49
+ "board": [0,0,0,0,0,0,0,0,0],
50
+ "expected": "Center (4) or corners (0,2,6,8)"
51
+ },
52
+ # Center taken by opponent
53
+ {
54
+ "name": "Opponent took center",
55
+ "board": [0,0,0,0,1,0,0,0,0],
56
+ "expected": "A corner (0,2,6,8)"
57
+ },
58
+ # Winning move for AI
59
+ {
60
+ "name": "AI can win in one move",
61
+ "board": [2,1,0, # O X .
62
+ 1,2,0, # X O .
63
+ 0,0,0], # . . .
64
+ "expected": "Move 8 to complete diagonal"
65
+ },
66
+ # Block opponent's winning move
67
+ {
68
+ "name": "Block opponent's winning move",
69
+ "board": [1,0,0, # X . .
70
+ 1,2,0, # X O .
71
+ 0,0,0], # . . .
72
+ "expected": "Move 6 to block vertical"
73
+ },
74
+ # Fork opportunity
75
+ {
76
+ "name": "Fork opportunity",
77
+ "board": [2,0,1, # O . X
78
+ 0,1,0, # . X .
79
+ 0,0,0], # . . .
80
+ "expected": "Move 8 to create fork"
81
+ }
82
+ ]
83
+
84
+ for test in test_cases:
85
+ print(f"\n📋 {test['name']}")
86
+ display_board(test['board'])
87
+
88
+ # Convert board to state index
89
+ state_idx = 0
90
+ for i, cell in enumerate(test['board']):
91
+ state_idx += cell * (3 ** i)
92
+
93
+ # Get Q-values for this state
94
+ q_values = model[state_idx]
95
+
96
+ # Get available moves
97
+ available_moves = [i for i, cell in enumerate(test['board']) if cell == 0]
98
+
99
+ if available_moves:
100
+ print("\nAvailable moves and Q-values:")
101
+ for move in available_moves:
102
+ q_val = q_values[move]
103
+ row, col = divmod(move, 3)
104
+ symbol = "⚠️" if q_val > 0 else " "
105
+ print(f"{symbol} Move {move} (row {row}, col {col}): {q_val:.4f}")
106
+
107
+ # AI's recommended move
108
+ best_move = available_moves[np.argmax([q_values[m] for m in available_moves])]
109
+ row, col = divmod(best_move, 3)
110
+ print(f"\n🤖 AI's choice: Move {best_move} (row {row}, col {col})")
111
+ print(f"📋 Expected: {test['expected']}")
112
+
113
+ # Check if AI learned something useful
114
+ best_q = q_values[best_move]
115
+ if best_q > 0:
116
+ print("✅ AI has positive association with this move")
117
+ elif best_q < 0:
118
+ print("❌ AI has negative association with this move (thinks it's bad)")
119
+ else:
120
+ print("➖ AI has no learning for this move")
121
+ else:
122
+ print("No available moves!")
123
+
124
+ def analyze_learning_patterns(model):
125
+ """Analyze what patterns the AI has learned"""
126
+
127
+ print("\n🔍 ANALYZING LEARNING PATTERNS")
128
+ print("="*50)
129
+
130
+ # Find all states with non-zero Q-values
131
+ non_zero_indices = np.nonzero(np.any(model != 0, axis=1))[0]
132
+
133
+ print(f"Total states with learning: {len(non_zero_indices)}")
134
+
135
+ # Categorize by game phase
136
+ phases = {
137
+ "early": [], # 0-2 moves made
138
+ "mid": [], # 3-5 moves made
139
+ "late": [] # 6-8 moves made
140
+ }
141
+
142
+ for idx in non_zero_indices:
143
+ board = decode_state(idx)
144
+ moves_made = sum(1 for cell in board if cell != 0)
145
+
146
+ if moves_made <= 2:
147
+ phases["early"].append(idx)
148
+ elif moves_made <= 5:
149
+ phases["mid"].append(idx)
150
+ else:
151
+ phases["late"].append(idx)
152
+
153
+ print(f"\nLearning by game phase:")
154
+ print(f" Early game (0-2 moves): {len(phases['early'])} states")
155
+ print(f" Mid game (3-5 moves): {len(phases['mid'])} states")
156
+ print(f" Late game (6-8 moves): {len(phases['late'])} states")
157
+
158
+ # Analyze Q-value distribution
159
+ all_q_values = model[non_zero_indices].flatten()
160
+ non_zero_q = all_q_values[all_q_values != 0]
161
+
162
+ if len(non_zero_q) > 0:
163
+ print(f"\nQ-value analysis:")
164
+ print(f" Total Q-values: {len(non_zero_q)}")
165
+ print(f" Positive Q-values: {np.sum(non_zero_q > 0)}")
166
+ print(f" Negative Q-values: {np.sum(non_zero_q < 0)}")
167
+ print(f" Average Q-value: {np.mean(non_zero_q):.4f}")
168
+ print(f" Most positive: {np.max(non_zero_q):.4f}")
169
+ print(f" Most negative: {np.min(non_zero_q):.4f}")
170
+
171
+ # Show examples of what AI learned
172
+ print("\n📚 Examples of learned states:")
173
+
174
+ # Find states with positive Q-values
175
+ positive_states = []
176
+ for idx in non_zero_indices:
177
+ if np.any(model[idx] > 0):
178
+ positive_states.append(idx)
179
+
180
+ if positive_states:
181
+ print(f"\nFound {len(positive_states)} states with positive associations")
182
+ for i, idx in enumerate(positive_states[:3]): # Show first 3
183
+ board = decode_state(idx)
184
+ print(f"\nExample {i+1}:")
185
+ display_board(board)
186
+
187
+ q_values = model[idx]
188
+ positive_moves = np.where(q_values > 0)[0]
189
+ print("Moves AI thinks are good:")
190
+ for move in positive_moves:
191
+ print(f" Move {move} (row {move//3}, col {move%3}): {q_values[move]:.4f}")
192
+ else:
193
+ print("No positive associations found - AI hasn't learned winning strategies yet")
194
+
195
+ def check_for_specific_patterns(model):
196
+ """Check if AI has learned specific Tic-Tac-Toe strategies"""
197
+
198
+ print("\n🎯 CHECKING FOR SPECIFIC STRATEGIES")
199
+ print("="*50)
200
+
201
+ strategies = {
202
+ "prefer_center": 0,
203
+ "prefer_corners": 0,
204
+ "prefer_edges": 0,
205
+ "block_opponent": 0,
206
+ "create_fork": 0,
207
+ "avoid_losing": 0
208
+ }
209
+
210
+ # Check common winning/blocking patterns
211
+ patterns_to_check = [
212
+ # Center preference
213
+ ([0,0,0,0,0,0,0,0,0], [4], "prefer_center"),
214
+
215
+ # Corner openings
216
+ ([0,0,0,0,1,0,0,0,0], [0,2,6,8], "prefer_corners"),
217
+
218
+ # Block vertical
219
+ ([1,0,0,1,2,0,0,0,0], [6], "block_opponent"),
220
+
221
+ # Block horizontal
222
+ ([1,1,0,0,2,0,0,0,0], [2], "block_opponent"),
223
+
224
+ # Block diagonal
225
+ ([1,0,0,0,1,0,0,0,0], [8], "block_opponent"),
226
+ ]
227
+
228
+ for board_pattern, good_moves, strategy in patterns_to_check:
229
+ state_idx = 0
230
+ for i, cell in enumerate(board_pattern):
231
+ state_idx += cell * (3 ** i)
232
+
233
+ q_values = model[state_idx]
234
+ available_moves = [i for i, cell in enumerate(board_pattern) if cell == 0]
235
+
236
+ if available_moves:
237
+ # Check if AI prefers any of the good moves
238
+ best_move = available_moves[np.argmax([q_values[m] for m in available_moves])]
239
+ if best_move in good_moves:
240
+ strategies[strategy] += 1
241
+ print(f"✅ AI correctly prefers {strategy.replace('_', ' ')} in this scenario")
242
+ else:
243
+ print(f"❌ AI doesn't recognize {strategy.replace('_', ' ')}")
244
+
245
+ print(f"\nStrategy recognition summary:")
246
+ for strategy, count in strategies.items():
247
+ print(f" {strategy}: {count}/1")
248
+
249
+ def train_ai_offline(model, num_games=1000):
250
+ """Simulate games to improve the AI offline"""
251
+
252
+ print("\n🎮 SIMULATING OFFLINE TRAINING")
253
+ print("="*50)
254
+
255
+ print(f"Simulating {num_games} games of self-play...")
256
+
257
+ # Simple self-play simulation
258
+ import random
259
+
260
+ # We'll create a simple improvement by adding some basic strategies
261
+ original_non_zero = np.count_nonzero(model)
262
+
263
+ # Add some basic Tic-Tac-Toe knowledge
264
+ # Center is good
265
+ empty_board_idx = 0 # All zeros
266
+ model[empty_board_idx][4] = 0.1 # Center is good
267
+
268
+ # Corners are good when center is taken
269
+ center_taken_idx = 3**4 # Only center is 1
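+ # (cell 4 holds value 1 and all other cells 0, so the base-3 index
+ # is 1 * 3**4 = 81)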
270
+ for corner in [0, 2, 6, 8]:
271
+ model[center_taken_idx][corner] = 0.08
272
+
273
+ # Blocking is good
274
+ # Example: the opponent (X) has two in a column; boost the move
275
+ # that completes, i.e. blocks, that column
276
+ for col in range(3):
277
+ board = [0] * 9
278
+ board[col] = 1
279
+ board[col + 3] = 1
280
+ state_idx = 0
281
+ for j, cell in enumerate(board):
282
+ state_idx += cell * (3 ** j)
283
+ blocking_move = col + 6 # Bottom cell of the same column
284
+ model[state_idx][blocking_move] = 0.15
285
+
286
+ new_non_zero = np.count_nonzero(model)
287
+ improvement = new_non_zero - original_non_zero
288
+
289
+ print(f"Added {improvement} new learned values")
290
+ print("Basic Tic-Tac-Toe strategies have been added to the AI")
291
+
292
+ return model
293
+
294
+ def save_improved_model(model):
295
+ """Save the improved model"""
296
+ filename = "ttt_ai_model_improved.pkl"
297
+
298
+ # Load existing data to preserve experience buffer
299
+ original_filename = "ttt_ai_model.pkl"
300
+ if os.path.exists(original_filename):
301
+ with open(original_filename, 'rb') as f:
302
+ data = pickle.load(f)
303
+ else:
304
+ data = {'model': model, 'experience': []}
305
+
306
+ data['model'] = model
307
+
308
+ with open(filename, 'wb') as f:
309
+ pickle.dump(data, f)
310
+
311
+ print(f"\n💾 Improved model saved to '{filename}'")
312
+
313
+ def main():
314
+ """Main function to analyze and improve the AI"""
315
+
316
+ print("🤖 TIC-TAC-TOE AI DIAGNOSTIC TOOL")
317
+ print("="*60)
318
+
319
+ # Load the model
320
+ model = load_ai_model()
321
+ if model is None:
322
+ return
323
+
324
+ # Test with common scenarios
325
+ test_ai_with_common_scenarios(model)
326
+
327
+ # Analyze learning patterns
328
+ analyze_learning_patterns(model)
329
+
330
+ # Check for specific strategies
331
+ check_for_specific_patterns(model)
332
+
333
+ # Offer to improve the AI
334
+ print("\n" + "="*60)
335
+ improve = input("\nWould you like to add basic Tic-Tac-Toe knowledge to the AI? (y/n): ").strip().lower()
336
+
337
+ if improve == 'y':
338
+ model = train_ai_offline(model)
339
+ save_improved_model(model)
340
+ print("\n✅ AI has been improved with basic strategies!")
341
+ print("Restart the game and use 'ttt_ai_model_improved.pkl' for better performance")
342
+ else:
343
+ print("\n📝 Recommendations for improving the AI through gameplay:")
344
+ print("1. Play more games against the AI")
345
+ print("2. Let the AI watch you play against itself")
346
+ print("3. Adjust learning rate to 0.2-0.3 for faster learning")
347
+ print("4. Reduce exploration rate to 0.1 once AI starts winning")
348
+ print("5. Play both as X and O to teach both perspectives")
349
+
350
+ if __name__ == "__main__":
351
+ main()
Tic Tac Toe RL/app.py ADDED
@@ -0,0 +1,721 @@
1
+ import sys
2
+ import os
3
+ import random
4
+ import numpy as np
5
+ import pandas as pd
6
+ import matplotlib.pyplot as plt
7
+ from collections import deque
8
+ import datetime
9
+ import csv
10
+ import logging
11
+ from PyQt5.QtWidgets import (QApplication, QMainWindow, QWidget, QVBoxLayout,
12
+ QHBoxLayout, QGridLayout, QPushButton, QLabel,
13
+ QTextEdit, QTabWidget, QGroupBox, QSpinBox,
14
+ QDoubleSpinBox, QCheckBox, QProgressBar, QComboBox)
15
+ from PyQt5.QtCore import QTimer, Qt, QThread, pyqtSignal
16
+ from PyQt5.QtGui import QFont, QPalette, QColor
17
+ import torch
18
+ import torch.nn as nn
19
+ import torch.optim as optim
20
+ import torch.nn.functional as F
21
+
22
+ # Configure logging
23
+ logging.basicConfig(
24
+ level=logging.INFO,
25
+ format='%(asctime)s - %(levelname)s - %(message)s',
26
+ handlers=[
27
+ logging.FileHandler('tic_tac_toe_training.log'),
28
+ logging.StreamHandler()
29
+ ]
30
+ )
31
+ logger = logging.getLogger(__name__)
32
+
33
+ class DuelingDQN(nn.Module):
34
+ def __init__(self, input_size, output_size, hidden_size=256):
35
+ super(DuelingDQN, self).__init__()
36
+ self.input_size = input_size
37
+ self.output_size = output_size
38
+
39
+ # Feature layer
40
+ self.feature = nn.Sequential(
41
+ nn.Linear(input_size, hidden_size),
42
+ nn.ReLU(),
43
+ nn.Linear(hidden_size, hidden_size),
44
+ nn.ReLU(),
45
+ nn.Linear(hidden_size, hidden_size // 2),
46
+ nn.ReLU()
47
+ )
48
+
49
+ # Value stream
50
+ self.value_stream = nn.Sequential(
51
+ nn.Linear(hidden_size // 2, hidden_size // 4),
52
+ nn.ReLU(),
53
+ nn.Linear(hidden_size // 4, 1)
54
+ )
55
+
56
+ # Advantage stream
57
+ self.advantage_stream = nn.Sequential(
58
+ nn.Linear(hidden_size // 2, hidden_size // 4),
59
+ nn.ReLU(),
60
+ nn.Linear(hidden_size // 4, output_size)
61
+ )
62
+
63
+ def forward(self, state):
64
+ features = self.feature(state)
65
+ value = self.value_stream(features)
66
+ advantage = self.advantage_stream(features)
67
+
68
+ # Combine value and advantage
69
+ q_values = value + (advantage - advantage.mean(dim=1, keepdim=True))
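+ # i.e. Q(s, a) = V(s) + (A(s, a) - mean_a' A(s, a')); subtracting the
+ # mean advantage keeps the value/advantage split identifiable, as in
+ # the dueling-network paper (Wang et al., 2016)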
70
+ return q_values
71
+
72
+ class TicTacToeEnvironment:
73
+ def __init__(self):
74
+ self.reset()
75
+
76
+ def reset(self):
77
+ self.board = np.zeros(9, dtype=int) # 0: empty, 1: X, 2: O
78
+ self.current_player = 1 # X starts
79
+ self.done = False
80
+ self.winner = 0
81
+ return self.get_state()
82
+
83
+ def get_state(self):
84
+ # Return board state as one-hot encoded
85
+ state = np.zeros(9 * 3, dtype=np.float32)
86
+ for i in range(9):
87
+ if self.board[i] == 0:
88
+ state[i * 3] = 1.0
89
+ elif self.board[i] == 1:
90
+ state[i * 3 + 1] = 1.0
91
+ else:
92
+ state[i * 3 + 2] = 1.0
93
+ return state
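+ # Each cell is a 3-way one-hot block (empty/X/O), giving a
+ # 9 * 3 = 27 dimensional state vector, the state_size used by the agent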
94
+
95
+ def get_valid_moves(self):
96
+ return [i for i in range(9) if self.board[i] == 0]
97
+
98
+ def step(self, action):
99
+ if self.done:
100
+ return self.get_state(), 0, True, {}
101
+
102
+ if self.board[action] != 0:
103
+ return self.get_state(), -5, True, {} # Invalid move penalty
104
+
105
+ # Make move
106
+ self.board[action] = self.current_player
107
+
108
+ # Check for win
109
+ if self.check_win(self.current_player):
110
+ self.done = True
111
+ self.winner = self.current_player
112
+ reward = 10 # Win reward
113
+ # Check for draw
114
+ elif len(self.get_valid_moves()) == 0:
115
+ self.done = True
116
+ reward = 2 # Draw reward
117
+ else:
118
+ reward = 0.1 # Small reward for valid move
119
+ self.current_player = 3 - self.current_player # Switch player (1->2, 2->1)
120
+
121
+ return self.get_state(), reward, self.done, {'winner': self.winner}
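+ # Reward scheme in summary: +10 win, +2 draw, +0.1 per legal move,
+ # and -5 for an illegal move (which also ends the episode immediately)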
122
+
123
+ def check_win(self, player):
124
+ winning_combinations = [
125
+ [0, 1, 2], [3, 4, 5], [6, 7, 8], # Rows
126
+ [0, 3, 6], [1, 4, 7], [2, 5, 8], # Columns
127
+ [0, 4, 8], [2, 4, 6] # Diagonals
128
+ ]
129
+
130
+ for combo in winning_combinations:
131
+ if all(self.board[i] == player for i in combo):
132
+ return True
133
+ return False
134
+
135
+ def render(self):
136
+ symbols = {0: ' ', 1: 'X', 2: 'O'}
137
+ board_str = ""
138
+ for i in range(3):
139
+ row = [symbols[self.board[i*3 + j]] for j in range(3)]
140
+ board_str += " " + " | ".join(row) + " \n"
141
+ if i < 2:
142
+ board_str += "-----------\n"
143
+ return board_str
144
+
145
+ class DuelingDQNAgent:
146
+ def __init__(self, state_size, action_size, learning_rate=0.001, gamma=0.99,
147
+ epsilon_start=1.0, epsilon_end=0.01, epsilon_decay=0.999,
148
+ target_update=1000, device='auto'):
149
+ self.state_size = state_size
150
+ self.action_size = action_size
151
+ self.learning_rate = learning_rate
152
+ self.gamma = gamma
153
+ self.epsilon = epsilon_start
154
+ self.epsilon_start = epsilon_start
155
+ self.epsilon_end = epsilon_end
156
+ self.epsilon_decay = epsilon_decay
157
+ self.target_update = target_update
158
+ self.memory = deque(maxlen=50000)
159
+ self.batch_size = 128
160
+ self.learn_step_counter = 0
161
+
162
+ # Device selection with MPS priority
163
+ if device == 'auto':
164
+ if torch.backends.mps.is_available():
165
+ self.device = torch.device("mps")
166
+ logger.info("Using MPS device (Apple Silicon)")
167
+ elif torch.cuda.is_available():
168
+ self.device = torch.device("cuda")
169
+ logger.info("Using CUDA device")
170
+ else:
171
+ self.device = torch.device("cpu")
172
+ logger.info("Using CPU device")
173
+ else:
174
+ self.device = torch.device(device)
175
+
176
+ # Networks
177
+ self.policy_net = DuelingDQN(state_size, action_size).to(self.device)
178
+ self.target_net = DuelingDQN(state_size, action_size).to(self.device)
179
+ self.optimizer = optim.Adam(self.policy_net.parameters(), lr=learning_rate, weight_decay=1e-5)
180
+
181
+ self.update_target_network()
182
+
183
+ def update_target_network(self):
184
+ self.target_net.load_state_dict(self.policy_net.state_dict())
185
+
186
+ def remember(self, state, action, reward, next_state, done):
187
+ self.memory.append((state, action, reward, next_state, done))
188
+
189
+ def act(self, state, valid_moves, training=True):
190
+ if training and random.random() <= self.epsilon:
191
+ return random.choice(valid_moves)
192
+
193
+ state_tensor = torch.FloatTensor(state).unsqueeze(0).to(self.device)
194
+ with torch.no_grad():
195
+ q_values = self.policy_net(state_tensor)
196
+
197
+ # Mask invalid moves
198
+ q_values_np = q_values.cpu().numpy()[0]
199
+ masked_q_values = q_values_np.copy()
200
+ for i in range(self.action_size):
201
+ if i not in valid_moves:
202
+ masked_q_values[i] = -float('inf')
203
+
204
+ return np.argmax(masked_q_values)
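+ # Masking illegal actions with -inf before the argmax guarantees a
+ # legal greedy move without requiring the network to learn legality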
205
+
206
+ def replay(self):
207
+ if len(self.memory) < self.batch_size:
208
+ return 0
209
+
210
+ batch = random.sample(self.memory, self.batch_size)
211
+ states, actions, rewards, next_states, dones = zip(*batch)
212
+
213
+ states = torch.FloatTensor(np.array(states)).to(self.device)
214
+ actions = torch.LongTensor(actions).to(self.device)
215
+ rewards = torch.FloatTensor(rewards).to(self.device)
216
+ next_states = torch.FloatTensor(np.array(next_states)).to(self.device)
217
+ dones = torch.BoolTensor(dones).to(self.device)
218
+
219
+ current_q_values = self.policy_net(states).gather(1, actions.unsqueeze(1))
220
+
221
+ with torch.no_grad():
222
+ next_actions = self.policy_net(next_states).max(1)[1]
223
+ next_q_values = self.target_net(next_states).gather(1, next_actions.unsqueeze(1))
224
+ target_q_values = rewards.unsqueeze(1) + (self.gamma * next_q_values * ~dones.unsqueeze(1))
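+ # Double DQN target: the policy net selects the next action and the
+ # target net evaluates it, which reduces max-operator overestimation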
225
+
226
+ loss = F.smooth_l1_loss(current_q_values, target_q_values)
227
+
228
+ self.optimizer.zero_grad()
229
+ loss.backward()
230
+ # Gradient clipping
231
+ torch.nn.utils.clip_grad_norm_(self.policy_net.parameters(), 1.0)
232
+ self.optimizer.step()
233
+
234
+ # Update target network
235
+ self.learn_step_counter += 1
236
+ if self.learn_step_counter % self.target_update == 0:
237
+ self.update_target_network()
238
+
239
+ # Decay epsilon
240
+ if self.epsilon > self.epsilon_end:
241
+ self.epsilon *= self.epsilon_decay
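+ # Multiplicative decay: epsilon reaches the 0.01 floor after roughly
+ # ln(0.01)/ln(0.999), i.e. about 4600 replay steps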
242
+
243
+ return loss.item()
244
+
245
+ def save_model(self, filepath):
246
+ torch.save({
247
+ 'policy_net_state_dict': self.policy_net.state_dict(),
248
+ 'target_net_state_dict': self.target_net.state_dict(),
249
+ 'optimizer_state_dict': self.optimizer.state_dict(),
250
+ 'epsilon': self.epsilon,
251
+ 'memory': list(self.memory)[-10000:] # Save recent memory
252
+ }, filepath)
253
+ logger.info(f"Model saved to {filepath}")
254
+
255
+ def load_model(self, filepath):
256
+ if os.path.exists(filepath):
257
+ checkpoint = torch.load(filepath, map_location=self.device)
258
+ self.policy_net.load_state_dict(checkpoint['policy_net_state_dict'])
259
+ self.target_net.load_state_dict(checkpoint['target_net_state_dict'])
260
+ self.optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
261
+ self.epsilon = checkpoint['epsilon']
262
+ if 'memory' in checkpoint:
263
+ self.memory = deque(checkpoint['memory'], maxlen=50000)
264
+ logger.info(f"Model loaded from {filepath}")
265
+ return True
266
+ return False
267
+
268
+ class TrainingThread(QThread):
269
+ update_signal = pyqtSignal(dict)
270
+ finished_signal = pyqtSignal()
271
+
272
+ def __init__(self, agent, env, episodes=1000, save_interval=100):
273
+ super().__init__()
274
+ self.agent = agent
275
+ self.env = env
276
+ self.episodes = episodes
277
+ self.save_interval = save_interval
278
+ self.running = True
279
+ self.stats = {
280
+ 'episodes': 0,
281
+ 'wins': 0,
282
+ 'losses': 0,
283
+ 'draws': 0,
284
+ 'avg_reward': 0,
285
+ 'epsilon': agent.epsilon,
286
+ 'loss': 0
287
+ }
288
+ self.rewards = []
289
+ self.wins = []
290
+ self.losses = []
291
+ self.draws = []
292
+
293
+ def run(self):
294
+ logger.info(f"Starting training for {self.episodes} episodes")
295
+
296
+ for episode in range(self.episodes):
297
+ if not self.running:
298
+ break
299
+
300
+ state = self.env.reset()
301
+ total_reward = 0
302
+ steps = 0
303
+
304
+ while True:
305
+ valid_moves = self.env.get_valid_moves()
306
+ if not valid_moves:
307
+ break
308
+
309
+ action = self.agent.act(state, valid_moves)
310
+ next_state, reward, done, info = self.env.step(action)
311
+
312
+ self.agent.remember(state, action, reward, next_state, done)
313
+ loss = self.agent.replay()
314
+
315
+ total_reward += reward
316
+ steps += 1
317
+ state = next_state
318
+
319
+ if done:
320
+ # Update statistics
321
+ if info.get('winner') == 1: # X won (the same net plays both sides in self-play)
322
+ self.stats['wins'] += 1
323
+ elif info.get('winner') == 2: # O won
324
+ self.stats['losses'] += 1
325
+ else: # Draw
326
+ self.stats['draws'] += 1
327
+
328
+ self.stats['loss'] = loss if loss else 0
329
+ break
330
+
331
+ # Update statistics
332
+ self.stats['episodes'] = episode + 1
333
+ self.stats['epsilon'] = self.agent.epsilon
334
+ self.rewards.append(total_reward)
335
+ self.stats['avg_reward'] = np.mean(self.rewards[-100:]) if self.rewards else 0
336
+
337
+ # Save data periodically
338
+ if (episode + 1) % self.save_interval == 0:
339
+ self.save_training_data(episode + 1)
340
+ self.agent.save_model(f'models/dueling_dqn_episode_{episode + 1}.pth')
341
+
342
+ # Emit update signal every 10 episodes or at the end
343
+ if (episode + 1) % 10 == 0 or episode == self.episodes - 1:
344
+ self.update_signal.emit(self.stats.copy())
345
+
346
+ self.finished_signal.emit()
347
+
348
+ def stop(self):
349
+ self.running = False
350
+
351
+ def save_training_data(self, episode):
352
+ data = {
353
+ 'episode': episode,
354
+ 'epsilon': self.agent.epsilon,
355
+ 'avg_reward': self.stats['avg_reward'],
356
+ 'wins': self.stats['wins'],
357
+ 'losses': self.stats['losses'],
358
+ 'draws': self.stats['draws'],
359
+ 'win_rate': self.stats['wins'] / episode if episode > 0 else 0,
360
+ 'timestamp': datetime.datetime.now().isoformat()
361
+ }
362
+
363
+ # Save to CSV
364
+ file_exists = os.path.isfile('training_data.csv')
365
+ with open('training_data.csv', 'a', newline='') as f:
366
+ writer = csv.DictWriter(f, fieldnames=data.keys())
367
+ if not file_exists:
368
+ writer.writeheader()
369
+ writer.writerow(data)
370
+
371
+ class TicTacToeGUI(QMainWindow):
372
+ def __init__(self):
373
+ super().__init__()
374
+ self.env = TicTacToeEnvironment()
375
+ self.agent = DuelingDQNAgent(state_size=27, action_size=9) # 9 positions * 3 states each
376
+ self.training_thread = None
377
+ self.is_training = False
378
+
379
+ # Create models directory
380
+ os.makedirs('models', exist_ok=True)
381
+
382
+ self.init_ui()
383
+ self.update_display()
384
+
385
+ def init_ui(self):
386
+ self.setWindowTitle("Dueling DQN Tic-Tac-Toe Trainer")
387
+ self.setGeometry(100, 100, 1200, 800)
388
+
389
+ # Central widget and main layout
390
+ central_widget = QWidget()
391
+ self.setCentralWidget(central_widget)
392
+ main_layout = QHBoxLayout(central_widget)
393
+
394
+ # Left panel - Game board and controls
395
+ left_panel = QVBoxLayout()
396
+
397
+ # Game board
398
+ board_group = QGroupBox("Tic-Tac-Toe Board")
399
+ board_layout = QGridLayout()
400
+ self.board_buttons = []
401
+ for i in range(9):
402
+ btn = QPushButton('')
403
+ btn.setFixedSize(80, 80)
404
+ btn.setFont(QFont('Arial', 24))
405
+ btn.clicked.connect(lambda checked, pos=i: self.human_move(pos))
406
+ board_layout.addWidget(btn, i // 3, i % 3)
407
+ self.board_buttons.append(btn)
408
+ board_group.setLayout(board_layout)
409
+ left_panel.addWidget(board_group)
410
+
411
+ # Game controls
412
+ controls_group = QGroupBox("Game Controls")
413
+ controls_layout = QVBoxLayout()
414
+
415
+ self.status_label = QLabel("Status: Ready")
416
+ controls_layout.addWidget(self.status_label)
417
+
418
+ control_buttons_layout = QHBoxLayout()
419
+ self.reset_btn = QPushButton("New Game")
420
+ self.reset_btn.clicked.connect(self.reset_game)
421
+ control_buttons_layout.addWidget(self.reset_btn)
422
+
423
+ self.auto_play_btn = QPushButton("Auto Play")
424
+ self.auto_play_btn.clicked.connect(self.auto_play)
425
+ control_buttons_layout.addWidget(self.auto_play_btn)
426
+
427
+ controls_layout.addLayout(control_buttons_layout)
428
+ controls_group.setLayout(controls_layout)
429
+ left_panel.addWidget(controls_group)
430
+
431
+ # Right panel - Training and visualization
432
+ right_panel = QVBoxLayout()
433
+
434
+ # Training controls
435
+ training_group = QGroupBox("Training Controls")
436
+ training_layout = QVBoxLayout()
437
+
438
+ # Training parameters
439
+ params_layout = QGridLayout()
440
+
441
+ params_layout.addWidget(QLabel("Episodes:"), 0, 0)
442
+ self.episodes_spin = QSpinBox()
443
+ self.episodes_spin.setRange(100, 100000)
444
+ self.episodes_spin.setValue(5000)
445
+ params_layout.addWidget(self.episodes_spin, 0, 1)
446
+
447
+ params_layout.addWidget(QLabel("Learning Rate:"), 1, 0)
448
+ self.lr_spin = QDoubleSpinBox()
449
+ self.lr_spin.setRange(0.0001, 0.01)
450
+ self.lr_spin.setValue(0.001)
451
+ self.lr_spin.setSingleStep(0.0001)
452
+ self.lr_spin.setDecimals(4)
453
+ params_layout.addWidget(self.lr_spin, 1, 1)
454
+
455
+ params_layout.addWidget(QLabel("Gamma:"), 2, 0)
456
+ self.gamma_spin = QDoubleSpinBox()
457
+ self.gamma_spin.setRange(0.1, 0.999)
458
+ self.gamma_spin.setValue(0.99)
459
+ self.gamma_spin.setSingleStep(0.001)
460
+ self.gamma_spin.setDecimals(3)
461
+ params_layout.addWidget(self.gamma_spin, 2, 1)
462
+
463
+ params_layout.addWidget(QLabel("Device:"), 3, 0)
464
+ self.device_combo = QComboBox()
465
+ self.device_combo.addItems(['auto', 'cpu', 'mps', 'cuda'])
466
+ self.device_combo.setCurrentText('auto')
467
+ params_layout.addWidget(self.device_combo, 3, 1)
468
+
469
+ training_layout.addLayout(params_layout)
470
+
471
+ # Training buttons
472
+ train_buttons_layout = QHBoxLayout()
473
+
474
+ self.train_btn = QPushButton("Start Training")
475
+ self.train_btn.clicked.connect(self.toggle_training)
476
+ train_buttons_layout.addWidget(self.train_btn)
477
+
478
+ self.load_model_btn = QPushButton("Load Model")
479
+ self.load_model_btn.clicked.connect(self.load_model)
480
+ train_buttons_layout.addWidget(self.load_model_btn)
481
+
482
+ self.save_model_btn = QPushButton("Save Model")
483
+ self.save_model_btn.clicked.connect(self.save_model)
484
+ train_buttons_layout.addWidget(self.save_model_btn)
485
+
486
+ training_layout.addLayout(train_buttons_layout)
487
+
488
+ # Progress bar
489
+ self.progress_bar = QProgressBar()
490
+ training_layout.addWidget(self.progress_bar)
491
+
492
+ training_group.setLayout(training_layout)
493
+ right_panel.addWidget(training_group)
494
+
495
+ # Statistics
496
+ stats_group = QGroupBox("Training Statistics")
497
+ stats_layout = QVBoxLayout()
498
+
499
+ self.stats_text = QTextEdit()
500
+ self.stats_text.setReadOnly(True)
501
+ self.stats_text.setMaximumHeight(150)
502
+ stats_layout.addWidget(self.stats_text)
503
+
504
+ stats_group.setLayout(stats_layout)
505
+ right_panel.addWidget(stats_group)
506
+
507
+ # Log output
508
+ log_group = QGroupBox("Training Log")
509
+ log_layout = QVBoxLayout()
510
+
511
+ self.log_text = QTextEdit()
512
+ self.log_text.setReadOnly(True)
513
+ log_layout.addWidget(self.log_text)
514
+
515
+ log_group.setLayout(log_layout)
516
+ right_panel.addWidget(log_group)
517
+
518
+ # Add panels to main layout
519
+ main_layout.addLayout(left_panel, 1)
520
+ main_layout.addLayout(right_panel, 1)
521
+
522
+ # Timer for auto-play
523
+ self.auto_play_timer = QTimer()
524
+ self.auto_play_timer.timeout.connect(self.auto_play_step)
525
+
526
+ def toggle_training(self):
527
+ if self.is_training:
528
+ self.stop_training()
529
+ else:
530
+ self.start_training()
531
+
532
+ def start_training(self):
533
+ device = self.device_combo.currentText()
534
+ self.agent = DuelingDQNAgent(
535
+ state_size=27,
536
+ action_size=9,
537
+ learning_rate=self.lr_spin.value(),
538
+ gamma=self.gamma_spin.value(),
539
+ device=device
540
+ )
541
+
542
+ self.training_thread = TrainingThread(
543
+ self.agent,
544
+ TicTacToeEnvironment(),
545
+ episodes=self.episodes_spin.value(),
546
+ save_interval=100
547
+ )
548
+
549
+ self.training_thread.update_signal.connect(self.update_training_stats)
550
+ self.training_thread.finished_signal.connect(self.training_finished)
551
+
552
+ self.is_training = True
553
+ self.train_btn.setText("Stop Training")
554
+ self.status_label.setText("Status: Training...")
555
+ self.progress_bar.setRange(0, self.episodes_spin.value())
556
+
557
+ self.training_thread.start()
558
+
559
+ def stop_training(self):
560
+ if self.training_thread:
561
+ self.training_thread.stop()
562
+ self.training_thread.wait()
563
+ self.is_training = False
564
+ self.train_btn.setText("Start Training")
565
+ self.status_label.setText("Status: Training Stopped")
566
+
567
+ def training_finished(self):
568
+ self.is_training = False
569
+ self.train_btn.setText("Start Training")
570
+ self.status_label.setText("Status: Training Completed")
571
+ self.log_message("Training completed!")
572
+
573
+ def update_training_stats(self, stats):
574
+ self.progress_bar.setValue(stats['episodes'])
575
+
576
+ stats_text = f"""
577
+ Episodes: {stats['episodes']}
578
+ Wins: {stats['wins']} | Losses: {stats['losses']} | Draws: {stats['draws']}
579
+ Win Rate: {stats['wins']/stats['episodes']*100:.1f}%
580
+ Average Reward: {stats['avg_reward']:.3f}
581
+ Epsilon: {stats['epsilon']:.4f}
582
+ Current Loss: {stats['loss']:.4f}
583
+ """.strip()
584
+
585
+ self.stats_text.setText(stats_text)
586
+
587
+ def log_message(self, message):
588
+ timestamp = datetime.datetime.now().strftime("%H:%M:%S")
589
+ self.log_text.append(f"[{timestamp}] {message}")
590
+ # Auto-scroll to bottom
591
+ self.log_text.verticalScrollBar().setValue(
592
+ self.log_text.verticalScrollBar().maximum()
593
+ )
594
+
595
+ def reset_game(self):
596
+ self.env.reset()
597
+ self.update_display()
598
+ self.status_label.setText("Status: New Game Started")
599
+ self.auto_play_timer.stop()
600
+
601
+ def human_move(self, position):
602
+ if self.env.done or self.env.current_player != 1:
603
+ return
604
+
605
+ valid_moves = self.env.get_valid_moves()
606
+ if position in valid_moves:
607
+ state, reward, done, info = self.env.step(position)
608
+ self.update_display()
609
+
610
+ if done:
611
+ self.game_over(info)
612
+ else:
613
+ # Agent's turn
614
+ QTimer.singleShot(500, self.agent_move)
615
+
616
+ def agent_move(self):
617
+ if self.env.done or self.env.current_player != 2:
618
+ return
619
+
620
+ state = self.env.get_state()
621
+ valid_moves = self.env.get_valid_moves()
622
+
623
+ if valid_moves:
624
+ action = self.agent.act(state, valid_moves, training=False)
625
+ next_state, reward, done, info = self.env.step(action)
626
+ self.update_display()
627
+
628
+ if done:
629
+ self.game_over(info)
630
+
631
+ def auto_play(self):
632
+ if self.env.done:
633
+ self.reset_game()
634
+
635
+ self.auto_play_timer.start(1000) # 1 second between moves
636
+ self.status_label.setText("Status: Auto-playing...")
637
+
638
+ def auto_play_step(self):
639
+ if self.env.done:
640
+ self.auto_play_timer.stop()
641
+ self.status_label.setText("Status: Game Over - Auto-play")
642
+ return
643
+
644
+ if self.env.current_player == 1:
645
+ # Human player (random move for demo)
646
+ valid_moves = self.env.get_valid_moves()
647
+ if valid_moves:
648
+ action = random.choice(valid_moves)
649
+ self.env.step(action)
650
+ else:
651
+ # Agent player
652
+ state = self.env.get_state()
653
+ valid_moves = self.env.get_valid_moves()
654
+ if valid_moves:
655
+ action = self.agent.act(state, valid_moves, training=False)
656
+ self.env.step(action)
657
+
658
+ self.update_display()
659
+
660
+ if self.env.done:
661
+ self.auto_play_timer.stop()
662
+ self.game_over({'winner': self.env.winner})
663
+
664
+ def game_over(self, info):
665
+ winner = info.get('winner', 0)
666
+ if winner == 1:
667
+ self.status_label.setText("Status: You Win!")
668
+ elif winner == 2:
669
+ self.status_label.setText("Status: AI Wins!")
670
+ else:
671
+ self.status_label.setText("Status: Draw!")
672
+
673
+ def update_display(self):
674
+ symbols = {0: '', 1: 'X', 2: 'O'}
675
+ colors = {0: 'black', 1: 'blue', 2: 'red'}
676
+
677
+ for i in range(9):
678
+ symbol = symbols[self.env.board[i]]
679
+ color = colors[self.env.board[i]]
680
+ self.board_buttons[i].setText(symbol)
681
+ self.board_buttons[i].setStyleSheet(f"color: {color}; font-weight: bold;")
682
+
683
+ def load_model(self):
684
+ try:
685
+ if self.agent.load_model('models/dueling_dqn_latest.pth'):
686
+ self.log_message("Model loaded successfully!")
687
+ self.status_label.setText("Status: Model Loaded")
688
+ else:
689
+ self.log_message("No saved model found!")
690
+ except Exception as e:
691
+ self.log_message(f"Error loading model: {str(e)}")
692
+
693
+ def save_model(self):
694
+ try:
695
+ self.agent.save_model('models/dueling_dqn_latest.pth')
696
+ self.log_message("Model saved successfully!")
697
+ except Exception as e:
698
+ self.log_message(f"Error saving model: {str(e)}")
699
+
700
+ def main():
701
+ # Create necessary directories
702
+ os.makedirs('models', exist_ok=True)
703
+
704
+ app = QApplication(sys.argv)
705
+
706
+ # Set application style
707
+ app.setStyle('Fusion')
708
+
709
+ # Create and show main window
710
+ window = TicTacToeGUI()
711
+ window.show()
712
+
713
+ # Log startup message
714
+ window.log_message("Dueling DQN Tic-Tac-Toe Application Started")
715
+ window.log_message(f"Using PyTorch {torch.__version__}")
716
+ window.log_message(f"Available devices: CPU: True, CUDA: {torch.cuda.is_available()}, MPS: {torch.backends.mps.is_available()}")
717
+
718
+ sys.exit(app.exec_())
719
+
720
+ if __name__ == '__main__':
721
+ main()
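
A quick aside before the next file: the GUI above only assumes two entry points on the agent, the constructor `DuelingDQNAgent(state_size=27, action_size=9, learning_rate=..., gamma=..., device=...)` and `agent.act(state, valid_moves, training=False)`. Below is a hypothetical headless sketch of the same loop, using only those calls plus the environment methods exercised above (`reset`, `get_state`, `get_valid_moves`, `step`); the hyperparameter values are placeholders, not the app's defaults:

    # Hypothetical headless loop, mirroring what auto_play_step does above.
    env = TicTacToeEnvironment()
    agent = DuelingDQNAgent(state_size=27, action_size=9,
                            learning_rate=0.001, gamma=0.99, device='cpu')  # placeholder values
    env.reset()
    state = env.get_state()
    while not env.done:
        action = agent.act(state, env.get_valid_moves(), training=False)
        state, reward, done, info = env.step(action)
    print(info.get('winner', 0))  # 1 = X, 2 = O, 0 = draw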
Tic Tac Toe RL/eval_models.py ADDED
@@ -0,0 +1,464 @@
1
+ # eval_models.py
2
+ import os
3
+ import glob
4
+ import numpy as np
5
+ import torch
6
+ import matplotlib.pyplot as plt
7
+ import pandas as pd
8
+ from collections import deque
9
+ import logging
10
+
11
+ # Configure logging
12
+ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
13
+ logger = logging.getLogger(__name__)
14
+
15
+ class DuelingDQN(torch.nn.Module):
16
+ def __init__(self, input_size, output_size, hidden_size=256):
17
+ super(DuelingDQN, self).__init__()
18
+ self.input_size = input_size
19
+ self.output_size = output_size
20
+
21
+ # Feature layer
22
+ self.feature = torch.nn.Sequential(
23
+ torch.nn.Linear(input_size, hidden_size),
24
+ torch.nn.ReLU(),
25
+ torch.nn.Linear(hidden_size, hidden_size),
26
+ torch.nn.ReLU(),
27
+ torch.nn.Linear(hidden_size, hidden_size // 2),
28
+ torch.nn.ReLU()
29
+ )
30
+
31
+ # Value stream
32
+ self.value_stream = torch.nn.Sequential(
33
+ torch.nn.Linear(hidden_size // 2, hidden_size // 4),
34
+ torch.nn.ReLU(),
35
+ torch.nn.Linear(hidden_size // 4, 1)
36
+ )
37
+
38
+ # Advantage stream
39
+ self.advantage_stream = torch.nn.Sequential(
40
+ torch.nn.Linear(hidden_size // 2, hidden_size // 4),
41
+ torch.nn.ReLU(),
42
+ torch.nn.Linear(hidden_size // 4, output_size)
43
+ )
44
+
45
+ def forward(self, state):
46
+ features = self.feature(state)
47
+ value = self.value_stream(features)
48
+ advantage = self.advantage_stream(features)
49
+ q_values = value + (advantage - advantage.mean(dim=1, keepdim=True))
50
+ return q_values
51
+
52
+ class TicTacToeEnvironment:
53
+ def __init__(self):
54
+ self.reset()
55
+
56
+ def reset(self):
57
+ self.board = np.zeros(9, dtype=int)
58
+ self.current_player = 1
59
+ self.done = False
60
+ self.winner = 0
61
+ return self.get_state()
62
+
63
+ def get_state(self):
64
+ state = np.zeros(9 * 3, dtype=np.float32)
65
+ for i in range(9):
66
+ if self.board[i] == 0:
67
+ state[i * 3] = 1.0
68
+ elif self.board[i] == 1:
69
+ state[i * 3 + 1] = 1.0
70
+ else:
71
+ state[i * 3 + 2] = 1.0
72
+ return state
73
+
74
+ def get_valid_moves(self):
75
+ return [i for i in range(9) if self.board[i] == 0]
76
+
77
+ def step(self, action):
78
+ if self.done:
79
+ return self.get_state(), 0, True, {}
80
+
81
+ if self.board[action] != 0:
82
+ return self.get_state(), -5, True, {}
83
+
84
+ self.board[action] = self.current_player
85
+
86
+ if self.check_win(self.current_player):
87
+ self.done = True
88
+ self.winner = self.current_player
89
+ reward = 10
90
+ elif len(self.get_valid_moves()) == 0:
91
+ self.done = True
92
+ reward = 2
93
+ else:
94
+ reward = 0.1
95
+ self.current_player = 3 - self.current_player
96
+
97
+ return self.get_state(), reward, self.done, {'winner': self.winner}
98
+
99
+ def check_win(self, player):
100
+ winning_combinations = [
101
+ [0, 1, 2], [3, 4, 5], [6, 7, 8],
102
+ [0, 3, 6], [1, 4, 7], [2, 5, 8],
103
+ [0, 4, 8], [2, 4, 6]
104
+ ]
105
+
106
+ for combo in winning_combinations:
107
+ if all(self.board[i] == player for i in combo):
108
+ return True
109
+ return False
110
+
111
+ def render(self):
112
+ symbols = {0: ' ', 1: 'X', 2: 'O'}
113
+ board_str = "\n"
114
+ for i in range(3):
115
+ row = [symbols[self.board[i*3 + j]] for j in range(3)]
116
+ board_str += " " + " | ".join(row) + " \n"
117
+ if i < 2:
118
+ board_str += "-----------\n"
119
+ return board_str
120
+
121
+ class ModelEvaluator:
122
+ def __init__(self, models_dir='models'):
123
+ self.models_dir = models_dir
124
+ self.device = torch.device("mps" if torch.backends.mps.is_available() else
125
+ "cuda" if torch.cuda.is_available() else "cpu")
126
+ logger.info(f"Using device: {self.device}")
127
+
128
+ def load_model(self, model_path):
129
+ """Load model with compatibility for different PyTorch versions"""
130
+ try:
131
+ # Try weights_only=True first (the default from PyTorch 2.6 on)
132
+ checkpoint = torch.load(model_path, map_location=self.device, weights_only=True)
133
+ except Exception:
134
+ try:
135
+ # Fallback to weights_only=False
136
+ checkpoint = torch.load(model_path, map_location=self.device, weights_only=False)
137
+ except Exception as e:
138
+ logger.error(f"Failed to load model {model_path}: {e}")
139
+ return None
140
+
141
+ state_size = 27
142
+ action_size = 9
143
+ model = DuelingDQN(state_size, action_size).to(self.device)
144
+ model.load_state_dict(checkpoint['policy_net_state_dict'])
145
+ model.eval()
146
+
147
+ return model
148
+
149
+ def evaluate_model(self, model, num_games=100, agent_player=1):
150
+ """Evaluate model performance against random opponent"""
151
+ env = TicTacToeEnvironment()
152
+ wins = 0
153
+ losses = 0
154
+ draws = 0
155
+ total_reward = 0
156
+
157
+ for game in range(num_games):
158
+ state = env.reset()
159
+ game_reward = 0
160
+ steps = 0
161
+
162
+ while not env.done:
163
+ valid_moves = env.get_valid_moves()
164
+
165
+ if env.current_player == agent_player:
166
+ # Agent's turn
167
+ state_tensor = torch.FloatTensor(state).unsqueeze(0).to(self.device)
168
+ with torch.no_grad():
169
+ q_values = model(state_tensor)
170
+
171
+ # Mask invalid moves
172
+ q_values_np = q_values.cpu().numpy()[0]
173
+ for i in range(9):
174
+ if i not in valid_moves:
175
+ q_values_np[i] = -float('inf')
176
+
177
+ action = np.argmax(q_values_np)
178
+ else:
179
+ # Random opponent
180
+ action = np.random.choice(valid_moves)
181
+
182
+ acting_player = env.current_player  # who makes this move
+ state, reward, done, info = env.step(action)
183
+ 
184
+ # env.step switches current_player on non-terminal moves, so credit
+ # the step reward to the player who actually acted.
+ if acting_player == agent_player:
185
+ game_reward += reward
186
+
187
+ steps += 1
188
+
189
+ # Determine game outcome from agent's perspective
190
+ if info['winner'] == agent_player:
191
+ wins += 1
192
+ elif info['winner'] == 0:
193
+ draws += 1
194
+ else:
195
+ losses += 1
196
+
197
+ total_reward += game_reward
198
+
199
+ win_rate = wins / num_games
200
+ avg_reward = total_reward / num_games
201
+
202
+ return {
203
+ 'wins': wins,
204
+ 'losses': losses,
205
+ 'draws': draws,
206
+ 'win_rate': win_rate,
207
+ 'avg_reward': avg_reward
208
+ }
209
+
210
+ def play_interactive_game(self, model):
211
+ """Play an interactive game against the model"""
212
+ env = TicTacToeEnvironment()
213
+
214
+ print("\n🎮 Interactive Game Mode")
215
+ print("You are 'X', AI is 'O'")
216
+ print("Enter moves as numbers 0-8 (left to right, top to bottom):")
217
+ print("0 | 1 | 2")
218
+ print("---------")
219
+ print("3 | 4 | 5")
220
+ print("---------")
221
+ print("6 | 7 | 8")
222
+
223
+ while not env.done:
224
+ print(env.render())
225
+
226
+ if env.current_player == 1: # Human turn
227
+ valid_moves = env.get_valid_moves()
228
+ print(f"Your turn. Valid moves: {valid_moves}")
229
+
230
+ try:
231
+ move = int(input("Enter your move (0-8): "))
232
+ if move not in valid_moves:
233
+ print("Invalid move! Try again.")
234
+ continue
235
+ except ValueError:
236
+ print("Please enter a number between 0-8")
237
+ continue
238
+
239
+ env.step(move)
240
+
241
+ else: # AI turn
242
+ print("AI is thinking...")
243
+ state = env.get_state()
244
+ valid_moves = env.get_valid_moves()
245
+
246
+ state_tensor = torch.FloatTensor(state).unsqueeze(0).to(self.device)
247
+ with torch.no_grad():
248
+ q_values = model(state_tensor)
249
+
250
+ # Mask invalid moves
251
+ q_values_np = q_values.cpu().numpy()[0]
252
+ for i in range(9):
253
+ if i not in valid_moves:
254
+ q_values_np[i] = -float('inf')
255
+
256
+ action = np.argmax(q_values_np)
257
+ env.step(action)
258
+ print(f"AI played move: {action}")
259
+
260
+ print(env.render())
261
+ if env.winner == 1:
262
+ print("🎉 You win!")
263
+ elif env.winner == 2:
264
+ print("🤖 AI wins!")
265
+ else:
266
+ print("🤝 It's a draw!")
267
+
268
+ def run_comprehensive_evaluation(self, num_games_per_model=50):
269
+ """Evaluate all models in the models directory"""
270
+ model_files = glob.glob(os.path.join(self.models_dir, "*.pth"))
271
+ model_files.sort(key=lambda p: self._extract_episode_number(os.path.basename(p)))  # numeric episode order (a plain name sort puts episode_1000 before episode_200)
272
+
273
+ if not model_files:
274
+ logger.error("No model files found in models directory!")
275
+ return
276
+
277
+ results = []
278
+
279
+ print(f"\n🔍 Evaluating {len(model_files)} models with {num_games_per_model} games each...")
280
+
281
+ for model_file in model_files:
282
+ model_name = os.path.basename(model_file)
283
+ print(f"\nEvaluating: {model_name}")
284
+
285
+ model = self.load_model(model_file)
286
+ if model is None:
287
+ continue
288
+
289
+ # Evaluate as both player 1 and player 2
290
+ stats_p1 = self.evaluate_model(model, num_games_per_model, agent_player=1)
291
+ stats_p2 = self.evaluate_model(model, num_games_per_model, agent_player=2)
292
+
293
+ # Extract episode number from filename
294
+ episode_num = self._extract_episode_number(model_name)
295
+
296
+ result = {
297
+ 'model_file': model_name,
298
+ 'episode': episode_num,
299
+ 'win_rate_p1': stats_p1['win_rate'],
300
+ 'win_rate_p2': stats_p2['win_rate'],
301
+ 'avg_win_rate': (stats_p1['win_rate'] + stats_p2['win_rate']) / 2,
302
+ 'wins_p1': stats_p1['wins'],
303
+ 'losses_p1': stats_p1['losses'],
304
+ 'draws_p1': stats_p1['draws'],
305
+ 'wins_p2': stats_p2['wins'],
306
+ 'losses_p2': stats_p2['losses'],
307
+ 'draws_p2': stats_p2['draws'],
308
+ 'avg_reward_p1': stats_p1['avg_reward'],
309
+ 'avg_reward_p2': stats_p2['avg_reward']
310
+ }
311
+
312
+ results.append(result)
313
+
314
+ print(f" As Player 1: Win Rate: {stats_p1['win_rate']:.1%}")
315
+ print(f" As Player 2: Win Rate: {stats_p2['win_rate']:.1%}")
316
+ print(f" Average Win Rate: {result['avg_win_rate']:.1%}")
317
+
318
+ # Save results to CSV
319
+ self._save_results_to_csv(results)
320
+
321
+ # Create visualization
322
+ self._create_visualizations(results)
323
+
324
+ # Find and test the best model
325
+ best_model_info = max(results, key=lambda x: x['avg_win_rate'])
326
+ best_model_path = os.path.join(self.models_dir, best_model_info['model_file'])
327
+ best_model = self.load_model(best_model_path)
328
+
329
+ print(f"\n🏆 Best Model: {best_model_info['model_file']}")
330
+ print(f" Average Win Rate: {best_model_info['avg_win_rate']:.1%}")
331
+
332
+ # Interactive game with best model
333
+ while True:
334
+ play = input("\nWould you like to play against the best model? (y/n): ").lower().strip()
335
+ if play == 'y':
336
+ self.play_interactive_game(best_model)
337
+
338
+ play_again = input("\nPlay again? (y/n): ").lower().strip()
339
+ if play_again != 'y':
340
+ break
341
+ else:
342
+ break
343
+
344
+ def _extract_episode_number(self, filename):
345
+ """Extract episode number from filename"""
346
+ import re
347
+ match = re.search(r'episode_(\d+)', filename)
348
+ return int(match.group(1)) if match else 0
349
+
350
+ def _save_results_to_csv(self, results):
351
+ """Save evaluation results to CSV"""
352
+ df = pd.DataFrame(results)
353
+ csv_path = 'model_evaluation_results.csv'
354
+ df.to_csv(csv_path, index=False)
355
+ print(f"\n📊 Results saved to: {csv_path}")
356
+
357
+ # Print summary statistics
358
+ print(f"\n📈 Summary Statistics:")
359
+ print(f" Models evaluated: {len(results)}")
360
+ print(f" Best win rate: {df['avg_win_rate'].max():.1%}")
361
+ print(f" Worst win rate: {df['avg_win_rate'].min():.1%}")
362
+ print(f" Average win rate: {df['avg_win_rate'].mean():.1%}")
363
+
364
+ def _create_visualizations(self, results):
365
+ """Create visualization plots for model performance"""
366
+ episodes = [r['episode'] for r in results]
367
+ win_rates_p1 = [r['win_rate_p1'] for r in results]
368
+ win_rates_p2 = [r['win_rate_p2'] for r in results]
369
+ avg_win_rates = [r['avg_win_rate'] for r in results]
370
+
371
+ plt.figure(figsize=(12, 8))
372
+
373
+ plt.subplot(2, 2, 1)
374
+ plt.plot(episodes, win_rates_p1, 'b-', label='As Player 1', alpha=0.7)
375
+ plt.plot(episodes, win_rates_p2, 'r-', label='As Player 2', alpha=0.7)
376
+ plt.plot(episodes, avg_win_rates, 'g-', label='Average', linewidth=2)
377
+ plt.xlabel('Training Episode')
378
+ plt.ylabel('Win Rate')
379
+ plt.title('Model Performance vs Random Opponent')
380
+ plt.legend()
381
+ plt.grid(True, alpha=0.3)
382
+
383
+ plt.subplot(2, 2, 2)
384
+ plt.scatter(episodes, avg_win_rates, c=avg_win_rates, cmap='viridis', alpha=0.6)
385
+ plt.colorbar(label='Win Rate')
386
+ plt.xlabel('Training Episode')
387
+ plt.ylabel('Average Win Rate')
388
+ plt.title('Learning Progress')
389
+ plt.grid(True, alpha=0.3)
390
+
391
+ plt.subplot(2, 2, 3)
392
+ outcomes_p1 = np.array([(r['wins_p1'], r['losses_p1'], r['draws_p1']) for r in results])
393
+ outcomes_p1 = outcomes_p1 / outcomes_p1.sum(axis=1, keepdims=True)
394
+ plt.stackplot(episodes, outcomes_p1.T, labels=['Wins', 'Losses', 'Draws'], alpha=0.7)
395
+ plt.xlabel('Training Episode')
396
+ plt.ylabel('Proportion')
397
+ plt.title('Outcomes as Player 1')
398
+ plt.legend()
399
+ plt.grid(True, alpha=0.3)
400
+
401
+ plt.subplot(2, 2, 4)
402
+ outcomes_p2 = np.array([(r['wins_p2'], r['losses_p2'], r['draws_p2']) for r in results])
403
+ outcomes_p2 = outcomes_p2 / outcomes_p2.sum(axis=1, keepdims=True)
404
+ plt.stackplot(episodes, outcomes_p2.T, labels=['Wins', 'Losses', 'Draws'], alpha=0.7)
405
+ plt.xlabel('Training Episode')
406
+ plt.ylabel('Proportion')
407
+ plt.title('Outcomes as Player 2')
408
+ plt.legend()
409
+ plt.grid(True, alpha=0.3)
410
+
411
+ plt.tight_layout()
412
+ plt.savefig('model_performance_analysis.png', dpi=300, bbox_inches='tight')
413
+ print("📈 Visualization saved as: model_performance_analysis.png")
414
+
415
+ def main():
416
+ evaluator = ModelEvaluator()
417
+
418
+ print("=" * 60)
419
+ print("🤖 Dueling DQN Tic-Tac-Toe Model Evaluator")
420
+ print("=" * 60)
421
+
422
+ while True:
423
+ print("\nOptions:")
424
+ print("1. Comprehensive evaluation of all models")
425
+ print("2. Interactive game with specific model")
426
+ print("3. Exit")
427
+
428
+ choice = input("\nEnter your choice (1-3): ").strip()
429
+
430
+ if choice == '1':
431
+ num_games = input("Enter number of games per model (default 50): ").strip()
432
+ num_games = int(num_games) if num_games.isdigit() else 50
433
+ evaluator.run_comprehensive_evaluation(num_games)
434
+
435
+ elif choice == '2':
436
+ model_files = glob.glob(os.path.join(evaluator.models_dir, "*.pth"))
437
+ if not model_files:
438
+ print("No model files found in models directory!")
439
+ continue
440
+
441
+ print("\nAvailable models:")
442
+ for i, model_file in enumerate(model_files, 1):
443
+ print(f"{i}. {os.path.basename(model_file)}")
444
+
445
+ try:
446
+ model_choice = int(input(f"\nSelect model (1-{len(model_files)}): ")) - 1
447
+ if 0 <= model_choice < len(model_files):
448
+ model = evaluator.load_model(model_files[model_choice])
449
+ if model:
450
+ evaluator.play_interactive_game(model)
451
+ else:
452
+ print("Invalid selection!")
453
+ except ValueError:
454
+ print("Please enter a valid number!")
455
+
456
+ elif choice == '3':
457
+ print("Goodbye!")
458
+ break
459
+
460
+ else:
461
+ print("Invalid choice! Please enter 1, 2, or 3.")
462
+
463
+ if __name__ == '__main__':
464
+ main()
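
One detail in `DuelingDQN.forward` above worth calling out: Q-values are assembled as Q(s, a) = V(s) + (A(s, a) - mean_a' A(s, a')). Subtracting the mean advantage pins down the otherwise unidentifiable split between the value and advantage streams. A self-contained sanity check of that arithmetic (plain PyTorch, no project code required):

    import torch

    # With a zero-mean advantage vector, Q is just V shifted by each action's advantage.
    value = torch.tensor([[1.0]])                  # V(s)
    advantage = torch.tensor([[0.5, -0.5, 0.0]])   # A(s, a) for 3 actions
    q = value + (advantage - advantage.mean(dim=1, keepdim=True))
    assert torch.allclose(q, torch.tensor([[1.5, 0.5, 1.0]]))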
Tic Tac Toe RL/model_evaluation_results.csv ADDED
@@ -0,0 +1,52 @@
1
+ model_file,episode,win_rate_p1,win_rate_p2,avg_win_rate,wins_p1,losses_p1,draws_p1,wins_p2,losses_p2,draws_p2,avg_reward_p1,avg_reward_p2
2
+ dueling_dqn_episode_100.pth,100,0.76,0.44,0.6,38,10,2,22,23,5,7.982000000000002,4.752000000000004
3
+ dueling_dqn_episode_1000.pth,1000,0.58,0.14,0.36,29,17,4,7,30,13,6.278000000000002,1.76
4
+ dueling_dqn_episode_1100.pth,1100,0.52,0.12,0.32,26,12,12,6,35,9,6.004000000000003,1.5460000000000003
5
+ dueling_dqn_episode_1200.pth,1200,0.48,0.22,0.35,24,17,9,11,29,10,5.488000000000003,2.5500000000000007
6
+ dueling_dqn_episode_1300.pth,1300,0.64,0.16,0.4,32,12,6,8,34,8,6.958000000000001,1.9580000000000015
7
+ dueling_dqn_episode_1400.pth,1400,0.52,0.24,0.38,26,11,13,12,33,5,6.072,2.7560000000000016
8
+ dueling_dqn_episode_1500.pth,1500,0.56,0.28,0.42000000000000004,28,14,8,14,30,6,6.238000000000002,3.150000000000003
9
+ dueling_dqn_episode_1600.pth,1600,0.4,0.14,0.27,20,27,3,7,39,4,4.438000000000004,1.7460000000000002
10
+ dueling_dqn_episode_1700.pth,1700,0.36,0.16,0.26,18,26,6,8,39,3,4.160000000000003,1.942000000000001
11
+ dueling_dqn_episode_1800.pth,1800,0.34,0.04,0.19,17,28,5,2,46,2,3.912000000000004,0.7399999999999993
12
+ dueling_dqn_episode_1900.pth,1900,0.38,0.06,0.22,19,26,5,3,43,4,4.320000000000003,0.938
13
+ dueling_dqn_episode_200.pth,200,0.7,0.32,0.51,35,8,7,16,24,10,7.602,3.542000000000002
14
+ dueling_dqn_episode_2000.pth,2000,0.44,0.12,0.28,22,24,4,6,40,4,4.888000000000004,1.5539999999999998
15
+ dueling_dqn_episode_2100.pth,2100,0.32,0.26,0.29000000000000004,16,32,2,13,33,4,3.5820000000000034,2.9460000000000006
16
+ dueling_dqn_episode_2200.pth,2200,0.36,0.14,0.25,18,30,2,7,38,5,3.992000000000003,1.734
17
+ dueling_dqn_episode_2300.pth,2300,0.52,0.14,0.33,26,20,4,7,36,7,5.682,1.743999999999999
18
+ dueling_dqn_episode_2400.pth,2400,0.56,0.22,0.39,28,18,4,11,36,3,6.100000000000001,2.5480000000000014
19
+ dueling_dqn_episode_2500.pth,2500,0.26,0.14,0.2,13,25,12,7,37,6,3.4080000000000026,1.7520000000000002
20
+ dueling_dqn_episode_2600.pth,2600,0.46,0.18,0.32,23,21,6,9,36,5,5.174000000000003,2.134000000000001
21
+ dueling_dqn_episode_2700.pth,2700,0.48,0.2,0.33999999999999997,24,15,11,10,37,3,5.586000000000001,2.3320000000000003
22
+ dueling_dqn_episode_2800.pth,2800,0.3,0.14,0.22,15,22,13,7,42,1,3.8620000000000014,1.7440000000000004
23
+ dueling_dqn_episode_2900.pth,2900,0.4,0.12,0.26,20,23,7,6,42,2,4.600000000000003,1.534
24
+ dueling_dqn_episode_300.pth,300,0.6,0.4,0.5,30,17,3,20,27,3,6.379999999999999,4.330000000000002
25
+ dueling_dqn_episode_3000.pth,3000,0.4,0.18,0.29000000000000004,20,23,7,9,40,1,4.610000000000003,2.1280000000000006
26
+ dueling_dqn_episode_3100.pth,3100,0.48,0.16,0.32,24,20,6,8,40,2,5.3720000000000026,1.9460000000000008
27
+ dueling_dqn_episode_3200.pth,3200,0.24,0.14,0.19,12,26,12,7,42,1,3.2040000000000015,1.7420000000000002
28
+ dueling_dqn_episode_3300.pth,3300,0.34,0.22,0.28,17,27,6,11,37,2,3.960000000000002,2.5380000000000007
29
+ dueling_dqn_episode_3400.pth,3400,0.52,0.1,0.31,26,15,9,5,43,2,5.8980000000000015,1.3279999999999992
30
+ dueling_dqn_episode_3500.pth,3500,0.44,0.2,0.32,22,24,4,10,37,3,4.890000000000004,2.3420000000000005
31
+ dueling_dqn_episode_3600.pth,3600,0.42,0.16,0.29,21,19,10,8,42,0,4.946000000000002,1.9320000000000008
32
+ dueling_dqn_episode_3700.pth,3700,0.28,0.32,0.30000000000000004,14,26,10,16,34,0,3.534000000000002,3.5420000000000016
33
+ dueling_dqn_episode_3800.pth,3800,0.42,0.08,0.25,21,23,6,4,46,0,4.760000000000004,1.1299999999999992
34
+ dueling_dqn_episode_3900.pth,3900,0.36,0.2,0.28,18,24,8,10,37,3,4.250000000000002,2.3280000000000003
35
+ dueling_dqn_episode_400.pth,400,0.54,0.32,0.43000000000000005,27,19,4,16,27,7,5.882000000000002,3.5400000000000036
36
+ dueling_dqn_episode_4000.pth,4000,0.46,0.08,0.27,23,17,10,4,40,6,5.3480000000000025,1.1399999999999995
37
+ dueling_dqn_episode_4100.pth,4100,0.48,0.22,0.35,24,19,7,11,36,3,5.424000000000002,2.5400000000000005
38
+ dueling_dqn_episode_4200.pth,4200,0.44,0.22,0.33,22,18,10,11,31,8,5.130000000000003,2.564000000000001
39
+ dueling_dqn_episode_4300.pth,4300,0.42,0.16,0.29,21,21,8,8,36,6,4.852000000000003,1.9479999999999993
40
+ dueling_dqn_episode_4400.pth,4400,0.64,0.24,0.44,32,9,9,12,30,8,7.092,2.7540000000000013
41
+ dueling_dqn_episode_4500.pth,4500,0.62,0.24,0.43,31,11,8,12,33,5,6.85,2.7500000000000004
42
+ dueling_dqn_episode_4600.pth,4600,0.54,0.26,0.4,27,16,7,13,32,5,6.016000000000003,2.9560000000000013
43
+ dueling_dqn_episode_4700.pth,4700,0.62,0.22,0.42,31,13,6,11,32,7,6.7760000000000025,2.5400000000000005
44
+ dueling_dqn_episode_4800.pth,4800,0.56,0.3,0.43000000000000005,28,16,6,15,30,5,6.172000000000004,3.352000000000002
45
+ dueling_dqn_episode_4900.pth,4900,0.44,0.44,0.44,22,24,4,22,23,5,4.8720000000000026,4.7520000000000024
46
+ dueling_dqn_episode_500.pth,500,0.6,0.16,0.38,30,14,6,8,32,10,6.560000000000003,1.9440000000000008
47
+ dueling_dqn_episode_5000.pth,5000,0.54,0.3,0.42000000000000004,27,18,5,15,26,9,5.928000000000003,3.354000000000002
48
+ dueling_dqn_episode_600.pth,600,0.5,0.16,0.33,25,22,3,8,39,3,5.422000000000002,1.9399999999999995
49
+ dueling_dqn_episode_700.pth,700,0.52,0.12,0.32,26,16,8,6,34,10,5.846000000000003,1.56
50
+ dueling_dqn_episode_800.pth,800,0.42,0.06,0.24,21,23,6,3,37,10,4.752000000000003,0.9339999999999997
51
+ dueling_dqn_episode_900.pth,900,0.4,0.12,0.26,20,22,8,6,39,5,4.646000000000005,1.5459999999999996
52
+ dueling_dqn_latest.pth,0,0.44,0.22,0.33,22,14,14,11,33,6,5.320000000000004,2.5620000000000016
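
The CSV above is exactly what `_save_results_to_csv` writes; by these numbers the early checkpoints (episodes 100-300) score best against the random opponent, with dueling_dqn_episode_100.pth on top at a 0.60 average win rate. A hypothetical two-liner for ranking the checkpoints yourself (column names as in the header row):

    import pandas as pd

    df = pd.read_csv('model_evaluation_results.csv')
    print(df.nlargest(5, 'avg_win_rate')[['model_file', 'episode', 'avg_win_rate']])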
Tic Tac Toe RL/model_performance_analysis.png ADDED

Git LFS Details

  • SHA256: 6e803844070c372d8111e48cca8e77494e15733098c2d08bbcfa2e0936c9ae8f
  • Pointer size: 131 Bytes
  • Size of remote file: 759 kB
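
Everything from here down is checkpoint weights stored through Git LFS: each `.pth` file appears as a three-line pointer (spec version, SHA-256 oid, byte size) rather than the tensor data itself, so after cloning the repository the real checkpoints have to be fetched with `git lfs pull`.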
Tic Tac Toe RL/models/dueling_dqn_episode_100.pth ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a7889831812ac7b14064856f34ee5ab01ee87dee643e92ac7f69c17fa90edd81
3
+ size 2197905
Tic Tac Toe RL/models/dueling_dqn_episode_1000.pth ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa605d7d49dad36efe8306934fa7113b4b228e7cce90ed8f4b6435f6e941dc2a
3
+ size 4454493
Tic Tac Toe RL/models/dueling_dqn_episode_1100.pth ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d803d2395adece1d6400332108be84ba422cb12a84ebcb2215a676945f5a215e
3
+ size 4644317
Tic Tac Toe RL/models/dueling_dqn_episode_1200.pth ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a88625a6137c3739c1e82e59ecf81d8f28b86658b0f6b32deb7ceffd29cc3db2
3
+ size 4851101
Tic Tac Toe RL/models/dueling_dqn_episode_1300.pth ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9384217041ff8518fe68cba9eb04021149c964cb1add5f9de10b66efd1c701c2
3
+ size 5065565
Tic Tac Toe RL/models/dueling_dqn_episode_1400.pth ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7f74e2a0d223abc6bf0960cc34192a764a11d3e902dfcb286affb7d1ea3214ab
3
+ size 5130525
Tic Tac Toe RL/models/dueling_dqn_episode_1500.pth ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f29380fab72c71b4ee476e6a314125cb9bb451c397d20b04376fa98cd09afb5d
3
+ size 5146717
Tic Tac Toe RL/models/dueling_dqn_episode_1600.pth ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1f7eb75fd7b88b7cbb4adec1e0071928dfd67ea69cba879fa24ce40e6e3bac39
3
+ size 5152861
Tic Tac Toe RL/models/dueling_dqn_episode_1700.pth ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b418935e207e9eaa70bb1779ecc165ba65ffcec8142b77abdb724ff5abc96653
3
+ size 5156765
Tic Tac Toe RL/models/dueling_dqn_episode_1800.pth ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:50edd4d7243009e1d5c1e813ad87efc34bc7c01cc5165b81390639e58fd2d8ac
3
+ size 5153437
Tic Tac Toe RL/models/dueling_dqn_episode_1900.pth ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:be548035d7be186486838d2265b1c3d95f8fa2b11eda0a8f8c8642223fe5ee48
3
+ size 5152733
Tic Tac Toe RL/models/dueling_dqn_episode_200.pth ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:19050edfa22a03f7e0443f695500c1a68307aadf58f2199438ef95a289dc2eec
3
+ size 2417233
Tic Tac Toe RL/models/dueling_dqn_episode_2000.pth ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ec3e4ed4d208f5a75da8e58c70cb83464d9037719a358bebea76b69786d928b9
3
+ size 5152221
Tic Tac Toe RL/models/dueling_dqn_episode_2100.pth ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6be461c61433f8516fcf60b1f9b91a81f9492698b76c831e5dec7748e7fb0914
3
+ size 5152157
Tic Tac Toe RL/models/dueling_dqn_episode_2200.pth ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1f61d3fb28f2e81314bebb4375afc14a71b5861cabc5876160e41dc0f581b4aa
3
+ size 5154077
Tic Tac Toe RL/models/dueling_dqn_episode_2300.pth ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ddae05e38057f556a29cb233e37ace40274cb3a03b7e0b29319be844cf38870c
3
+ size 5149469
Tic Tac Toe RL/models/dueling_dqn_episode_2400.pth ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b4109bcc122f27be544e91d46efb9ac0d2f9769c2ce7ac6c38689672f42deacc
3
+ size 5150621
Tic Tac Toe RL/models/dueling_dqn_episode_2500.pth ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8d8807d7202152b6b65a158ca4385529eccb34943657b1349d1fe7b4ff332c3f
3
+ size 5153629
Tic Tac Toe RL/models/dueling_dqn_episode_2600.pth ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:11b052c2ff7adc88fd510639db8e4a96eeaab48fcf2ea5b24b7343532766f1c1
3
+ size 5150877
Tic Tac Toe RL/models/dueling_dqn_episode_2700.pth ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:015cd27b690ab04191663a5ceb67e29ace9d36b3c65204360dc38c3acd14f531
3
+ size 5150173
Tic Tac Toe RL/models/dueling_dqn_episode_2800.pth ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:91b10e1a94413e0c6fe284d706b5a735c7480025285a80906c2f08c46b9b72bc
3
+ size 5149789
Tic Tac Toe RL/models/dueling_dqn_episode_2900.pth ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3cba87fbbd59e713bc4e6d217762ea09ee2c3b9c06b2d09f1ceb40efe513e459
3
+ size 5146397
Tic Tac Toe RL/models/dueling_dqn_episode_300.pth ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:766075d03432a67b90d07f5bae5aae62741318d78717c1f5191a3563ff3801a8
3
+ size 2659281
Tic Tac Toe RL/models/dueling_dqn_episode_3000.pth ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:40f78912717adff486ef96d9f410e696fdce3af785a3907ddad83f458db60be5
3
+ size 5145501
Tic Tac Toe RL/models/dueling_dqn_episode_3100.pth ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:216edbd69bf3c21d246e4a82566b3a9eea7e9b2a989a81413079da9404dd6a50
3
+ size 5144285
Tic Tac Toe RL/models/dueling_dqn_episode_3200.pth ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b7e0e306d3ed614aa648a650e174e3baad4dcfeb7f0e3855cb4bdc8942eebeab
3
+ size 5145629
Tic Tac Toe RL/models/dueling_dqn_episode_3300.pth ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8b1f1f8404693a759e62b1f985bd5a7b7f2c81ade12935ab95f5d0ce07ed0675
3
+ size 5149661
Tic Tac Toe RL/models/dueling_dqn_episode_3400.pth ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e925beaad67ac5a006ee6f74a7db596640c1217ac1a540e311e80ca50798427c
3
+ size 5151965
Tic Tac Toe RL/models/dueling_dqn_episode_3500.pth ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:213d2e33f51609a0b0442065eafb77c4316280f78c059bce99c55aec2645c193
3
+ size 5151581
Tic Tac Toe RL/models/dueling_dqn_episode_3600.pth ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bae1cb761c8b82b183f2b057dc06006b58e5e08763d106b4d2fc91b7eee66916
3
+ size 5144733
Tic Tac Toe RL/models/dueling_dqn_episode_3700.pth ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:85721bf70830cce1615d227ad69238e6b045104b68c8c4c2105e6509d112223d
3
+ size 5136349