TroglodyteDerivations committed
Commit 661308e · verified · 1 Parent(s): 86134e9

Upload 73 files

This view is limited to 50 files because the commit contains too many changes. See the raw diff for the full list.
Files changed (50)
  1. .gitattributes +5 -0
  2. Imitation Learning Tic Tac Toe AI 2/analyze_ttt_model.py +377 -0
  3. Imitation Learning Tic Tac Toe AI 2/create_pretrained_ai.py +129 -0
  4. Imitation Learning Tic Tac Toe AI 2/improved_game.py +529 -0
  5. Imitation Learning Tic Tac Toe AI 2/requirements.txt +2 -0
  6. Imitation Learning Tic Tac Toe AI 2/ttt_ai_model_improved.pkl +3 -0
  7. Imitation Learning Tic Tac Toe AI 2/ttt_diagnostic.py +351 -0
  8. Imitation Learning Tic Tac Toe AI/Screenshot 2025-12-02 at 12.30.57 PM.png +3 -0
  9. Imitation Learning Tic Tac Toe AI/Screenshot 2025-12-02 at 12.33.32 PM.png +3 -0
  10. Imitation Learning Tic Tac Toe AI/Screenshot 2025-12-02 at 12.33.43 PM.png +3 -0
  11. Imitation Learning Tic Tac Toe AI/analyze_ttt_model.py +377 -0
  12. Imitation Learning Tic Tac Toe AI/requirements.txt +2 -0
  13. Imitation Learning Tic Tac Toe AI/tic_tac_toe_ai.py +483 -0
  14. Imitation Learning Tic Tac Toe AI/ttt_ai_model.pkl +3 -0
  15. Imitation Learning Tic Tac Toe AI/ttt_diagnostic.py +351 -0
  16. Tic Tac Toe RL/app.py +721 -0
  17. Tic Tac Toe RL/eval_models.py +464 -0
  18. Tic Tac Toe RL/model_evaluation_results.csv +52 -0
  19. Tic Tac Toe RL/model_performance_analysis.png +3 -0
  20. Tic Tac Toe RL/models/dueling_dqn_episode_100.pth +3 -0
  21. Tic Tac Toe RL/models/dueling_dqn_episode_1000.pth +3 -0
  22. Tic Tac Toe RL/models/dueling_dqn_episode_1100.pth +3 -0
  23. Tic Tac Toe RL/models/dueling_dqn_episode_1200.pth +3 -0
  24. Tic Tac Toe RL/models/dueling_dqn_episode_1300.pth +3 -0
  25. Tic Tac Toe RL/models/dueling_dqn_episode_1400.pth +3 -0
  26. Tic Tac Toe RL/models/dueling_dqn_episode_1500.pth +3 -0
  27. Tic Tac Toe RL/models/dueling_dqn_episode_1600.pth +3 -0
  28. Tic Tac Toe RL/models/dueling_dqn_episode_1700.pth +3 -0
  29. Tic Tac Toe RL/models/dueling_dqn_episode_1800.pth +3 -0
  30. Tic Tac Toe RL/models/dueling_dqn_episode_1900.pth +3 -0
  31. Tic Tac Toe RL/models/dueling_dqn_episode_200.pth +3 -0
  32. Tic Tac Toe RL/models/dueling_dqn_episode_2000.pth +3 -0
  33. Tic Tac Toe RL/models/dueling_dqn_episode_2100.pth +3 -0
  34. Tic Tac Toe RL/models/dueling_dqn_episode_2200.pth +3 -0
  35. Tic Tac Toe RL/models/dueling_dqn_episode_2300.pth +3 -0
  36. Tic Tac Toe RL/models/dueling_dqn_episode_2400.pth +3 -0
  37. Tic Tac Toe RL/models/dueling_dqn_episode_2500.pth +3 -0
  38. Tic Tac Toe RL/models/dueling_dqn_episode_2600.pth +3 -0
  39. Tic Tac Toe RL/models/dueling_dqn_episode_2700.pth +3 -0
  40. Tic Tac Toe RL/models/dueling_dqn_episode_2800.pth +3 -0
  41. Tic Tac Toe RL/models/dueling_dqn_episode_2900.pth +3 -0
  42. Tic Tac Toe RL/models/dueling_dqn_episode_300.pth +3 -0
  43. Tic Tac Toe RL/models/dueling_dqn_episode_3000.pth +3 -0
  44. Tic Tac Toe RL/models/dueling_dqn_episode_3100.pth +3 -0
  45. Tic Tac Toe RL/models/dueling_dqn_episode_3200.pth +3 -0
  46. Tic Tac Toe RL/models/dueling_dqn_episode_3300.pth +3 -0
  47. Tic Tac Toe RL/models/dueling_dqn_episode_3400.pth +3 -0
  48. Tic Tac Toe RL/models/dueling_dqn_episode_3500.pth +3 -0
  49. Tic Tac Toe RL/models/dueling_dqn_episode_3600.pth +3 -0
  50. Tic Tac Toe RL/models/dueling_dqn_episode_3700.pth +3 -0
.gitattributes CHANGED
@@ -33,3 +33,8 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ Imitation[[:space:]]Learning[[:space:]]Tic[[:space:]]Tac[[:space:]]Toe[[:space:]]AI/Screenshot[[:space:]]2025-12-02[[:space:]]at[[:space:]]12.30.57 PM.png filter=lfs diff=lfs merge=lfs -text
+ Imitation[[:space:]]Learning[[:space:]]Tic[[:space:]]Tac[[:space:]]Toe[[:space:]]AI/Screenshot[[:space:]]2025-12-02[[:space:]]at[[:space:]]12.33.32 PM.png filter=lfs diff=lfs merge=lfs -text
+ Imitation[[:space:]]Learning[[:space:]]Tic[[:space:]]Tac[[:space:]]Toe[[:space:]]AI/Screenshot[[:space:]]2025-12-02[[:space:]]at[[:space:]]12.33.43 PM.png filter=lfs diff=lfs merge=lfs -text
+ output.mp4 filter=lfs diff=lfs merge=lfs -text
+ Tic[[:space:]]Tac[[:space:]]Toe[[:space:]]RL/model_performance_analysis.png filter=lfs diff=lfs merge=lfs -text
Imitation Learning Tic Tac Toe AI 2/analyze_ttt_model.py ADDED
@@ -0,0 +1,377 @@
+ import pickle
+ import numpy as np
+ import os
+ from collections import deque
+ import json
+
+ def load_pickle_file(filename="ttt_ai_model.pkl"):
+     """
+     Load and analyze the Tic-Tac-Toe AI model pickle file.
+
+     Args:
+         filename (str): Path to the pickle file
+
+     Returns:
+         dict: The loaded data or None if file doesn't exist
+     """
+
+     if not os.path.exists(filename):
+         print(f"❌ File '{filename}' not found!")
+         print("Possible reasons:")
+         print("1. The game hasn't been played yet")
+         print("2. The file was saved with a different name")
+         print("3. The file is in a different directory")
+         return None
+
+     try:
+         print(f"📂 Opening '{filename}'...")
+
+         # Load the pickle file
+         with open(filename, 'rb') as f:
+             data = pickle.load(f)
+
+         print("✅ File loaded successfully!")
+         print("\n" + "="*60)
+
+         return data
+
+     except Exception as e:
+         print(f"❌ Error loading pickle file: {e}")
+         print(f"Error type: {type(e).__name__}")
+         return None
+
+ def analyze_model(data):
+     """
+     Analyze and display information about the AI model.
+
+     Args:
+         data (dict): The loaded pickle data
+     """
+
+     if not data:
+         print("No data to analyze")
+         return
+
+     print("📊 MODEL ANALYSIS")
+     print("="*60)
+
+     # Check what keys are available
+     print(f"Keys in data: {list(data.keys())}")
+
+     # Analyze model matrix if present
+     if 'model' in data:
+         model = data['model']
+         print(f"\n🤖 AI Model Information:")
+         print(f"   Shape: {model.shape}")
+         print(f"   Size: {model.size:,} elements")
+         print(f"   Data type: {model.dtype}")
+
+         # Calculate some statistics
+         print(f"\n📈 Model Statistics:")
+         print(f"   Non-zero entries: {np.count_nonzero(model):,}")
+         print(f"   Zero entries: {np.sum(model == 0):,}")
+         print(f"   Sparsity: {(np.sum(model == 0) / model.size) * 100:.2f}%")
+
+         # Get min, max, mean values
+         if model.size > 0:
+             flat_model = model.flatten()
+             non_zero_values = flat_model[flat_model != 0]
+
+             if len(non_zero_values) > 0:
+                 print(f"   Min value (non-zero): {non_zero_values.min():.6f}")
+                 print(f"   Max value: {flat_model.max():.6f}")
+                 print(f"   Mean value (non-zero): {non_zero_values.mean():.6f}")
+                 print(f"   Std dev (non-zero): {non_zero_values.std():.6f}")
+
+                 # Count of positive vs negative values
+                 positive = np.sum(flat_model > 0)
+                 negative = np.sum(flat_model < 0)
+                 print(f"   Positive values: {positive:,}")
+                 print(f"   Negative values: {negative:,}")
+
+     # Analyze experience replay if present
+     if 'experience' in data:
+         experience = data['experience']
+         print(f"\n🎮 Experience Replay Buffer:")
+         print(f"   Number of experiences: {len(experience):,}")
+
+         if experience:
+             # Show first few experiences
+             print(f"   Sample experience (first):")
+             if hasattr(experience[0], '__len__'):
+                 print(f"   Length: {len(experience[0])}")
+                 if len(experience[0]) > 0:
+                     print(f"   First element type: {type(experience[0][0])}")
+
+     # Check for other data
+     for key in data.keys():
+         if key not in ['model', 'experience']:
+             value = data[key]
+             print(f"\n🔍 {key}:")
+             print(f"   Type: {type(value)}")
+             if isinstance(value, (list, tuple, deque)):
+                 print(f"   Length: {len(value)}")
+             elif isinstance(value, dict):
+                 print(f"   Keys: {list(value.keys())[:5]}..." if len(value) > 5 else f"   Keys: {list(value.keys())}")
+             elif isinstance(value, np.ndarray):
+                 print(f"   Shape: {value.shape}")
+
+ def examine_specific_states(model, num_states=5):
+     """
+     Examine specific state-action values in the model.
+
+     Args:
+         model (np.ndarray): The AI model
+         num_states (int): Number of states to examine
+     """
+
+     print(f"\n🔬 Examining {num_states} specific states:")
+     print("-"*40)
+
+     # Find states with non-zero values
+     non_zero_indices = np.nonzero(np.any(model != 0, axis=1))[0]
+
+     if len(non_zero_indices) > 0:
+         print(f"Found {len(non_zero_indices):,} states with learned values")
+
+         # Sample some states to examine
+         if len(non_zero_indices) > num_states:
+             sample_indices = np.random.choice(non_zero_indices, num_states, replace=False)
+         else:
+             sample_indices = non_zero_indices
+
+         for i, state_idx in enumerate(sample_indices):
+             q_values = model[state_idx]
+             non_zero_q = q_values[q_values != 0]
+
+             if len(non_zero_q) > 0:
+                 print(f"\nState {i+1} (Index {state_idx}):")
+                 print(f"   Non-zero Q-values: {len(non_zero_q)}")
+                 print(f"   Actions with values:")
+                 for action in np.where(q_values != 0)[0]:
+                     print(f"   Action {action}: {q_values[action]:.4f}")
+     else:
+         print("No states with learned values found yet.")
+
+ def decode_state(state_index):
+     """
+     Convert a state index back to a board representation.
+     This assumes the same encoding used in the game.
+
+     Args:
+         state_index (int): The encoded state index
+
+     Returns:
+         list: Board representation (0=empty, 1=X, 2=O)
+     """
+     board = [0] * 9
+     temp_index = state_index
+
+     for i in range(9):
+         board[i] = temp_index % 3
+         temp_index //= 3
+
+     return board
+
+ def display_board(board):
+     """
+     Display a Tic-Tac-Toe board in human-readable format.
+
+     Args:
+         board (list): Board representation
+     """
+     symbols = {0: '.', 1: 'X', 2: 'O'}
+
+     print("Board state:")
+     for row in range(3):
+         row_chars = [symbols[board[row*3 + col]] for col in range(3)]
+         print("  " + " | ".join(row_chars))
+         if row < 2:
+             print("  " + "-" * 9)
+
+ def explore_model_interactively(model):
+     """
+     Interactive exploration of the model.
+
+     Args:
+         model (np.ndarray): The AI model
+     """
+
+     print("\n🎯 INTERACTIVE EXPLORATION")
+     print("="*60)
+
+     while True:
+         print("\nOptions:")
+         print("1. Look up a specific state")
+         print("2. Find states with highest Q-values")
+         print("3. Find best action for a given state")
+         print("4. Exit exploration")
+
+         choice = input("\nEnter your choice (1-4): ").strip()
+
+         if choice == '1':
+             try:
+                 state_idx = int(input("Enter state index (0-19682): "))
+                 if 0 <= state_idx < model.shape[0]:
+                     board = decode_state(state_idx)
+                     display_board(board)
+
+                     q_values = model[state_idx]
+                     print(f"\nQ-values for state {state_idx}:")
+                     for action in range(9):
+                         if q_values[action] != 0:
+                             print(f"   Action {action} (row {action//3}, col {action%3}): {q_values[action]:.4f}")
+
+                     # Show best action
+                     best_action = np.argmax(q_values)
+                     print(f"\nBest action: {best_action} (row {best_action//3}, col {best_action%3})")
+                 else:
+                     print("Invalid state index!")
+             except ValueError:
+                 print("Please enter a valid number!")
+
+         elif choice == '2':
+             try:
+                 num_states = int(input("How many top states? (1-100): "))
+                 num_states = max(1, min(100, num_states))
+
+                 # Find states with maximum Q-values
+                 max_q_per_state = np.max(model, axis=1)
+                 top_indices = np.argsort(max_q_per_state)[-num_states:][::-1]
+
+                 print(f"\nTop {num_states} states with highest Q-values:")
+                 for i, idx in enumerate(top_indices[:10]):  # Show first 10
+                     max_q = max_q_per_state[idx]
+                     if max_q > 0:
+                         board = decode_state(idx)
+                         print(f"\n{i+1}. State {idx} (max Q: {max_q:.4f})")
+                         display_board(board)
+             except ValueError:
+                 print("Please enter a valid number!")
+
+         elif choice == '3':
+             # Create a board manually
+             print("\nEnter board state (9 numbers, 0=empty, 1=X, 2=O)")
+             print("Example: 0 0 1 0 2 0 0 0 0")
+
+             try:
+                 board_input = input("Board: ").strip()
+                 if len(board_input) == 0:
+                     # Use default example
+                     board = [0, 0, 1, 0, 2, 0, 0, 0, 0]
+                 else:
+                     board = [int(x) for x in board_input.split()]
+
+                 if len(board) != 9 or any(x not in [0, 1, 2] for x in board):
+                     raise ValueError("Invalid board")
+
+                 display_board(board)
+
+                 # Convert to state index
+                 state_idx = 0
+                 for i, cell in enumerate(board):
+                     state_idx += cell * (3 ** i)
+
+                 q_values = model[state_idx]
+
+                 # Only show available moves
+                 available_moves = [i for i, cell in enumerate(board) if cell == 0]
+
+                 print("\nAvailable moves and their Q-values:")
+                 for move in available_moves:
+                     q_val = q_values[move]
+                     row, col = divmod(move, 3)
+                     print(f"   Move {move} (row {row}, col {col}): {q_val:.4f}")
+
+                 if available_moves:
+                     best_move = available_moves[np.argmax([q_values[m] for m in available_moves])]
+                     row, col = divmod(best_move, 3)
+                     print(f"\nRecommended move: {best_move} (row {row}, col {col})")
+                 else:
+                     print("No available moves!")
+
+             except Exception as e:
+                 print(f"Error: {e}")
+
+         elif choice == '4':
+             print("Exiting interactive exploration.")
+             break
+
+         else:
+             print("Invalid choice!")
+
+ def save_model_summary(data, filename="model_summary.json"):
+     """
+     Save a summary of the model to a JSON file.
+
+     Args:
+         data (dict): The loaded pickle data
+         filename (str): Output JSON filename
+     """
+     if not data:
+         return
+
+     summary = {}
+
+     if 'model' in data:
+         model = data['model']
+         summary['model'] = {
+             'shape': model.shape,
+             'size': int(model.size),
+             'non_zero_entries': int(np.count_nonzero(model)),
+             'sparsity': float((np.sum(model == 0) / model.size) * 100)
+         }
+
+     if 'experience' in data:
+         experience = data['experience']
+         summary['experience'] = {
+             'count': len(experience)
+         }
+
+     try:
+         with open(filename, 'w') as f:
+             json.dump(summary, f, indent=2)
+         print(f"\n💾 Model summary saved to '{filename}'")
+     except Exception as e:
+         print(f"Error saving summary: {e}")
+
+ def main():
+     """
+     Main function to load and analyze the pickle file.
+     """
+     print("🔍 Tic-Tac-Toe AI Model Analyzer")
+     print("="*60)
+
+     # Try to load the pickle file
+     filename = "ttt_ai_model.pkl"
+     data = load_pickle_file(filename)
+
+     if data:
+         # Analyze the model
+         analyze_model(data)
+
+         # If model exists, do more detailed analysis
+         if 'model' in data:
+             # Examine specific states
+             examine_specific_states(data['model'])
+
+             # Interactive exploration
+             explore = input("\nWould you like to explore the model interactively? (y/n): ").strip().lower()
+             if explore == 'y':
+                 explore_model_interactively(data['model'])
+
+             # Save summary
+             save = input("\nWould you like to save a summary? (y/n): ").strip().lower()
+             if save == 'y':
+                 save_model_summary(data)
+
+         # Additional file info
+         print("\n📄 File Information:")
+         print(f"   File size: {os.path.getsize(filename):,} bytes")
+         print(f"   Last modified: {os.path.getmtime(filename):.0f}")
+
+     print("\n" + "="*60)
+     print("Analysis complete!")
+
+ if __name__ == "__main__":
+     main()
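
Note: the script assumes the board is encoded in base 3, with cell 0 as the least-significant digit, which is why valid state indices run from 0 to 3**9 - 1 = 19682. A minimal round-trip sketch of that encoding (a standalone illustration mirroring decode_state above, not part of the commit):

    def encode_state(board):
        # Base-3 encoding: cell i contributes board[i] * 3**i
        return sum(cell * (3 ** i) for i, cell in enumerate(board))

    def decode_state(state_index):
        # Inverse: peel off base-3 digits, least-significant cell first
        board = [0] * 9
        for i in range(9):
            board[i] = state_index % 3
            state_index //= 3
        return board

    board = [0, 0, 1, 0, 2, 0, 0, 0, 0]   # X at cell 2, O at cell 4
    idx = encode_state(board)              # 1*3**2 + 2*3**4 = 9 + 162 = 171
    assert decode_state(idx) == board
    assert encode_state([2] * 9) == 3 ** 9 - 1  # = 19682, the maximum index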
Imitation Learning Tic Tac Toe AI 2/create_pretrained_ai.py ADDED
@@ -0,0 +1,129 @@
+ import pickle
+ import numpy as np
+ import random
+
+ def create_basic_ai_knowledge():
+     """Create a basic Tic-Tac-Toe AI with fundamental strategies"""
+
+     q_table = {}
+
+     # 1. Empty board - prefer center and corners
+     empty_board = (0,0,0,0,0,0,0,0,0)
+     q_values = [0.0] * 9
+     q_values[4] = 0.8  # Center is best
+     q_values[0] = q_values[2] = q_values[6] = q_values[8] = 0.7  # Corners are good
+     q_values[1] = q_values[3] = q_values[5] = q_values[7] = 0.5  # Edges are okay
+     q_table[empty_board] = q_values
+
+     # 2. Opponent in center - take corners
+     center_taken = (0,0,0,0,1,0,0,0,0)  # X in center
+     q_values = [0.0] * 9
+     q_values[0] = q_values[2] = q_values[6] = q_values[8] = 0.9  # Corners are best
+     q_table[center_taken] = q_values
+
+     # 3. Winning moves - very high value
+     # Example: two O's in a row/col/diag
+     winning_patterns = [
+         # Horizontal
+         (2,2,0,0,0,0,0,0,0),  # Need position 2
+         (0,2,2,0,0,0,0,0,0),  # Need position 0
+         (2,0,2,0,0,0,0,0,0),  # Need position 1
+
+         # Vertical
+         (2,0,0,2,0,0,0,0,0),  # Need position 6
+         (0,2,0,0,2,0,0,0,0),  # Need position 7
+         (0,0,2,0,0,2,0,0,0),  # Need position 8
+
+         # Diagonal
+         (2,0,0,0,2,0,0,0,0),  # Need position 8
+         (0,0,2,0,2,0,0,0,0),  # Need position 6
+     ]
+
+     for board in winning_patterns:
+         q_values = [0.0] * 9
+         # Find empty spot that completes the line
+         for i in range(9):
+             if board[i] == 0:
+                 # Check if this completes three in a row
+                 test_board = list(board)
+                 test_board[i] = 2
+
+                 # Check if this is a winning move
+                 winning = False
+                 lines = [
+                     [0,1,2], [3,4,5], [6,7,8],  # Rows
+                     [0,3,6], [1,4,7], [2,5,8],  # Columns
+                     [0,4,8], [2,4,6]            # Diagonals
+                 ]
+
+                 for line in lines:
+                     if (test_board[line[0]] == test_board[line[1]] ==
+                             test_board[line[2]] == 2):
+                         winning = True
+                         break
+
+                 if winning:
+                     q_values[i] = 1.0  # Very high value for winning move
+
+         q_table[board] = q_values
+
+     # 4. Blocking moves - high value
+     blocking_patterns = [
+         # Block horizontal
+         (1,1,0,0,0,0,0,0,0),  # Block at 2
+         (0,1,1,0,0,0,0,0,0),  # Block at 0
+         (1,0,1,0,0,0,0,0,0),  # Block at 1
+
+         # Block vertical
+         (1,0,0,1,0,0,0,0,0),  # Block at 6
+         (0,1,0,0,1,0,0,0,0),  # Block at 7
+         (0,0,1,0,0,1,0,0,0),  # Block at 8
+     ]
+
+     for board in blocking_patterns:
+         q_values = [0.0] * 9
+         # Find blocking move
+         for i in range(9):
+             if board[i] == 0:
+                 # Check if this blocks opponent
+                 test_board = list(board)
+                 test_board[i] = 1  # Temporarily place opponent's piece
+
+                 # Check if opponent would win
+                 opponent_wins = False
+                 lines = [
+                     [0,1,2], [3,4,5], [6,7,8],
+                     [0,3,6], [1,4,7], [2,5,8],
+                     [0,4,8], [2,4,6]
+                 ]
+
+                 for line in lines:
+                     if (test_board[line[0]] == test_board[line[1]] ==
+                             test_board[line[2]] == 1):
+                         opponent_wins = True
+                         break
+
+                 if opponent_wins:
+                     q_values[i] = 0.9  # High value for blocking
+
+         q_table[board] = q_values
+
+     # Save the pre-trained AI
+     data = {
+         'q_table': q_table,
+         'training_history': [],
+         'player_symbol': 2
+     }
+
+     with open('ttt_ai_pretrained.pkl', 'wb') as f:
+         pickle.dump(data, f)
+
+     print(f"Created pre-trained AI with {len(q_table)} board states")
+     print("Basic strategies included:")
+     print("1. Prefer center and corners")
+     print("2. Take corners when opponent has center")
+     print("3. Recognize winning moves")
+     print("4. Recognize blocking moves")
+
+ if __name__ == '__main__':
+     create_basic_ai_knowledge()
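
A quick way to sanity-check the generated file is to load it back and look up the empty-board entry; a minimal sketch (assumes ttt_ai_pretrained.pkl was just created by the script above):

    import pickle

    with open('ttt_ai_pretrained.pkl', 'rb') as f:
        data = pickle.load(f)

    q_table = data['q_table']
    q_values = q_table[(0,) * 9]  # board keys are 9-tuples

    # Highest-valued move on an empty board should be the center (index 4, value 0.8)
    best_move = max(range(9), key=lambda m: q_values[m])
    print(best_move, q_values[best_move])  # expected: 4 0.8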
Imitation Learning Tic Tac Toe AI 2/improved_game.py ADDED
@@ -0,0 +1,529 @@
+ import sys
+ import numpy as np
+ import random
+ from collections import defaultdict
+ import pickle
+ import os
+ from PyQt5.QtWidgets import *
+ from PyQt5.QtCore import *
+ from PyQt5.QtGui import *
+
+ class TicTacToeAI:
+     """Improved AI with proper imitation learning"""
+
+     def __init__(self, player_symbol=2):  # Default is O
+         self.q_table = defaultdict(lambda: [0.0] * 9)  # State -> [Q-values for 9 moves]
+         self.learning_rate = 0.3  # Increased for faster learning
+         self.exploration_rate = 0.3  # Start with exploration
+         self.discount_factor = 0.9
+         self.player_symbol = player_symbol
+         self.last_state = None
+         self.last_action = None
+         self.training_history = []
+
+         self.load_model()
+
+     def board_to_key(self, board):
+         """Convert board to hashable key"""
+         return tuple(board)
+
+     def get_available_moves(self, board):
+         """Get list of available positions"""
+         return [i for i, cell in enumerate(board) if cell == 0]
+
+     def choose_action(self, board, available_moves):
+         """Choose action using epsilon-greedy policy"""
+         board_key = self.board_to_key(board)
+
+         # Exploration: random move
+         if random.random() < self.exploration_rate:
+             action = random.choice(available_moves)
+         # Exploitation: best known move
+         else:
+             q_values = self.q_table[board_key]
+             # Filter to available moves
+             available_q = [(q_values[move], move) for move in available_moves]
+             # Choose move with highest Q-value
+             action = max(available_q, key=lambda x: x[0])[1]
+
+         # Store for learning
+         self.last_state = board_key
+         self.last_action = action
+
+         return action
+
+     def learn(self, reward, next_board, game_over):
+         """Q-learning update"""
+         if self.last_state is None or self.last_action is None:
+             return
+
+         board_key = self.last_state
+         action = self.last_action
+
+         # Current Q-value
+         current_q = self.q_table[board_key][action]
+
+         if game_over:
+             # Terminal state, no future rewards
+             future_q = 0
+         else:
+             # Estimate future reward
+             next_key = self.board_to_key(next_board)
+             next_available = self.get_available_moves(next_board)
+             if next_available:
+                 future_q = max(self.q_table[next_key][move] for move in next_available)
+             else:
+                 future_q = 0
+
+         # Q-learning update
+         new_q = current_q + self.learning_rate * (reward + self.discount_factor * future_q - current_q)
+         self.q_table[board_key][action] = new_q
+
+         # Record for analysis
+         self.training_history.append({
+             'state': board_key,
+             'action': action,
+             'reward': reward,
+             'new_q': new_q
+         })
+
+         # Clear for next move
+         self.last_state = None
+         self.last_action = None
+
+     def learn_from_observation(self, board_before, action, board_after, winner):
+         """Learn by observing human player moves"""
+         board_key = self.board_to_key(board_before)
+
+         # Determine reward based on game outcome
+         if winner == self.player_symbol:  # AI's symbol won
+             reward = 1.0
+         elif winner == 3 - self.player_symbol:  # Opponent won
+             reward = -1.0
+         elif winner is None:  # Draw
+             reward = 0.1
+         else:
+             reward = 0
+
+         # Update Q-value
+         current_q = self.q_table[board_key][action]
+         new_q = current_q + self.learning_rate * (reward - current_q)
+         self.q_table[board_key][action] = new_q
+
+     def save_model(self):
+         """Save Q-table to file"""
+         try:
+             # Convert defaultdict to regular dict for pickling
+             q_table_dict = dict(self.q_table)
+             data = {
+                 'q_table': q_table_dict,
+                 'training_history': self.training_history[-1000:],  # Keep last 1000
+                 'player_symbol': self.player_symbol
+             }
+             with open('ttt_ai_improved.pkl', 'wb') as f:
+                 pickle.dump(data, f)
+             print(f"Model saved with {len(q_table_dict)} states")
+         except Exception as e:
+             print(f"Error saving model: {e}")
+
+     def load_model(self):
+         """Load Q-table from file"""
+         filename = 'ttt_ai_improved.pkl'
+         if os.path.exists(filename):
+             try:
+                 with open(filename, 'rb') as f:
+                     data = pickle.load(f)
+                 self.q_table = defaultdict(lambda: [0.0] * 9, data.get('q_table', {}))
+                 self.training_history = data.get('training_history', [])
+                 self.player_symbol = data.get('player_symbol', 2)
+                 print(f"Model loaded with {len(self.q_table)} states")
+             except Exception as e:
+                 print(f"Error loading model: {e}")
+
+ class TicTacToeGame:
+     """Game logic - unchanged"""
+
+     def __init__(self):
+         self.reset()
+
+     def reset(self):
+         self.board = [0] * 9
+         self.current_player = 1
+         self.winner = None
+         self.game_over = False
+         self.moves = 0
+
+     def make_move(self, position):
+         if self.board[position] != 0 or self.game_over:
+             return False
+
+         self.board[position] = self.current_player
+         self.moves += 1
+
+         self.winner = self.check_winner()
+         if self.winner or self.moves == 9:
+             self.game_over = True
+         else:
+             self.current_player = 3 - self.current_player
+
+         return True
+
+     def check_winner(self):
+         winning_combinations = [
+             [0, 1, 2], [3, 4, 5], [6, 7, 8],
+             [0, 3, 6], [1, 4, 7], [2, 5, 8],
+             [0, 4, 8], [2, 4, 6]
+         ]
+
+         for combo in winning_combinations:
+             if (self.board[combo[0]] == self.board[combo[1]] ==
+                     self.board[combo[2]] != 0):
+                 return 'X' if self.board[combo[0]] == 1 else 'O'
+
+         return None
+
+     def get_board_state(self):
+         return self.board.copy()
+
+ class ImprovedGame(QMainWindow):
+     """Improved game with working imitation learning"""
+
+     def __init__(self):
+         super().__init__()
+         self.game = TicTacToeGame()
+         self.ai = TicTacToeAI(player_symbol=2)  # AI plays as O
+
+         # Training parameters
+         self.training_mode = True
+         self.observation_mode = True  # Learn from human moves
+         self.games_played = 0
+         self.ai_wins = 0
+         self.human_wins = 0
+         self.ties = 0
+
+         self.init_ui()
+         self.start_new_game()
+
+     def init_ui(self):
+         self.setWindowTitle('Improved Imitation Learning Tic-Tac-Toe')
+         self.setGeometry(100, 100, 450, 600)
+
+         central_widget = QWidget()
+         self.setCentralWidget(central_widget)
+         layout = QVBoxLayout()
+
+         # Game board
+         self.board_widget = self.create_board()
+         layout.addWidget(self.board_widget)
+
+         # Status
+         status_layout = QHBoxLayout()
+         self.status_label = QLabel("Your turn (X)")
+         self.status_label.setFont(QFont('Arial', 14))
+         status_layout.addWidget(self.status_label)
+
+         self.stats_label = QLabel("Games: 0 | AI: 0 | You: 0 | Ties: 0")
+         status_layout.addWidget(self.stats_label)
+         layout.addLayout(status_layout)
+
+         # Training controls
+         controls = QHBoxLayout()
+
+         self.train_btn = QPushButton("Training: ON")
+         self.train_btn.clicked.connect(self.toggle_training)
+         controls.addWidget(self.train_btn)
+
+         self.observe_btn = QPushButton("Learn from You: ON")
+         self.observe_btn.clicked.connect(self.toggle_observation)
+         controls.addWidget(self.observe_btn)
+
+         self.new_game_btn = QPushButton("New Game")
+         self.new_game_btn.clicked.connect(self.start_new_game)
+         controls.addWidget(self.new_game_btn)
+
+         self.save_btn = QPushButton("Save AI")
+         self.save_btn.clicked.connect(self.save_ai)
+         controls.addWidget(self.save_btn)
+
+         layout.addLayout(controls)
+
+         # Learning parameters
+         params = QGridLayout()
+
+         params.addWidget(QLabel("Learning Rate:"), 0, 0)
+         self.lr_slider = QSlider(Qt.Horizontal)
+         self.lr_slider.setRange(1, 50)
+         self.lr_slider.setValue(int(self.ai.learning_rate * 100))
+         self.lr_slider.valueChanged.connect(self.update_learning_rate)
+         params.addWidget(self.lr_slider, 0, 1)
+
+         params.addWidget(QLabel("Exploration:"), 1, 0)
+         self.exp_slider = QSlider(Qt.Horizontal)
+         self.exp_slider.setRange(0, 100)
+         self.exp_slider.setValue(int(self.ai.exploration_rate * 100))
+         self.exp_slider.valueChanged.connect(self.update_exploration)
+         params.addWidget(self.exp_slider, 1, 1)
+
+         layout.addLayout(params)
+
+         # Learning log
+         self.log_text = QTextEdit()
+         self.log_text.setMaximumHeight(150)
+         self.log_text.setReadOnly(True)
+         layout.addWidget(self.log_text)
+
+         central_widget.setLayout(layout)
+
+         # AI move timer
+         self.ai_timer = QTimer()
+         self.ai_timer.timeout.connect(self.ai_move)
+
+         self.log("AI initialized. Play as X to train the AI!")
+         self.log(f"AI knows {len(self.ai.q_table)} board states")
+
+     def create_board(self):
+         widget = QWidget()
+         grid = QGridLayout()
+         grid.setSpacing(5)
+
+         self.buttons = []
+         for i in range(9):
+             btn = QPushButton('')
+             btn.setFixedSize(100, 100)
+             btn.setFont(QFont('Arial', 24))
+             btn.clicked.connect(lambda checked, pos=i: self.human_move(pos))
+
+             row, col = divmod(i, 3)
+             grid.addWidget(btn, row, col)
+             self.buttons.append(btn)
+
+         widget.setLayout(grid)
+         return widget
+
+     def update_board(self):
+         """Update button display from game state"""
+         symbols = {0: '', 1: 'X', 2: 'O'}
+         colors = {0: 'black', 1: 'red', 2: 'blue'}
+
+         for i, btn in enumerate(self.buttons):
+             symbol = symbols[self.game.board[i]]
+             color = colors[self.game.board[i]]
+             btn.setText(symbol)
+             btn.setStyleSheet(f"color: {color}; font-weight: bold;")
+
+     def human_move(self, position):
+         """Handle human player move"""
+         if self.game.game_over or self.game.current_player != 1:
+             return
+
+         # Record board before move for learning
+         board_before = self.game.get_board_state()
+
+         if self.game.make_move(position):
+             self.update_board()
+
+             # If learning from observation is enabled
+             if self.training_mode and self.observation_mode:
+                 # The AI learns from the human move
+                 self.ai.learn_from_observation(
+                     board_before,
+                     position,
+                     self.game.get_board_state(),
+                     None  # Game not over yet
+                 )
+                 self.log(f"AI observed your move at {position}")
+
+             if self.game.game_over:
+                 self.end_game()
+             else:
+                 # AI's turn
+                 self.status_label.setText("AI thinking...")
+                 self.ai_timer.start(300)  # Shorter delay
+
+     def ai_move(self):
+         """Handle AI player move"""
+         self.ai_timer.stop()
+
+         if self.game.game_over or self.game.current_player != 2:
+             return
+
+         # Get available moves
+         available_moves = [i for i, cell in enumerate(self.game.board) if cell == 0]
+
+         if available_moves:
+             # Choose action
+             action = self.ai.choose_action(self.game.board, available_moves)
+
+             # Record state before move for Q-learning
+             board_before = self.game.get_board_state()
+
+             if self.game.make_move(action):
+                 self.update_board()
+
+                 # Q-learning update
+                 if self.training_mode:
+                     # Determine reward
+                     if self.game.game_over:
+                         if self.game.winner == 'O':
+                             reward = 1.0  # AI won
+                         elif self.game.winner == 'X':
+                             reward = -1.0  # AI lost
+                         else:
+                             reward = 0.1  # Draw
+                     else:
+                         reward = 0  # Intermediate move
+
+                     # Update Q-values
+                     self.ai.learn(reward, self.game.get_board_state(), self.game.game_over)
+
+                 if self.game.game_over:
+                     self.end_game()
+                 else:
+                     self.status_label.setText("Your turn (X)")
+                     self.log(f"AI moved to {action}")
+
+     def end_game(self):
+         """Handle game end"""
+         winner = self.game.winner
+
+         # Update statistics
+         self.games_played += 1
+         if winner == 'X':
+             self.human_wins += 1
+             result = "You win!"
+         elif winner == 'O':
+             self.ai_wins += 1
+             result = "AI wins!"
+             # Strong positive reinforcement for winning
+             if self.training_mode:
+                 self.log("AI won! Giving strong positive reward")
+         else:
+             self.ties += 1
+             result = "It's a tie!"
+
+         self.update_stats()
+
+         # Final Q-learning update for the last move
+         if self.training_mode and winner is not None:
+             # Determine final reward for AI
+             final_reward = 1.0 if winner == 'O' else -1.0 if winner == 'X' else 0.1
+             self.ai.learn(final_reward, self.game.board, True)
+
+             # Also learn from the complete game if observation mode is on
+             if self.observation_mode:
+                 self.log(f"AI learned from {result}")
+
+         # Update status
+         self.status_label.setText(result)
+
+         # Highlight winning cells
+         if winner:
+             self.highlight_winner()
+
+         # Offer new game
+         QTimer.singleShot(1000, self.offer_new_game)
+
+     def highlight_winner(self):
+         """Highlight winning combination"""
+         winning_combinations = [
+             [0, 1, 2], [3, 4, 5], [6, 7, 8],
+             [0, 3, 6], [1, 4, 7], [2, 5, 8],
+             [0, 4, 8], [2, 4, 6]
+         ]
+
+         for combo in winning_combinations:
+             if (self.game.board[combo[0]] == self.game.board[combo[1]] ==
+                     self.game.board[combo[2]] != 0):
+                 for pos in combo:
+                     self.buttons[pos].setStyleSheet(
+                         "background-color: lightgreen; font-weight: bold;"
+                     )
+                 break
+
+     def offer_new_game(self):
+         """Ask if player wants to play again"""
+         msg = QMessageBox()
+         msg.setWindowTitle("Game Over")
+         msg.setText(f"{self.status_label.text()}")
+         msg.setInformativeText("Play again?")
+         msg.setStandardButtons(QMessageBox.Yes | QMessageBox.No)
+
+         if msg.exec_() == QMessageBox.Yes:
+             self.start_new_game()
+
+     def start_new_game(self):
+         """Start a new game"""
+         self.game.reset()
+         self.update_board()
+
+         # Reset button colors
+         for btn in self.buttons:
+             btn.setStyleSheet("")
+
+         # Always let human start
+         self.status_label.setText("Your turn (X)")
+
+         # Gradually reduce exploration
+         if self.games_played > 20:
+             self.ai.exploration_rate = max(0.1, self.ai.exploration_rate * 0.95)
+             self.exp_slider.setValue(int(self.ai.exploration_rate * 100))
+
+         self.log(f"New game started (Game {self.games_played + 1})")
+         self.log(f"AI exploration: {self.ai.exploration_rate:.2f}")
+
+     def toggle_training(self):
+         """Toggle training mode"""
+         self.training_mode = not self.training_mode
+         self.train_btn.setText(f"Training: {'ON' if self.training_mode else 'OFF'}")
+         self.log(f"Training mode {'enabled' if self.training_mode else 'disabled'}")
+
+     def toggle_observation(self):
+         """Toggle learning from human moves"""
+         self.observation_mode = not self.observation_mode
+         self.observe_btn.setText(f"Learn from You: {'ON' if self.observation_mode else 'OFF'}")
+         self.log(f"Learning from your moves {'enabled' if self.observation_mode else 'disabled'}")
+
+     def update_learning_rate(self, value):
+         """Update learning rate"""
+         self.ai.learning_rate = value / 100.0
+         self.log(f"Learning rate: {self.ai.learning_rate:.2f}")
+
+     def update_exploration(self, value):
+         """Update exploration rate"""
+         self.ai.exploration_rate = value / 100.0
+         self.log(f"Exploration rate: {self.ai.exploration_rate:.2f}")
+
+     def update_stats(self):
+         """Update statistics display"""
+         self.stats_label.setText(
+             f"Games: {self.games_played} | "
+             f"AI: {self.ai_wins} | "
+             f"You: {self.human_wins} | "
+             f"Ties: {self.ties}"
+         )
+
+     def save_ai(self):
+         """Save AI model"""
+         self.ai.save_model()
+         self.log(f"AI model saved! Knows {len(self.ai.q_table)} states")
+
+     def log(self, message):
+         """Add message to log"""
+         self.log_text.append(f"[Game {self.games_played}] {message}")
+
+ def main():
+     app = QApplication(sys.argv)
+     app.setStyle('Fusion')
+
+     # Set a nice theme
+     palette = QPalette()
+     palette.setColor(QPalette.Window, QColor(240, 240, 240))
+     palette.setColor(QPalette.WindowText, Qt.black)
+     app.setPalette(palette)
+
+     game = ImprovedGame()
+     game.show()
+     sys.exit(app.exec_())
+
+ if __name__ == '__main__':
+     main()
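
The learn() method above implements the standard tabular Q-learning rule, Q(s,a) <- Q(s,a) + alpha * (r + gamma * max_a' Q(s',a') - Q(s,a)). A worked one-step example with the class defaults (alpha = 0.3, gamma = 0.9), independent of the GUI:

    # One Q-learning step by hand, matching TicTacToeAI.learn()
    alpha, gamma = 0.3, 0.9      # learning_rate, discount_factor defaults
    current_q = 0.0              # Q(s, a) for a previously unseen state-action pair
    reward = 1.0                 # the AI just won the game
    future_q = 0.0               # terminal state, so no future value

    new_q = current_q + alpha * (reward + gamma * future_q - current_q)
    print(new_q)  # 0.3 -- a win nudges the move's value up by alpha * reward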
Imitation Learning Tic Tac Toe AI 2/requirements.txt ADDED
@@ -0,0 +1,2 @@
+ PyQt5
+ numpy
Imitation Learning Tic Tac Toe AI 2/ttt_ai_model_improved.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b0b251454a01165321853d8e32658ef4a96b694bdb74eed00f4060d7cd331743
+ size 1417366
Imitation Learning Tic Tac Toe AI 2/ttt_diagnostic.py ADDED
@@ -0,0 +1,351 @@
+ import pickle
+ import numpy as np
+ import os
+
+ def load_ai_model():
+     """Load the AI model from pickle file"""
+     filename = "ttt_ai_model.pkl"
+     if not os.path.exists(filename):
+         print("Model file not found!")
+         return None
+
+     with open(filename, 'rb') as f:
+         data = pickle.load(f)
+
+     return data['model']
+
+ def decode_state(state_index):
+     """Convert state index to board representation"""
+     board = [0] * 9
+     temp_index = state_index
+
+     for i in range(9):
+         board[i] = temp_index % 3
+         temp_index //= 3
+
+     return board
+
+ def display_board(board):
+     """Display Tic-Tac-Toe board"""
+     symbols = {0: '.', 1: 'X', 2: 'O'}
+
+     print("Current board:")
+     for row in range(3):
+         row_chars = [symbols[board[row*3 + col]] for col in range(3)]
+         print("  " + " | ".join(row_chars))
+         if row < 2:
+             print("  " + "-" * 9)
+
+ def test_ai_with_common_scenarios(model):
+     """Test AI with common Tic-Tac-Toe scenarios"""
+
+     print("\n🤖 TESTING AI WITH COMMON SCENARIOS")
+     print("="*50)
+
+     test_cases = [
+         # Empty board
+         {
+             "name": "Empty board - first move",
+             "board": [0,0,0,0,0,0,0,0,0],
+             "expected": "Center (4) or corners (0,2,6,8)"
+         },
+         # Center taken by opponent
+         {
+             "name": "Opponent took center",
+             "board": [0,0,0,0,1,0,0,0,0],
+             "expected": "A corner (0,2,6,8)"
+         },
+         # Winning move for AI
+         {
+             "name": "AI can win in one move",
+             "board": [2,1,0,   # O X .
+                       1,2,0,   # X O .
+                       0,0,0],  # . . .
+             "expected": "Move 8 to complete diagonal"
+         },
+         # Block opponent's winning move
+         {
+             "name": "Block opponent's winning move",
+             "board": [1,0,0,   # X . .
+                       1,2,0,   # X O .
+                       0,0,0],  # . . .
+             "expected": "Move 6 to block vertical"
+         },
+         # Fork opportunity
+         {
+             "name": "Fork opportunity",
+             "board": [2,0,1,   # O . X
+                       0,1,0,   # . X .
+                       0,0,0],  # . . .
+             "expected": "Move 8 to create fork"
+         }
+     ]
+
+     for test in test_cases:
+         print(f"\n📋 {test['name']}")
+         display_board(test['board'])
+
+         # Convert board to state index
+         state_idx = 0
+         for i, cell in enumerate(test['board']):
+             state_idx += cell * (3 ** i)
+
+         # Get Q-values for this state
+         q_values = model[state_idx]
+
+         # Get available moves
+         available_moves = [i for i, cell in enumerate(test['board']) if cell == 0]
+
+         if available_moves:
+             print("\nAvailable moves and Q-values:")
+             for move in available_moves:
+                 q_val = q_values[move]
+                 row, col = divmod(move, 3)
+                 symbol = "⚠️" if q_val > 0 else "  "
+                 print(f"{symbol} Move {move} (row {row}, col {col}): {q_val:.4f}")
+
+             # AI's recommended move
+             best_move = available_moves[np.argmax([q_values[m] for m in available_moves])]
+             row, col = divmod(best_move, 3)
+             print(f"\n🤖 AI's choice: Move {best_move} (row {row}, col {col})")
+             print(f"📋 Expected: {test['expected']}")
+
+             # Check if AI learned something useful
+             best_q = q_values[best_move]
+             if best_q > 0:
+                 print("✅ AI has positive association with this move")
+             elif best_q < 0:
+                 print("❌ AI has negative association with this move (thinks it's bad)")
+             else:
+                 print("➖ AI has no learning for this move")
+         else:
+             print("No available moves!")
+
+ def analyze_learning_patterns(model):
+     """Analyze what patterns the AI has learned"""
+
+     print("\n🔍 ANALYZING LEARNING PATTERNS")
+     print("="*50)
+
+     # Find all states with non-zero Q-values
+     non_zero_indices = np.nonzero(np.any(model != 0, axis=1))[0]
+
+     print(f"Total states with learning: {len(non_zero_indices)}")
+
+     # Categorize by game phase
+     phases = {
+         "early": [],  # 0-2 moves made
+         "mid": [],    # 3-5 moves made
+         "late": []    # 6-8 moves made
+     }
+
+     for idx in non_zero_indices:
+         board = decode_state(idx)
+         moves_made = sum(1 for cell in board if cell != 0)
+
+         if moves_made <= 2:
+             phases["early"].append(idx)
+         elif moves_made <= 5:
+             phases["mid"].append(idx)
+         else:
+             phases["late"].append(idx)
+
+     print(f"\nLearning by game phase:")
+     print(f"   Early game (0-2 moves): {len(phases['early'])} states")
+     print(f"   Mid game (3-5 moves): {len(phases['mid'])} states")
+     print(f"   Late game (6-8 moves): {len(phases['late'])} states")
+
+     # Analyze Q-value distribution
+     all_q_values = model[non_zero_indices].flatten()
+     non_zero_q = all_q_values[all_q_values != 0]
+
+     if len(non_zero_q) > 0:
+         print(f"\nQ-value analysis:")
+         print(f"   Total Q-values: {len(non_zero_q)}")
+         print(f"   Positive Q-values: {np.sum(non_zero_q > 0)}")
+         print(f"   Negative Q-values: {np.sum(non_zero_q < 0)}")
+         print(f"   Average Q-value: {np.mean(non_zero_q):.4f}")
+         print(f"   Most positive: {np.max(non_zero_q):.4f}")
+         print(f"   Most negative: {np.min(non_zero_q):.4f}")
+
+     # Show examples of what AI learned
+     print("\n📚 Examples of learned states:")
+
+     # Find states with positive Q-values
+     positive_states = []
+     for idx in non_zero_indices:
+         if np.any(model[idx] > 0):
+             positive_states.append(idx)
+
+     if positive_states:
+         print(f"\nFound {len(positive_states)} states with positive associations")
+         for i, idx in enumerate(positive_states[:3]):  # Show first 3
+             board = decode_state(idx)
+             print(f"\nExample {i+1}:")
+             display_board(board)
+
+             q_values = model[idx]
+             positive_moves = np.where(q_values > 0)[0]
+             print("Moves AI thinks are good:")
+             for move in positive_moves:
+                 print(f"   Move {move} (row {move//3}, col {move%3}): {q_values[move]:.4f}")
+     else:
+         print("No positive associations found - AI hasn't learned winning strategies yet")
+
+ def check_for_specific_patterns(model):
+     """Check if AI has learned specific Tic-Tac-Toe strategies"""
+
+     print("\n🎯 CHECKING FOR SPECIFIC STRATEGIES")
+     print("="*50)
+
+     strategies = {
+         "prefer_center": 0,
+         "prefer_corners": 0,
+         "prefer_edges": 0,
+         "block_opponent": 0,
+         "create_fork": 0,
+         "avoid_losing": 0
+     }
+
+     # Check common winning/blocking patterns
+     patterns_to_check = [
+         # Center preference
+         ([0,0,0,0,0,0,0,0,0], [4], "prefer_center"),
+
+         # Corner openings
+         ([0,0,0,0,1,0,0,0,0], [0,2,6,8], "prefer_corners"),
+
+         # Block vertical
+         ([1,0,0,1,2,0,0,0,0], [6], "block_opponent"),
+
+         # Block horizontal
+         ([1,1,0,0,2,0,0,0,0], [2], "block_opponent"),
+
+         # Block diagonal
+         ([1,0,0,0,1,0,0,0,0], [8], "block_opponent"),
+     ]
+
+     for board_pattern, good_moves, strategy in patterns_to_check:
+         state_idx = 0
+         for i, cell in enumerate(board_pattern):
+             state_idx += cell * (3 ** i)
+
+         q_values = model[state_idx]
+         available_moves = [i for i, cell in enumerate(board_pattern) if cell == 0]
+
+         if available_moves:
+             # Check if AI prefers any of the good moves
+             best_move = available_moves[np.argmax([q_values[m] for m in available_moves])]
+             if best_move in good_moves:
+                 strategies[strategy] += 1
+                 print(f"✅ AI correctly prefers {strategy.replace('_', ' ')} in this scenario")
+             else:
+                 print(f"❌ AI doesn't recognize {strategy.replace('_', ' ')}")
+
+     print(f"\nStrategy recognition summary:")
+     for strategy, count in strategies.items():
+         print(f"   {strategy}: {count}/1")
+
+ def train_ai_offline(model, num_games=1000):
+     """Simulate games to improve the AI offline"""
+
+     print("\n🎮 SIMULATING OFFLINE TRAINING")
+     print("="*50)
+
+     print(f"Simulating {num_games} games of self-play...")
+
+     # Simple self-play simulation
+     import random
+
+     # We'll create a simple improvement by adding some basic strategies
+     original_non_zero = np.count_nonzero(model)
+
+     # Add some basic Tic-Tac-Toe knowledge
+     # Center is good
+     empty_board_idx = 0  # All zeros
+     model[empty_board_idx][4] = 0.1  # Center is good
+
+     # Corners are good when center is taken
+     center_taken_idx = 3**4  # Only center is 1
+     for corner in [0, 2, 6, 8]:
+         model[center_taken_idx][corner] = 0.08
+
+     # Blocking is good
+     # Example: opponent has two in a row
+     for i in range(9):
+         board = [0] * 9
+         board[i] = 1
+         board[(i+3)%9] = 1
+         if board[6] == 0:  # Check if third in column is empty
+             state_idx = 0
+             for j, cell in enumerate(board):
+                 state_idx += cell * (3 ** j)
+             blocking_move = 6
+             model[state_idx][blocking_move] = 0.15
+
+     new_non_zero = np.count_nonzero(model)
+     improvement = new_non_zero - original_non_zero
+
+     print(f"Added {improvement} new learned values")
+     print("Basic Tic-Tac-Toe strategies have been added to the AI")
+
+     return model
+
+ def save_improved_model(model):
+     """Save the improved model"""
+     filename = "ttt_ai_model_improved.pkl"
+
+     # Load existing data to preserve experience buffer
+     original_filename = "ttt_ai_model.pkl"
+     if os.path.exists(original_filename):
+         with open(original_filename, 'rb') as f:
+             data = pickle.load(f)
+     else:
+         data = {'model': model, 'experience': []}
+
+     data['model'] = model
+
+     with open(filename, 'wb') as f:
+         pickle.dump(data, f)
+
+     print(f"\n💾 Improved model saved to '{filename}'")
+
+ def main():
+     """Main function to analyze and improve the AI"""
+
+     print("🤖 TIC-TAC-TOE AI DIAGNOSTIC TOOL")
+     print("="*60)
+
+     # Load the model
+     model = load_ai_model()
+     if model is None:
+         return
+
+     # Test with common scenarios
+     test_ai_with_common_scenarios(model)
+
+     # Analyze learning patterns
+     analyze_learning_patterns(model)
+
+     # Check for specific strategies
+     check_for_specific_patterns(model)
+
+     # Offer to improve the AI
+     print("\n" + "="*60)
+     improve = input("\nWould you like to add basic Tic-Tac-Toe knowledge to the AI? (y/n): ").strip().lower()
+
+     if improve == 'y':
+         model = train_ai_offline(model)
+         save_improved_model(model)
+         print("\n✅ AI has been improved with basic strategies!")
+         print("Restart the game and use 'ttt_ai_model_improved.pkl' for better performance")
+     else:
+         print("\n📝 Recommendations for improving the AI through gameplay:")
+         print("1. Play more games against the AI")
+         print("2. Let the AI watch you play against itself")
+         print("3. Adjust learning rate to 0.2-0.3 for faster learning")
+         print("4. Reduce exploration rate to 0.1 once AI starts winning")
+         print("5. Play both as X and O to teach both perspectives")
+
+ if __name__ == "__main__":
+     main()
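
Note that train_ai_offline() above seeds a handful of handcrafted Q-values rather than actually simulating the advertised self-play. A minimal sketch of what genuine random self-play over the same 3**9 x 9 model array could look like, using a Monte-Carlo style discounted-return update (an illustration under those assumptions, not the author's method):

    import random
    import numpy as np

    LINES = [[0,1,2],[3,4,5],[6,7,8],[0,3,6],[1,4,7],[2,5,8],[0,4,8],[2,4,6]]

    def winner(board):
        # Return 1 or 2 if that player has three in a row, else None
        for a, b, c in LINES:
            if board[a] == board[b] == board[c] != 0:
                return board[a]
        return None

    def encode(board):
        # Same base-3 state index used throughout these scripts
        return sum(cell * (3 ** i) for i, cell in enumerate(board))

    def self_play(model, num_games=1000, alpha=0.3, gamma=0.9):
        for _ in range(num_games):
            board = [0] * 9
            history = []  # (state_idx, move) pairs for player 2's moves
            player = 1
            while True:
                moves = [i for i, c in enumerate(board) if c == 0]
                move = random.choice(moves)
                if player == 2:
                    history.append((encode(board), move))
                board[move] = player
                w = winner(board)
                if w or all(c != 0 for c in board):
                    reward = 1.0 if w == 2 else -1.0 if w == 1 else 0.1
                    # Credit each of player 2's moves, discounted back from the end
                    for k, (s, a) in enumerate(reversed(history)):
                        target = reward * (gamma ** k)
                        model[s][a] += alpha * (target - model[s][a])
                    break
                player = 3 - player
        return model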
Imitation Learning Tic Tac Toe AI/Screenshot 2025-12-02 at 12.30.57 PM.png ADDED

Git LFS Details

  • SHA256: 16cb0eda2c9bb0fbaa3badbf822812e0b9ca26c07eeb451596ea1d93886c5e19
  • Pointer size: 132 Bytes
  • Size of remote file: 2.32 MB
Imitation Learning Tic Tac Toe AI/Screenshot 2025-12-02 at 12.33.32 PM.png ADDED

Git LFS Details

  • SHA256: 8778d70b8bfb2df36d4d9106d82c943fe542d4390385ccbbf63f88f1ee876fb3
  • Pointer size: 132 Bytes
  • Size of remote file: 2.25 MB
Imitation Learning Tic Tac Toe AI/Screenshot 2025-12-02 at 12.33.43 PM.png ADDED

Git LFS Details

  • SHA256: e13da2cf84d7c25c069e0a210819184e09bb3ff6f48841ad86163ef0c459c6d9
  • Pointer size: 132 Bytes
  • Size of remote file: 2.26 MB
Imitation Learning Tic Tac Toe AI/analyze_ttt_model.py ADDED
@@ -0,0 +1,377 @@
1
+ import pickle
2
+ import numpy as np
3
+ import os
4
+ from collections import deque
5
+ import json
6
+
7
+ def load_pickle_file(filename="ttt_ai_model.pkl"):
8
+ """
9
+ Load and analyze the Tic-Tac-Toe AI model pickle file.
10
+
11
+ Args:
12
+ filename (str): Path to the pickle file
13
+
14
+ Returns:
15
+ dict: The loaded data or None if file doesn't exist
16
+ """
17
+
18
+ if not os.path.exists(filename):
19
+ print(f"❌ File '{filename}' not found!")
20
+ print("Possible reasons:")
21
+ print("1. The game hasn't been played yet")
22
+ print("2. The file was saved with a different name")
23
+ print("3. The file is in a different directory")
24
+ return None
25
+
26
+ try:
27
+ print(f"📂 Opening '{filename}'...")
28
+
29
+ # Load the pickle file
30
+ with open(filename, 'rb') as f:
31
+ data = pickle.load(f)
32
+
33
+ print("✅ File loaded successfully!")
34
+ print("\n" + "="*60)
35
+
36
+ return data
37
+
38
+ except Exception as e:
39
+ print(f"❌ Error loading pickle file: {e}")
40
+ print(f"Error type: {type(e).__name__}")
41
+ return None
42
+
43
+ def analyze_model(data):
44
+ """
45
+ Analyze and display information about the AI model.
46
+
47
+ Args:
48
+ data (dict): The loaded pickle data
49
+ """
50
+
51
+ if not data:
52
+ print("No data to analyze")
53
+ return
54
+
55
+ print("📊 MODEL ANALYSIS")
56
+ print("="*60)
57
+
58
+ # Check what keys are available
59
+ print(f"Keys in data: {list(data.keys())}")
60
+
61
+ # Analyze model matrix if present
62
+ if 'model' in data:
63
+ model = data['model']
64
+ print(f"\n🤖 AI Model Information:")
65
+ print(f" Shape: {model.shape}")
66
+ print(f" Size: {model.size:,} elements")
67
+ print(f" Data type: {model.dtype}")
68
+
69
+ # Calculate some statistics
70
+ print(f"\n📈 Model Statistics:")
71
+ print(f" Non-zero entries: {np.count_nonzero(model):,}")
72
+ print(f" Zero entries: {np.sum(model == 0):,}")
73
+ print(f" Sparsity: {(np.sum(model == 0) / model.size) * 100:.2f}%")
74
+
75
+ # Get min, max, mean values
76
+ if model.size > 0:
77
+ flat_model = model.flatten()
78
+ non_zero_values = flat_model[flat_model != 0]
79
+
80
+ if len(non_zero_values) > 0:
81
+ print(f" Min value (non-zero): {non_zero_values.min():.6f}")
82
+ print(f" Max value: {flat_model.max():.6f}")
83
+ print(f" Mean value (non-zero): {non_zero_values.mean():.6f}")
84
+ print(f" Std dev (non-zero): {non_zero_values.std():.6f}")
85
+
86
+ # Count of positive vs negative values
87
+ positive = np.sum(flat_model > 0)
88
+ negative = np.sum(flat_model < 0)
89
+ print(f" Positive values: {positive:,}")
90
+ print(f" Negative values: {negative:,}")
91
+
92
+ # Analyze experience replay if present
93
+ if 'experience' in data:
94
+ experience = data['experience']
95
+ print(f"\n🎮 Experience Replay Buffer:")
96
+ print(f" Number of experiences: {len(experience):,}")
97
+
98
+ if experience:
99
+ # Show first few experiences
100
+ print(f" Sample experience (first):")
101
+ if hasattr(experience[0], '__len__'):
102
+ print(f" Length: {len(experience[0])}")
103
+ if len(experience[0]) > 0:
104
+ print(f" First element type: {type(experience[0][0])}")
105
+
106
+ # Check for other data
107
+ for key in data.keys():
108
+ if key not in ['model', 'experience']:
109
+ value = data[key]
110
+ print(f"\n🔍 {key}:")
111
+ print(f" Type: {type(value)}")
112
+ if isinstance(value, (list, tuple, deque)):
113
+ print(f" Length: {len(value)}")
114
+ elif isinstance(value, dict):
115
+ print(f" Keys: {list(value.keys())[:5]}..." if len(value) > 5 else f" Keys: {list(value.keys())}")
116
+ elif isinstance(value, np.ndarray):
117
+ print(f" Shape: {value.shape}")
118
+
119
+ def examine_specific_states(model, num_states=5):
120
+ """
121
+ Examine specific state-action values in the model.
122
+
123
+ Args:
124
+ model (np.ndarray): The AI model
125
+ num_states (int): Number of states to examine
126
+ """
127
+
128
+ print(f"\n🔬 Examining {num_states} specific states:")
129
+ print("-"*40)
130
+
131
+ # Find states with non-zero values
132
+ non_zero_indices = np.nonzero(np.any(model != 0, axis=1))[0]
133
+
134
+ if len(non_zero_indices) > 0:
135
+ print(f"Found {len(non_zero_indices):,} states with learned values")
136
+
137
+ # Sample some states to examine
138
+ if len(non_zero_indices) > num_states:
139
+ sample_indices = np.random.choice(non_zero_indices, num_states, replace=False)
140
+ else:
141
+ sample_indices = non_zero_indices
142
+
143
+ for i, state_idx in enumerate(sample_indices):
144
+ q_values = model[state_idx]
145
+ non_zero_q = q_values[q_values != 0]
146
+
147
+ if len(non_zero_q) > 0:
148
+ print(f"\nState {i+1} (Index {state_idx}):")
149
+ print(f" Non-zero Q-values: {len(non_zero_q)}")
150
+ print(f" Actions with values:")
151
+ for action in np.where(q_values != 0)[0]:
152
+ print(f" Action {action}: {q_values[action]:.4f}")
153
+ else:
154
+ print("No states with learned values found yet.")
155
+
156
+ def decode_state(state_index):
157
+ """
158
+ Convert a state index back to a board representation.
159
+ This assumes the same encoding used in the game.
160
+
161
+ Args:
162
+ state_index (int): The encoded state index
163
+
164
+ Returns:
165
+ list: Board representation (0=empty, 1=X, 2=O)
166
+ """
167
+ board = [0] * 9
168
+ temp_index = state_index
169
+
170
+ for i in range(9):
171
+ board[i] = temp_index % 3
172
+ temp_index //= 3
173
+
174
+ return board
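+ # Example: index 19 has base-3 digits 1,0,2,0,... (least significant
+ # digit first), i.e. X at cell 0 and O at cell 2, exactly inverting
+ # the game's cell * 3**i encoding.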
175
+
176
+ def display_board(board):
177
+ """
178
+ Display a Tic-Tac-Toe board in human-readable format.
179
+
180
+ Args:
181
+ board (list): Board representation
182
+ """
183
+ symbols = {0: '.', 1: 'X', 2: 'O'}
184
+
185
+ print("Board state:")
186
+ for row in range(3):
187
+ row_chars = [symbols[board[row*3 + col]] for col in range(3)]
188
+ print(" " + " | ".join(row_chars))
189
+ if row < 2:
190
+ print(" " + "-" * 9)
191
+
192
+ def explore_model_interactively(model):
193
+ """
194
+ Interactive exploration of the model.
195
+
196
+ Args:
197
+ model (np.ndarray): The AI model
198
+ """
199
+
200
+ print("\n🎯 INTERACTIVE EXPLORATION")
201
+ print("="*60)
202
+
203
+ while True:
204
+ print("\nOptions:")
205
+ print("1. Look up a specific state")
206
+ print("2. Find states with highest Q-values")
207
+ print("3. Find best action for a given state")
208
+ print("4. Exit exploration")
209
+
210
+ choice = input("\nEnter your choice (1-4): ").strip()
211
+
212
+ if choice == '1':
213
+ try:
214
+ state_idx = int(input("Enter state index (0-19682): "))
215
+ if 0 <= state_idx < model.shape[0]:
216
+ board = decode_state(state_idx)
217
+ display_board(board)
218
+
219
+ q_values = model[state_idx]
220
+ print(f"\nQ-values for state {state_idx}:")
221
+ for action in range(9):
222
+ if q_values[action] != 0:
223
+ print(f" Action {action} (row {action//3}, col {action%3}): {q_values[action]:.4f}")
224
+
225
+ # Show best action
226
+ best_action = np.argmax(q_values)
227
+ print(f"\nBest action: {best_action} (row {best_action//3}, col {best_action%3})")
228
+ else:
229
+ print("Invalid state index!")
230
+ except ValueError:
231
+ print("Please enter a valid number!")
232
+
233
+ elif choice == '2':
234
+ try:
235
+ num_states = int(input("How many top states? (1-100): "))
236
+ num_states = max(1, min(100, num_states))
237
+
238
+ # Find states with maximum Q-values
239
+ max_q_per_state = np.max(model, axis=1)
240
+ top_indices = np.argsort(max_q_per_state)[-num_states:][::-1]
241
+
242
+ print(f"\nTop {num_states} states with highest Q-values:")
243
+ for i, idx in enumerate(top_indices[:10]): # Show first 10
244
+ max_q = max_q_per_state[idx]
245
+ if max_q > 0:
246
+ board = decode_state(idx)
247
+ print(f"\n{i+1}. State {idx} (max Q: {max_q:.4f})")
248
+ display_board(board)
249
+ except ValueError:
250
+ print("Please enter a valid number!")
251
+
252
+ elif choice == '3':
253
+ # Create a board manually
254
+ print("\nEnter board state (9 numbers, 0=empty, 1=X, 2=O)")
255
+ print("Example: 0 0 1 0 2 0 0 0 0")
256
+
257
+ try:
258
+ board_input = input("Board: ").strip()
259
+ if len(board_input) == 0:
260
+ # Use default example
261
+ board = [0, 0, 1, 0, 2, 0, 0, 0, 0]
262
+ else:
263
+ board = [int(x) for x in board_input.split()]
264
+
265
+ if len(board) != 9 or any(x not in [0, 1, 2] for x in board):
266
+ raise ValueError("Invalid board")
267
+
268
+ display_board(board)
269
+
270
+ # Convert to state index
271
+ state_idx = 0
272
+ for i, cell in enumerate(board):
273
+ state_idx += cell * (3 ** i)
274
+
275
+ q_values = model[state_idx]
276
+
277
+ # Only show available moves
278
+ available_moves = [i for i, cell in enumerate(board) if cell == 0]
279
+
280
+ print("\nAvailable moves and their Q-values:")
281
+ for move in available_moves:
282
+ q_val = q_values[move]
283
+ row, col = divmod(move, 3)
284
+ print(f" Move {move} (row {row}, col {col}): {q_val:.4f}")
285
+
286
+ if available_moves:
287
+ best_move = available_moves[np.argmax([q_values[m] for m in available_moves])]
288
+ row, col = divmod(best_move, 3)
289
+ print(f"\nRecommended move: {best_move} (row {row}, col {col})")
290
+ else:
291
+ print("No available moves!")
292
+
293
+ except Exception as e:
294
+ print(f"Error: {e}")
295
+
296
+ elif choice == '4':
297
+ print("Exiting interactive exploration.")
298
+ break
299
+
300
+ else:
301
+ print("Invalid choice!")
302
+
303
+ def save_model_summary(data, filename="model_summary.json"):
304
+ """
305
+ Save a summary of the model to a JSON file.
306
+
307
+ Args:
308
+ data (dict): The loaded pickle data
309
+ filename (str): Output JSON filename
310
+ """
311
+ if not data:
312
+ return
313
+
314
+ summary = {}
315
+
316
+ if 'model' in data:
317
+ model = data['model']
318
+ summary['model'] = {
319
+ 'shape': model.shape,
320
+ 'size': int(model.size),
321
+ 'non_zero_entries': int(np.count_nonzero(model)),
322
+ 'sparsity': float((np.sum(model == 0) / model.size) * 100)
323
+ }
324
+
325
+ if 'experience' in data:
326
+ experience = data['experience']
327
+ summary['experience'] = {
328
+ 'count': len(experience)
329
+ }
330
+
331
+ try:
332
+ with open(filename, 'w') as f:
333
+ json.dump(summary, f, indent=2)
334
+ print(f"\n💾 Model summary saved to '{filename}'")
335
+ except Exception as e:
336
+ print(f"Error saving summary: {e}")
337
+
338
+ def main():
339
+ """
340
+ Main function to load and analyze the pickle file.
341
+ """
342
+ print("🔍 Tic-Tac-Toe AI Model Analyzer")
343
+ print("="*60)
344
+
345
+ # Try to load the pickle file
346
+ filename = "ttt_ai_model.pkl"
347
+ data = load_pickle_file(filename)
348
+
349
+ if data:
350
+ # Analyze the model
351
+ analyze_model(data)
352
+
353
+ # If model exists, do more detailed analysis
354
+ if 'model' in data:
355
+ # Examine specific states
356
+ examine_specific_states(data['model'])
357
+
358
+ # Interactive exploration
359
+ explore = input("\nWould you like to explore the model interactively? (y/n): ").strip().lower()
360
+ if explore == 'y':
361
+ explore_model_interactively(data['model'])
362
+
363
+ # Save summary
364
+ save = input("\nWould you like to save a summary? (y/n): ").strip().lower()
365
+ if save == 'y':
366
+ save_model_summary(data)
367
+
368
+ # Additional file info
369
+ print("\n📄 File Information:")
370
+ print(f" File size: {os.path.getsize(filename):,} bytes")
371
+ print(f" Last modified: {os.path.getmtime(filename):.0f}")
372
+
373
+ print("\n" + "="*60)
374
+ print("Analysis complete!")
375
+
376
+ if __name__ == "__main__":
377
+ main()
Imitation Learning Tic Tac Toe AI/requirements.txt ADDED
@@ -0,0 +1,2 @@
1
+ PyQt5
2
+ numpy
Imitation Learning Tic Tac Toe AI/tic_tac_toe_ai.py ADDED
@@ -0,0 +1,483 @@
1
+ import sys
2
+ import numpy as np
3
+ import random
4
+ from collections import deque
5
+ import pickle
6
+ import os
7
+ from PyQt5.QtWidgets import *
8
+ from PyQt5.QtCore import *
9
+ from PyQt5.QtGui import *
10
+
11
+ class TicTacToeAI:
12
+ """AI that learns by imitating human player moves"""
13
+
14
+ def __init__(self):
15
+ self.experience = deque(maxlen=10000)
16
+ self.state_history = []
17
+ self.move_history = []
18
+ self.model = self.create_model()
19
+ self.learning_rate = 0.1
20
+ self.epsilon = 0.3 # Exploration rate
21
+ self.load_data()
22
+
23
+ def create_model(self):
24
+ """Simple Q-learning style model"""
25
+ # State representation: 9 cells (0=empty, 1=X, 2=O)
26
+ # Action: 9 possible moves
27
+ return np.zeros((3**9, 9)) # Simplified representation
28
+
29
+ def state_to_index(self, board):
30
+ """Convert board state to a unique index"""
31
+ index = 0
32
+ for i, cell in enumerate(board):
33
+ index += cell * (3 ** i)
34
+ return index
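+ # Worked example of the encoding above: for board [1,0,2,0,0,0,0,0,0]
+ # (X at cell 0, O at cell 2), index = 1*3**0 + 2*3**2 = 19. With 9
+ # cells of 3 values each there are 3**9 = 19683 possible indices,
+ # matching the model's first dimension.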
35
+
36
+ def get_action(self, board, available_moves):
37
+ """Choose an action based on current policy"""
38
+ # Random exploration
39
+ if random.random() < self.epsilon:
40
+ return random.choice(available_moves)
41
+
42
+ # Exploitation: choose best learned move
43
+ state_idx = self.state_to_index(board)
44
+ q_values = self.model[state_idx]
45
+
46
+ # Filter available moves and choose best
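+ # Occupied cells are masked with -inf, so the argmax below can only
+ # return a legal, still-empty cell index (0-8)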
47
+ available_q_values = [q_values[move] if move in available_moves else -float('inf')
48
+ for move in range(9)]
49
+ return np.argmax(available_q_values)
50
+
51
+ def record_move(self, board, move):
52
+ """Record state-action pair for learning"""
53
+ self.state_history.append(board.copy())
54
+ self.move_history.append(move)
55
+
56
+ def learn_from_game(self, winner):
57
+ """Learn from the completed game"""
58
+ if not self.state_history:
59
+ return
60
+
61
+ reward = 0.1 if winner == 'O' else -0.1 if winner == 'X' else 0.05
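+ # The AI plays O: an O win reinforces every recorded move (+0.1),
+ # an X win penalizes them (-0.1), and a draw earns a small +0.05.
+ # All moves in a game share the same terminal reward (a simple
+ # Monte-Carlo-style update with no discounting).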
62
+
63
+ for state, move in zip(self.state_history, self.move_history):
64
+ state_idx = self.state_to_index(state)
65
+ self.model[state_idx][move] += self.learning_rate * reward
66
+
67
+ # Clear history for next game
68
+ self.state_history = []
69
+ self.move_history = []
70
+
71
+ self.save_data()
72
+
73
+ def save_data(self):
74
+ """Save learned model"""
75
+ try:
76
+ data = {
77
+ 'model': self.model,
78
+ 'experience': list(self.experience)
79
+ }
80
+ with open('ttt_ai_model.pkl', 'wb') as f:
81
+ pickle.dump(data, f)
82
+ except Exception:
83
+ pass # Best-effort save; ignore I/O errors so the game keeps running
84
+
85
+ def load_data(self):
86
+ """Load saved model"""
87
+ if os.path.exists('ttt_ai_model.pkl'):
88
+ try:
89
+ with open('ttt_ai_model.pkl', 'rb') as f:
90
+ data = pickle.load(f)
91
+ self.model = data.get('model', self.model)
92
+ self.experience = deque(data.get('experience', []), maxlen=10000)
93
+ except Exception:
94
+ pass # Corrupt or incompatible file; fall back to a fresh model
95
+
96
+ class TicTacToeGame:
97
+ """Game logic"""
98
+
99
+ def __init__(self):
100
+ self.reset()
101
+
102
+ def reset(self):
103
+ self.board = [0] * 9 # 0=empty, 1=X, 2=O
104
+ self.current_player = 1 # X starts
105
+ self.winner = None
106
+ self.game_over = False
107
+ self.moves = 0
108
+
109
+ def make_move(self, position):
110
+ """Make a move at given position"""
111
+ if self.board[position] != 0 or self.game_over:
112
+ return False
113
+
114
+ self.board[position] = self.current_player
115
+ self.moves += 1
116
+
117
+ # Check for winner
118
+ self.winner = self.check_winner()
119
+ if self.winner or self.moves == 9:
120
+ self.game_over = True
121
+ else:
122
+ # Switch player
123
+ self.current_player = 3 - self.current_player # Switches between 1 and 2
124
+
125
+ return True
126
+
127
+ def check_winner(self):
128
+ """Check if there's a winner"""
129
+ winning_combinations = [
130
+ [0, 1, 2], [3, 4, 5], [6, 7, 8], # Rows
131
+ [0, 3, 6], [1, 4, 7], [2, 5, 8], # Columns
132
+ [0, 4, 8], [2, 4, 6] # Diagonals
133
+ ]
134
+
135
+ for combo in winning_combinations:
136
+ if (self.board[combo[0]] == self.board[combo[1]] ==
137
+ self.board[combo[2]] != 0):
138
+ return 'X' if self.board[combo[0]] == 1 else 'O'
139
+
140
+ return None
141
+
142
+ def get_available_moves(self):
143
+ """Get list of available positions"""
144
+ return [i for i, cell in enumerate(self.board) if cell == 0]
145
+
146
+ def get_board_state(self):
147
+ """Return copy of board"""
148
+ return self.board.copy()
149
+
150
+ class GameBoard(QWidget):
151
+ """Tic-Tac-Toe game board widget"""
152
+
153
+ def __init__(self):
154
+ super().__init__()
155
+ self.cell_size = 100
156
+ self.setFixedSize(self.cell_size * 3 + 20, self.cell_size * 3 + 20)
157
+
158
+ def paintEvent(self, event):
159
+ painter = QPainter(self)
160
+ painter.setRenderHint(QPainter.Antialiasing)
161
+
162
+ # Draw background
163
+ painter.fillRect(self.rect(), QColor(240, 240, 240))
164
+
165
+ # Draw grid
166
+ painter.setPen(QPen(QColor(0, 0, 0), 3))
167
+ for i in range(1, 3):
168
+ # Vertical lines
169
+ painter.drawLine(
170
+ self.cell_size * i + 10, 10,
171
+ self.cell_size * i + 10, self.cell_size * 3 + 10
172
+ )
173
+ # Horizontal lines
174
+ painter.drawLine(
175
+ 10, self.cell_size * i + 10,
176
+ self.cell_size * 3 + 10, self.cell_size * i + 10
177
+ )
178
+
179
+ # Draw X's and O's
180
+ if hasattr(self, 'game'):
181
+ for i in range(9):
182
+ row, col = divmod(i, 3)
183
+ x = col * self.cell_size + 10
184
+ y = row * self.cell_size + 10
185
+
186
+ if self.game.board[i] == 1: # X
187
+ painter.setPen(QPen(QColor(220, 50, 50), 4))
188
+ painter.drawLine(x + 20, y + 20, x + self.cell_size - 20, y + self.cell_size - 20)
189
+ painter.drawLine(x + self.cell_size - 20, y + 20, x + 20, y + self.cell_size - 20)
190
+ elif self.game.board[i] == 2: # O
191
+ painter.setPen(QPen(QColor(50, 50, 220), 4))
192
+ painter.drawEllipse(x + 20, y + 20, self.cell_size - 40, self.cell_size - 40)
193
+
194
+ # Draw winner line if exists
195
+ if hasattr(self, 'game') and self.game.winner:
196
+ self.draw_winner_line(painter)
197
+
198
+ def draw_winner_line(self, painter):
199
+ """Draw line through winning combination"""
200
+ winning_combinations = [
201
+ [0, 1, 2], [3, 4, 5], [6, 7, 8], # Rows
202
+ [0, 3, 6], [1, 4, 7], [2, 5, 8], # Columns
203
+ [0, 4, 8], [2, 4, 6] # Diagonals
204
+ ]
205
+
206
+ painter.setPen(QPen(QColor(0, 200, 0), 6))
207
+
208
+ for combo in winning_combinations:
209
+ if (self.game.board[combo[0]] == self.game.board[combo[1]] ==
210
+ self.game.board[combo[2]] != 0):
211
+ # Calculate positions
212
+ positions = []
213
+ for pos in combo:
214
+ row, col = divmod(pos, 3)
215
+ x = col * self.cell_size + self.cell_size // 2 + 10
216
+ y = row * self.cell_size + self.cell_size // 2 + 10
217
+ positions.append((x, y))
218
+
219
+ painter.drawLine(positions[0][0], positions[0][1],
220
+ positions[2][0], positions[2][1])
221
+ break
222
+
223
+ def mousePressEvent(self, event):
224
+ if event.button() == Qt.LeftButton:
225
+ x = event.x() - 10
226
+ y = event.y() - 10
227
+
228
+ if 0 <= x < self.cell_size * 3 and 0 <= y < self.cell_size * 3:
229
+ col = x // self.cell_size
230
+ row = y // self.cell_size
231
+ position = row * 3 + col
232
+
233
+ if hasattr(self, 'on_cell_clicked'):
234
+ self.on_cell_clicked(position)
235
+
236
+ class ImitationLearningGame(QMainWindow):
237
+ """Main game window with imitation learning"""
238
+
239
+ def __init__(self):
240
+ super().__init__()
241
+ self.game = TicTacToeGame()
242
+ self.ai = TicTacToeAI()
243
+ self.player_symbol = 1 # X
244
+ self.ai_symbol = 2 # O
245
+ self.training_mode = True
246
+ self.ai_turn = False
247
+ self.games_played = 0
248
+ self.ai_wins = 0
249
+ self.player_wins = 0
250
+ self.ties = 0
251
+
252
+ self.init_ui()
253
+ self.start_new_game()
254
+
255
+ def init_ui(self):
256
+ self.setWindowTitle('Imitation Learning Tic-Tac-Toe')
257
+ self.setGeometry(100, 100, 400, 500)
258
+
259
+ # Central widget
260
+ central_widget = QWidget()
261
+ self.setCentralWidget(central_widget)
262
+ layout = QVBoxLayout()
263
+
264
+ # Game board
265
+ self.board_widget = GameBoard()
266
+ self.board_widget.on_cell_clicked = self.handle_cell_click
267
+ self.board_widget.game = self.game
268
+ layout.addWidget(self.board_widget)
269
+
270
+ # Info panel
271
+ info_layout = QHBoxLayout()
272
+
273
+ self.status_label = QLabel("Your turn (X)")
274
+ self.status_label.setFont(QFont('Arial', 14))
275
+ info_layout.addWidget(self.status_label)
276
+
277
+ info_layout.addStretch()
278
+
279
+ self.stats_label = QLabel("Games: 0 | AI Wins: 0 | Your Wins: 0 | Ties: 0")
280
+ self.stats_label.setFont(QFont('Arial', 10))
281
+ info_layout.addWidget(self.stats_label)
282
+
283
+ layout.addLayout(info_layout)
284
+
285
+ # Control panel
286
+ control_layout = QHBoxLayout()
287
+
288
+ self.train_button = QPushButton("Toggle Training: ON")
289
+ self.train_button.clicked.connect(self.toggle_training)
290
+ control_layout.addWidget(self.train_button)
291
+
292
+ self.new_game_button = QPushButton("New Game")
293
+ self.new_game_button.clicked.connect(self.start_new_game)
294
+ control_layout.addWidget(self.new_game_button)
295
+
296
+ self.reset_ai_button = QPushButton("Reset AI")
297
+ self.reset_ai_button.clicked.connect(self.reset_ai)
298
+ control_layout.addWidget(self.reset_ai_button)
299
+
300
+ layout.addLayout(control_layout)
301
+
302
+ # Learning parameters
303
+ param_layout = QHBoxLayout()
304
+
305
+ param_layout.addWidget(QLabel("Learning Rate:"))
306
+ self.learning_rate_slider = QSlider(Qt.Horizontal)
307
+ self.learning_rate_slider.setRange(1, 20)
308
+ self.learning_rate_slider.setValue(10)
309
+ self.learning_rate_slider.valueChanged.connect(self.update_learning_rate)
310
+ param_layout.addWidget(self.learning_rate_slider)
311
+
312
+ param_layout.addWidget(QLabel("Exploration:"))
313
+ self.exploration_slider = QSlider(Qt.Horizontal)
314
+ self.exploration_slider.setRange(0, 100)
315
+ self.exploration_slider.setValue(30)
316
+ self.exploration_slider.valueChanged.connect(self.update_exploration)
317
+ param_layout.addWidget(self.exploration_slider)
318
+
319
+ layout.addLayout(param_layout)
320
+
321
+ # Learning info
322
+ self.learning_info = QTextEdit()
323
+ self.learning_info.setMaximumHeight(100)
324
+ self.learning_info.setReadOnly(True)
325
+ layout.addWidget(self.learning_info)
326
+
327
+ central_widget.setLayout(layout)
328
+
329
+ # Timer for AI moves
330
+ self.ai_timer = QTimer()
331
+ self.ai_timer.timeout.connect(self.make_ai_move)
332
+
333
+ self.add_log("AI initialized. Start playing to train the AI!")
334
+
335
+ def start_new_game(self):
336
+ self.game.reset()
337
+ self.ai_turn = False # Player starts
338
+ self.status_label.setText("Your turn (X)")
339
+ self.board_widget.update()
340
+
341
+ # (No AI opening move needed: the human always starts as X, so
342
+ # ai_turn is False at this point.)
343
+
344
+ def toggle_training(self):
345
+ self.training_mode = not self.training_mode
346
+ self.train_button.setText(f"Toggle Training: {'ON' if self.training_mode else 'OFF'}")
347
+ self.add_log(f"Training mode {'enabled' if self.training_mode else 'disabled'}")
348
+
349
+ def reset_ai(self):
350
+ self.ai = TicTacToeAI()
351
+ self.games_played = 0
352
+ self.ai_wins = 0
353
+ self.player_wins = 0
354
+ self.ties = 0
355
+ self.update_stats()
356
+ self.add_log("AI has been reset. Starting fresh learning!")
357
+
358
+ def update_learning_rate(self, value):
359
+ self.ai.learning_rate = value / 100.0
360
+ self.add_log(f"Learning rate set to {self.ai.learning_rate:.2f}")
361
+
362
+ def update_exploration(self, value):
363
+ self.ai.epsilon = value / 100.0
364
+ self.add_log(f"Exploration rate set to {self.ai.epsilon:.2f}")
365
+
366
+ def handle_cell_click(self, position):
367
+ if self.game.game_over or self.ai_turn:
368
+ return
369
+
370
+ # Capture the board before the move: learning must pair the pre-move
+ # state with the chosen action (Q(s, a), not Q(s', a))
+ board_before = self.game.get_board_state()
+ if self.game.make_move(position):
371
+ # Record the human's move for the AI to imitate
372
+ if self.training_mode:
373
+ self.ai.record_move(board_before, position)
374
+
375
+ self.board_widget.update()
376
+
377
+ if self.game.game_over:
378
+ self.end_game()
379
+ else:
380
+ # Switch to AI turn
381
+ self.ai_turn = True
382
+ self.status_label.setText("AI thinking...")
383
+ self.ai_timer.start(500) # AI moves after 0.5 seconds
384
+
385
+ def make_ai_move(self):
386
+ self.ai_timer.stop()
387
+
388
+ if self.game.game_over:
389
+ return
390
+
391
+ available_moves = self.game.get_available_moves()
392
+ if not available_moves:
393
+ return
394
+
395
+ # Get AI move from the pre-move board (also kept for recording below)
396
+ board_before = self.game.get_board_state()
+ ai_move = self.ai.get_action(board_before, available_moves)
397
+
398
+ if self.game.make_move(ai_move):
399
+ # Record AI's own move against the pre-move state
400
+ if self.training_mode:
401
+ self.ai.record_move(board_before, ai_move)
402
+
403
+ self.board_widget.update()
404
+
405
+ if self.game.game_over:
406
+ self.end_game()
407
+ else:
408
+ self.ai_turn = False
409
+ self.status_label.setText("Your turn (X)")
410
+
411
+ def end_game(self):
412
+ winner = self.game.winner
413
+
414
+ # Update statistics
415
+ self.games_played += 1
416
+ if winner == 'X':
417
+ self.player_wins += 1
418
+ result_text = "You win!"
419
+ elif winner == 'O':
420
+ self.ai_wins += 1
421
+ result_text = "AI wins!"
422
+ else:
423
+ self.ties += 1
424
+ result_text = "It's a tie!"
425
+
426
+ # AI learns from the game
427
+ if self.training_mode:
428
+ self.ai.learn_from_game(winner)
429
+ self.add_log(f"Game {self.games_played}: {result_text} AI learning updated.")
430
+ else:
431
+ self.add_log(f"Game {self.games_played}: {result_text}")
432
+
433
+ self.status_label.setText(result_text)
434
+ self.update_stats()
435
+
436
+ # Show end game dialog
437
+ msg = QMessageBox()
438
+ msg.setWindowTitle("Game Over")
439
+ msg.setText(result_text)
440
+ msg.setInformativeText("Do you want to play again?")
441
+ msg.setStandardButtons(QMessageBox.Yes | QMessageBox.No)
442
+
443
+ if msg.exec_() == QMessageBox.Yes:
444
+ self.start_new_game()
445
+
446
+ def update_stats(self):
447
+ self.stats_label.setText(
448
+ f"Games: {self.games_played} | "
449
+ f"AI Wins: {self.ai_wins} | "
450
+ f"Your Wins: {self.player_wins} | "
451
+ f"Ties: {self.ties}"
452
+ )
453
+
454
+ def add_log(self, message):
455
+ self.learning_info.append(f"[Game {self.games_played}] {message}")
456
+
457
+ def main():
458
+ app = QApplication(sys.argv)
459
+ app.setStyle('Fusion')
460
+
461
+ # Set dark theme
462
+ palette = QPalette()
463
+ palette.setColor(QPalette.Window, QColor(53, 53, 53))
464
+ palette.setColor(QPalette.WindowText, Qt.white)
465
+ palette.setColor(QPalette.Base, QColor(25, 25, 25))
466
+ palette.setColor(QPalette.AlternateBase, QColor(53, 53, 53))
467
+ palette.setColor(QPalette.ToolTipBase, Qt.white)
468
+ palette.setColor(QPalette.ToolTipText, Qt.white)
469
+ palette.setColor(QPalette.Text, Qt.white)
470
+ palette.setColor(QPalette.Button, QColor(53, 53, 53))
471
+ palette.setColor(QPalette.ButtonText, Qt.white)
472
+ palette.setColor(QPalette.BrightText, Qt.red)
473
+ palette.setColor(QPalette.Link, QColor(42, 130, 218))
474
+ palette.setColor(QPalette.Highlight, QColor(42, 130, 218))
475
+ palette.setColor(QPalette.HighlightedText, Qt.black)
476
+ app.setPalette(palette)
477
+
478
+ game = ImitationLearningGame()
479
+ game.show()
480
+ sys.exit(app.exec_())
481
+
482
+ if __name__ == '__main__':
483
+ main()
Imitation Learning Tic Tac Toe AI/ttt_ai_model.pkl ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:deb75c4a0df6e020246eee77f4304d89180acc388c925abfa27f4236c94279ec
3
+ size 1417366
Imitation Learning Tic Tac Toe AI/ttt_diagnostic.py ADDED
@@ -0,0 +1,351 @@
1
+ import pickle
2
+ import numpy as np
3
+ import os
4
+
5
+ def load_ai_model():
6
+ """Load the AI model from pickle file"""
7
+ filename = "ttt_ai_model.pkl"
8
+ if not os.path.exists(filename):
9
+ print("Model file not found!")
10
+ return None
11
+
12
+ with open(filename, 'rb') as f:
13
+ data = pickle.load(f)
14
+
15
+ return data['model']
16
+
17
+ def decode_state(state_index):
18
+ """Convert state index to board representation"""
19
+ board = [0] * 9
20
+ temp_index = state_index
21
+
22
+ for i in range(9):
23
+ board[i] = temp_index % 3
24
+ temp_index //= 3
25
+
26
+ return board
27
+
28
+ def display_board(board):
29
+ """Display Tic-Tac-Toe board"""
30
+ symbols = {0: '.', 1: 'X', 2: 'O'}
31
+
32
+ print("Current board:")
33
+ for row in range(3):
34
+ row_chars = [symbols[board[row*3 + col]] for col in range(3)]
35
+ print(" " + " | ".join(row_chars))
36
+ if row < 2:
37
+ print(" " + "-" * 9)
38
+
39
+ def test_ai_with_common_scenarios(model):
40
+ """Test AI with common Tic-Tac-Toe scenarios"""
41
+
42
+ print("\n🤖 TESTING AI WITH COMMON SCENARIOS")
43
+ print("="*50)
44
+
45
+ test_cases = [
46
+ # Empty board
47
+ {
48
+ "name": "Empty board - first move",
49
+ "board": [0,0,0,0,0,0,0,0,0],
50
+ "expected": "Center (4) or corners (0,2,6,8)"
51
+ },
52
+ # Center taken by opponent
53
+ {
54
+ "name": "Opponent took center",
55
+ "board": [0,0,0,0,1,0,0,0,0],
56
+ "expected": "A corner (0,2,6,8)"
57
+ },
58
+ # Winning move for AI
59
+ {
60
+ "name": "AI can win in one move",
61
+ "board": [2,1,0, # O X .
62
+ 1,2,0, # X O .
63
+ 0,0,0], # . . .
64
+ "expected": "Move 8 to complete diagonal"
65
+ },
66
+ # Block opponent's winning move
67
+ {
68
+ "name": "Block opponent's winning move",
69
+ "board": [1,0,0, # X . .
70
+ 1,2,0, # X O .
71
+ 0,0,0], # . . .
72
+ "expected": "Move 6 to block vertical"
73
+ },
74
+ # Fork opportunity
75
+ {
76
+ "name": "Fork opportunity",
77
+ "board": [2,0,1, # O . X
78
+ 0,1,0, # . X .
79
+ 0,0,0], # . . .
80
+ "expected": "Move 8 to create fork"
81
+ }
82
+ ]
83
+
84
+ for test in test_cases:
85
+ print(f"\n📋 {test['name']}")
86
+ display_board(test['board'])
87
+
88
+ # Convert board to state index
89
+ state_idx = 0
90
+ for i, cell in enumerate(test['board']):
91
+ state_idx += cell * (3 ** i)
92
+
93
+ # Get Q-values for this state
94
+ q_values = model[state_idx]
95
+
96
+ # Get available moves
97
+ available_moves = [i for i, cell in enumerate(test['board']) if cell == 0]
98
+
99
+ if available_moves:
100
+ print("\nAvailable moves and Q-values:")
101
+ for move in available_moves:
102
+ q_val = q_values[move]
103
+ row, col = divmod(move, 3)
104
+ symbol = "⚠️" if q_val > 0 else " "
105
+ print(f"{symbol} Move {move} (row {row}, col {col}): {q_val:.4f}")
106
+
107
+ # AI's recommended move
108
+ best_move = available_moves[np.argmax([q_values[m] for m in available_moves])]
109
+ row, col = divmod(best_move, 3)
110
+ print(f"\n🤖 AI's choice: Move {best_move} (row {row}, col {col})")
111
+ print(f"📋 Expected: {test['expected']}")
112
+
113
+ # Check if AI learned something useful
114
+ best_q = q_values[best_move]
115
+ if best_q > 0:
116
+ print("✅ AI has positive association with this move")
117
+ elif best_q < 0:
118
+ print("❌ AI has negative association with this move (thinks it's bad)")
119
+ else:
120
+ print("➖ AI has no learning for this move")
121
+ else:
122
+ print("No available moves!")
123
+
124
+ def analyze_learning_patterns(model):
125
+ """Analyze what patterns the AI has learned"""
126
+
127
+ print("\n🔍 ANALYZING LEARNING PATTERNS")
128
+ print("="*50)
129
+
130
+ # Find all states with non-zero Q-values
131
+ non_zero_indices = np.nonzero(np.any(model != 0, axis=1))[0]
132
+
133
+ print(f"Total states with learning: {len(non_zero_indices)}")
134
+
135
+ # Categorize by game phase
136
+ phases = {
137
+ "early": [], # 0-2 moves made
138
+ "mid": [], # 3-5 moves made
139
+ "late": [] # 6-8 moves made
140
+ }
141
+
142
+ for idx in non_zero_indices:
143
+ board = decode_state(idx)
144
+ moves_made = sum(1 for cell in board if cell != 0)
145
+
146
+ if moves_made <= 2:
147
+ phases["early"].append(idx)
148
+ elif moves_made <= 5:
149
+ phases["mid"].append(idx)
150
+ else:
151
+ phases["late"].append(idx)
152
+
153
+ print(f"\nLearning by game phase:")
154
+ print(f" Early game (0-2 moves): {len(phases['early'])} states")
155
+ print(f" Mid game (3-5 moves): {len(phases['mid'])} states")
156
+ print(f" Late game (6-8 moves): {len(phases['late'])} states")
157
+
158
+ # Analyze Q-value distribution
159
+ all_q_values = model[non_zero_indices].flatten()
160
+ non_zero_q = all_q_values[all_q_values != 0]
161
+
162
+ if len(non_zero_q) > 0:
163
+ print(f"\nQ-value analysis:")
164
+ print(f" Total Q-values: {len(non_zero_q)}")
165
+ print(f" Positive Q-values: {np.sum(non_zero_q > 0)}")
166
+ print(f" Negative Q-values: {np.sum(non_zero_q < 0)}")
167
+ print(f" Average Q-value: {np.mean(non_zero_q):.4f}")
168
+ print(f" Most positive: {np.max(non_zero_q):.4f}")
169
+ print(f" Most negative: {np.min(non_zero_q):.4f}")
170
+
171
+ # Show examples of what AI learned
172
+ print("\n📚 Examples of learned states:")
173
+
174
+ # Find states with positive Q-values
175
+ positive_states = []
176
+ for idx in non_zero_indices:
177
+ if np.any(model[idx] > 0):
178
+ positive_states.append(idx)
179
+
180
+ if positive_states:
181
+ print(f"\nFound {len(positive_states)} states with positive associations")
182
+ for i, idx in enumerate(positive_states[:3]): # Show first 3
183
+ board = decode_state(idx)
184
+ print(f"\nExample {i+1}:")
185
+ display_board(board)
186
+
187
+ q_values = model[idx]
188
+ positive_moves = np.where(q_values > 0)[0]
189
+ print("Moves AI thinks are good:")
190
+ for move in positive_moves:
191
+ print(f" Move {move} (row {move//3}, col {move%3}): {q_values[move]:.4f}")
192
+ else:
193
+ print("No positive associations found - AI hasn't learned winning strategies yet")
194
+
195
+ def check_for_specific_patterns(model):
196
+ """Check if AI has learned specific Tic-Tac-Toe strategies"""
197
+
198
+ print("\n🎯 CHECKING FOR SPECIFIC STRATEGIES")
199
+ print("="*50)
200
+
201
+ strategies = {
202
+ "prefer_center": 0,
203
+ "prefer_corners": 0,
204
+ "prefer_edges": 0,
205
+ "block_opponent": 0,
206
+ "create_fork": 0,
207
+ "avoid_losing": 0
208
+ }
209
+
210
+ # Check common winning/blocking patterns
211
+ patterns_to_check = [
212
+ # Center preference
213
+ ([0,0,0,0,0,0,0,0,0], [4], "prefer_center"),
214
+
215
+ # Corner openings
216
+ ([0,0,0,0,1,0,0,0,0], [0,2,6,8], "prefer_corners"),
217
+
218
+ # Block vertical
219
+ ([1,0,0,1,2,0,0,0,0], [6], "block_opponent"),
220
+
221
+ # Block horizontal
222
+ ([1,1,0,0,2,0,0,0,0], [2], "block_opponent"),
223
+
224
+ # Block diagonal
225
+ ([1,0,0,0,1,0,0,0,0], [8], "block_opponent"),
226
+ ]
227
+
228
+ for board_pattern, good_moves, strategy in patterns_to_check:
229
+ state_idx = 0
230
+ for i, cell in enumerate(board_pattern):
231
+ state_idx += cell * (3 ** i)
232
+
233
+ q_values = model[state_idx]
234
+ available_moves = [i for i, cell in enumerate(board_pattern) if cell == 0]
235
+
236
+ if available_moves:
237
+ # Check if AI prefers any of the good moves
238
+ best_move = available_moves[np.argmax([q_values[m] for m in available_moves])]
239
+ if best_move in good_moves:
240
+ strategies[strategy] += 1
241
+ print(f"✅ AI correctly prefers {strategy.replace('_', ' ')} in this scenario")
242
+ else:
243
+ print(f"❌ AI doesn't recognize {strategy.replace('_', ' ')}")
244
+
245
+ print(f"\nStrategy recognition summary:")
246
+ for strategy, count in strategies.items():
247
+ print(f" {strategy}: {count}/1")
248
+
249
+ def train_ai_offline(model, num_games=1000):
250
+ """Simulate games to improve the AI offline"""
251
+
252
+ print("\n🎮 SIMULATING OFFLINE TRAINING")
253
+ print("="*50)
254
+
255
+ print(f"Simulating {num_games} games of self-play...")
256
+
257
+ # Simple self-play simulation
258
+ import random
259
+
260
+ # We'll create a simple improvement by adding some basic strategies
261
+ original_non_zero = np.count_nonzero(model)
262
+
263
+ # Add some basic Tic-Tac-Toe knowledge
264
+ # Center is good
265
+ empty_board_idx = 0 # All zeros
266
+ model[empty_board_idx][4] = 0.1 # Center is good
267
+
268
+ # Corners are good when center is taken
269
+ center_taken_idx = 3**4 # Only center is 1
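+ # (cell 4 holds value 1 and all other cells 0, so the base-3 index
+ # is 1 * 3**4 = 81)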
270
+ for corner in [0, 2, 6, 8]:
271
+ model[center_taken_idx][corner] = 0.08
272
+
273
+ # Blocking is good
274
+ # Example: the opponent (X) has two in a column; boost the move
275
+ # that completes, i.e. blocks, that column
276
+ for col in range(3):
277
+ board = [0] * 9
278
+ board[col] = 1
279
+ board[col + 3] = 1
280
+ state_idx = 0
281
+ for j, cell in enumerate(board):
282
+ state_idx += cell * (3 ** j)
283
+ blocking_move = col + 6 # Bottom cell of the same column
284
+ model[state_idx][blocking_move] = 0.15
285
+
286
+ new_non_zero = np.count_nonzero(model)
287
+ improvement = new_non_zero - original_non_zero
288
+
289
+ print(f"Added {improvement} new learned values")
290
+ print("Basic Tic-Tac-Toe strategies have been added to the AI")
291
+
292
+ return model
293
+
294
+ def save_improved_model(model):
295
+ """Save the improved model"""
296
+ filename = "ttt_ai_model_improved.pkl"
297
+
298
+ # Load existing data to preserve experience buffer
299
+ original_filename = "ttt_ai_model.pkl"
300
+ if os.path.exists(original_filename):
301
+ with open(original_filename, 'rb') as f:
302
+ data = pickle.load(f)
303
+ else:
304
+ data = {'model': model, 'experience': []}
305
+
306
+ data['model'] = model
307
+
308
+ with open(filename, 'wb') as f:
309
+ pickle.dump(data, f)
310
+
311
+ print(f"\n💾 Improved model saved to '{filename}'")
312
+
313
+ def main():
314
+ """Main function to analyze and improve the AI"""
315
+
316
+ print("🤖 TIC-TAC-TOE AI DIAGNOSTIC TOOL")
317
+ print("="*60)
318
+
319
+ # Load the model
320
+ model = load_ai_model()
321
+ if model is None:
322
+ return
323
+
324
+ # Test with common scenarios
325
+ test_ai_with_common_scenarios(model)
326
+
327
+ # Analyze learning patterns
328
+ analyze_learning_patterns(model)
329
+
330
+ # Check for specific strategies
331
+ check_for_specific_patterns(model)
332
+
333
+ # Offer to improve the AI
334
+ print("\n" + "="*60)
335
+ improve = input("\nWould you like to add basic Tic-Tac-Toe knowledge to the AI? (y/n): ").strip().lower()
336
+
337
+ if improve == 'y':
338
+ model = train_ai_offline(model)
339
+ save_improved_model(model)
340
+ print("\n✅ AI has been improved with basic strategies!")
341
+ print("Restart the game and use 'ttt_ai_model_improved.pkl' for better performance")
342
+ else:
343
+ print("\n📝 Recommendations for improving the AI through gameplay:")
344
+ print("1. Play more games against the AI")
345
+ print("2. Let the AI watch you play against itself")
346
+ print("3. Adjust learning rate to 0.2-0.3 for faster learning")
347
+ print("4. Reduce exploration rate to 0.1 once AI starts winning")
348
+ print("5. Play both as X and O to teach both perspectives")
349
+
350
+ if __name__ == "__main__":
351
+ main()
Tic Tac Toe RL/app.py ADDED
@@ -0,0 +1,721 @@
1
+ import sys
2
+ import os
3
+ import random
4
+ import numpy as np
5
+ import pandas as pd
6
+ import matplotlib.pyplot as plt
7
+ from collections import deque
8
+ import datetime
9
+ import csv
10
+ import logging
11
+ from PyQt5.QtWidgets import (QApplication, QMainWindow, QWidget, QVBoxLayout,
12
+ QHBoxLayout, QGridLayout, QPushButton, QLabel,
13
+ QTextEdit, QTabWidget, QGroupBox, QSpinBox,
14
+ QDoubleSpinBox, QCheckBox, QProgressBar, QComboBox)
15
+ from PyQt5.QtCore import QTimer, Qt, QThread, pyqtSignal
16
+ from PyQt5.QtGui import QFont, QPalette, QColor
17
+ import torch
18
+ import torch.nn as nn
19
+ import torch.optim as optim
20
+ import torch.nn.functional as F
21
+
22
+ # Configure logging
23
+ logging.basicConfig(
24
+ level=logging.INFO,
25
+ format='%(asctime)s - %(levelname)s - %(message)s',
26
+ handlers=[
27
+ logging.FileHandler('tic_tac_toe_training.log'),
28
+ logging.StreamHandler()
29
+ ]
30
+ )
31
+ logger = logging.getLogger(__name__)
32
+
33
+ class DuelingDQN(nn.Module):
34
+ def __init__(self, input_size, output_size, hidden_size=256):
35
+ super(DuelingDQN, self).__init__()
36
+ self.input_size = input_size
37
+ self.output_size = output_size
38
+
39
+ # Feature layer
40
+ self.feature = nn.Sequential(
41
+ nn.Linear(input_size, hidden_size),
42
+ nn.ReLU(),
43
+ nn.Linear(hidden_size, hidden_size),
44
+ nn.ReLU(),
45
+ nn.Linear(hidden_size, hidden_size // 2),
46
+ nn.ReLU()
47
+ )
48
+
49
+ # Value stream
50
+ self.value_stream = nn.Sequential(
51
+ nn.Linear(hidden_size // 2, hidden_size // 4),
52
+ nn.ReLU(),
53
+ nn.Linear(hidden_size // 4, 1)
54
+ )
55
+
56
+ # Advantage stream
57
+ self.advantage_stream = nn.Sequential(
58
+ nn.Linear(hidden_size // 2, hidden_size // 4),
59
+ nn.ReLU(),
60
+ nn.Linear(hidden_size // 4, output_size)
61
+ )
62
+
63
+ def forward(self, state):
64
+ features = self.feature(state)
65
+ value = self.value_stream(features)
66
+ advantage = self.advantage_stream(features)
67
+
68
+ # Combine value and advantage
69
+ q_values = value + (advantage - advantage.mean(dim=1, keepdim=True))
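+ # i.e. Q(s, a) = V(s) + (A(s, a) - mean_a' A(s, a')); subtracting the
+ # mean advantage keeps the value/advantage split identifiable, as in
+ # the dueling-network paper (Wang et al., 2016)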
70
+ return q_values
71
+
72
+ class TicTacToeEnvironment:
73
+ def __init__(self):
74
+ self.reset()
75
+
76
+ def reset(self):
77
+ self.board = np.zeros(9, dtype=int) # 0: empty, 1: X, 2: O
78
+ self.current_player = 1 # X starts
79
+ self.done = False
80
+ self.winner = 0
81
+ return self.get_state()
82
+
83
+ def get_state(self):
84
+ # Return board state as one-hot encoded
85
+ state = np.zeros(9 * 3, dtype=np.float32)
86
+ for i in range(9):
87
+ if self.board[i] == 0:
88
+ state[i * 3] = 1.0
89
+ elif self.board[i] == 1:
90
+ state[i * 3 + 1] = 1.0
91
+ else:
92
+ state[i * 3 + 2] = 1.0
93
+ return state
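+ # Each cell is a 3-way one-hot block (empty/X/O), giving a
+ # 9 * 3 = 27 dimensional state vector, the state_size used by the agent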
94
+
95
+ def get_valid_moves(self):
96
+ return [i for i in range(9) if self.board[i] == 0]
97
+
98
+ def step(self, action):
99
+ if self.done:
100
+ return self.get_state(), 0, True, {}
101
+
102
+ if self.board[action] != 0:
103
+ return self.get_state(), -5, True, {} # Invalid move penalty
104
+
105
+ # Make move
106
+ self.board[action] = self.current_player
107
+
108
+ # Check for win
109
+ if self.check_win(self.current_player):
110
+ self.done = True
111
+ self.winner = self.current_player
112
+ reward = 10 # Win reward
113
+ # Check for draw
114
+ elif len(self.get_valid_moves()) == 0:
115
+ self.done = True
116
+ reward = 2 # Draw reward
117
+ else:
118
+ reward = 0.1 # Small reward for valid move
119
+ self.current_player = 3 - self.current_player # Switch player (1->2, 2->1)
120
+
121
+ return self.get_state(), reward, self.done, {'winner': self.winner}
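+ # Reward scheme in summary: +10 win, +2 draw, +0.1 per legal move,
+ # and -5 for an illegal move (which also ends the episode immediately)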
122
+
123
+ def check_win(self, player):
124
+ winning_combinations = [
125
+ [0, 1, 2], [3, 4, 5], [6, 7, 8], # Rows
126
+ [0, 3, 6], [1, 4, 7], [2, 5, 8], # Columns
127
+ [0, 4, 8], [2, 4, 6] # Diagonals
128
+ ]
129
+
130
+ for combo in winning_combinations:
131
+ if all(self.board[i] == player for i in combo):
132
+ return True
133
+ return False
134
+
135
+ def render(self):
136
+ symbols = {0: ' ', 1: 'X', 2: 'O'}
137
+ board_str = ""
138
+ for i in range(3):
139
+ row = [symbols[self.board[i*3 + j]] for j in range(3)]
140
+ board_str += " " + " | ".join(row) + " \n"
141
+ if i < 2:
142
+ board_str += "-----------\n"
143
+ return board_str
144
+
145
+ class DuelingDQNAgent:
146
+ def __init__(self, state_size, action_size, learning_rate=0.001, gamma=0.99,
147
+ epsilon_start=1.0, epsilon_end=0.01, epsilon_decay=0.999,
148
+ target_update=1000, device='auto'):
149
+ self.state_size = state_size
150
+ self.action_size = action_size
151
+ self.learning_rate = learning_rate
152
+ self.gamma = gamma
153
+ self.epsilon = epsilon_start
154
+ self.epsilon_start = epsilon_start
155
+ self.epsilon_end = epsilon_end
156
+ self.epsilon_decay = epsilon_decay
157
+ self.target_update = target_update
158
+ self.memory = deque(maxlen=50000)
159
+ self.batch_size = 128
160
+ self.learn_step_counter = 0
161
+
162
+ # Device selection with MPS priority
163
+ if device == 'auto':
164
+ if torch.backends.mps.is_available():
165
+ self.device = torch.device("mps")
166
+ logger.info("Using MPS device (Apple Silicon)")
167
+ elif torch.cuda.is_available():
168
+ self.device = torch.device("cuda")
169
+ logger.info("Using CUDA device")
170
+ else:
171
+ self.device = torch.device("cpu")
172
+ logger.info("Using CPU device")
173
+ else:
174
+ self.device = torch.device(device)
175
+
176
+ # Networks
177
+ self.policy_net = DuelingDQN(state_size, action_size).to(self.device)
178
+ self.target_net = DuelingDQN(state_size, action_size).to(self.device)
179
+ self.optimizer = optim.Adam(self.policy_net.parameters(), lr=learning_rate, weight_decay=1e-5)
180
+
181
+ self.update_target_network()
182
+
183
+ def update_target_network(self):
184
+ self.target_net.load_state_dict(self.policy_net.state_dict())
185
+
186
+ def remember(self, state, action, reward, next_state, done):
187
+ self.memory.append((state, action, reward, next_state, done))
188
+
189
+ def act(self, state, valid_moves, training=True):
190
+ if training and random.random() <= self.epsilon:
191
+ return random.choice(valid_moves)
192
+
193
+ state_tensor = torch.FloatTensor(state).unsqueeze(0).to(self.device)
194
+ with torch.no_grad():
195
+ q_values = self.policy_net(state_tensor)
196
+
197
+ # Mask invalid moves
198
+ q_values_np = q_values.cpu().numpy()[0]
199
+ masked_q_values = q_values_np.copy()
200
+ for i in range(self.action_size):
201
+ if i not in valid_moves:
202
+ masked_q_values[i] = -float('inf')
203
+
204
+ return np.argmax(masked_q_values)
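+ # Masking illegal actions with -inf before the argmax guarantees a
+ # legal greedy move without requiring the network to learn legality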
205
+
206
+ def replay(self):
207
+ if len(self.memory) < self.batch_size:
208
+ return 0
209
+
210
+ batch = random.sample(self.memory, self.batch_size)
211
+ states, actions, rewards, next_states, dones = zip(*batch)
212
+
213
+ states = torch.FloatTensor(np.array(states)).to(self.device)
214
+ actions = torch.LongTensor(actions).to(self.device)
215
+ rewards = torch.FloatTensor(rewards).to(self.device)
216
+ next_states = torch.FloatTensor(np.array(next_states)).to(self.device)
217
+ dones = torch.BoolTensor(dones).to(self.device)
218
+
219
+ current_q_values = self.policy_net(states).gather(1, actions.unsqueeze(1))
220
+
221
+ with torch.no_grad():
222
+ next_actions = self.policy_net(next_states).max(1)[1]
223
+ next_q_values = self.target_net(next_states).gather(1, next_actions.unsqueeze(1))
224
+ target_q_values = rewards.unsqueeze(1) + (self.gamma * next_q_values * ~dones.unsqueeze(1))
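+ # Double DQN target: the policy net selects the next action and the
+ # target net evaluates it, which reduces max-operator overestimation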
225
+
226
+ loss = F.smooth_l1_loss(current_q_values, target_q_values)
227
+
228
+ self.optimizer.zero_grad()
229
+ loss.backward()
230
+ # Gradient clipping
231
+ torch.nn.utils.clip_grad_norm_(self.policy_net.parameters(), 1.0)
232
+ self.optimizer.step()
233
+
234
+ # Update target network
235
+ self.learn_step_counter += 1
236
+ if self.learn_step_counter % self.target_update == 0:
237
+ self.update_target_network()
238
+
239
+ # Decay epsilon
240
+ if self.epsilon > self.epsilon_end:
241
+ self.epsilon *= self.epsilon_decay
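+ # Multiplicative decay: epsilon reaches the 0.01 floor after roughly
+ # ln(0.01)/ln(0.999), i.e. about 4600 replay steps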
242
+
243
+ return loss.item()
244
+
245
+ def save_model(self, filepath):
246
+ torch.save({
247
+ 'policy_net_state_dict': self.policy_net.state_dict(),
248
+ 'target_net_state_dict': self.target_net.state_dict(),
249
+ 'optimizer_state_dict': self.optimizer.state_dict(),
250
+ 'epsilon': self.epsilon,
251
+ 'memory': list(self.memory)[-10000:] # Save recent memory
252
+ }, filepath)
253
+ logger.info(f"Model saved to {filepath}")
254
+
255
+ def load_model(self, filepath):
256
+ if os.path.exists(filepath):
257
+ checkpoint = torch.load(filepath, map_location=self.device)
258
+ self.policy_net.load_state_dict(checkpoint['policy_net_state_dict'])
259
+ self.target_net.load_state_dict(checkpoint['target_net_state_dict'])
260
+ self.optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
261
+ self.epsilon = checkpoint['epsilon']
262
+ if 'memory' in checkpoint:
263
+ self.memory = deque(checkpoint['memory'], maxlen=50000)
264
+ logger.info(f"Model loaded from {filepath}")
265
+ return True
266
+ return False
267
+
268
+ class TrainingThread(QThread):
269
+ update_signal = pyqtSignal(dict)
270
+ finished_signal = pyqtSignal()
271
+
272
+ def __init__(self, agent, env, episodes=1000, save_interval=100):
273
+ super().__init__()
274
+ self.agent = agent
275
+ self.env = env
276
+ self.episodes = episodes
277
+ self.save_interval = save_interval
278
+ self.running = True
279
+ self.stats = {
280
+ 'episodes': 0,
281
+ 'wins': 0,
282
+ 'losses': 0,
283
+ 'draws': 0,
284
+ 'avg_reward': 0,
285
+ 'epsilon': agent.epsilon,
286
+ 'loss': 0
287
+ }
288
+ self.rewards = []
289
+ self.wins = []
290
+ self.losses = []
291
+ self.draws = []
292
+
293
+ def run(self):
294
+ logger.info(f"Starting training for {self.episodes} episodes")
295
+
296
+ for episode in range(self.episodes):
297
+ if not self.running:
298
+ break
299
+
300
+ state = self.env.reset()
301
+ total_reward = 0
302
+ steps = 0
303
+
304
+ while True:
305
+ valid_moves = self.env.get_valid_moves()
306
+ if not valid_moves:
307
+ break
308
+
309
+ action = self.agent.act(state, valid_moves)
310
+ next_state, reward, done, info = self.env.step(action)
311
+
312
+ self.agent.remember(state, action, reward, next_state, done)
313
+ loss = self.agent.replay()
314
+
315
+ total_reward += reward
316
+ steps += 1
317
+ state = next_state
318
+
319
+ if done:
320
+ # Update statistics
321
+ if info.get('winner') == 1: # X won (the same net plays both sides in self-play)
322
+ self.stats['wins'] += 1
323
+ elif info.get('winner') == 2: # O won
324
+ self.stats['losses'] += 1
325
+ else: # Draw
326
+ self.stats['draws'] += 1
327
+
328
+ self.stats['loss'] = loss if loss else 0
329
+ break
330
+
331
+ # Update statistics
332
+ self.stats['episodes'] = episode + 1
333
+ self.stats['epsilon'] = self.agent.epsilon
334
+ self.rewards.append(total_reward)
335
+ self.stats['avg_reward'] = np.mean(self.rewards[-100:]) if self.rewards else 0
336
+
337
+ # Save data periodically
338
+ if (episode + 1) % self.save_interval == 0:
339
+ self.save_training_data(episode + 1)
340
+ self.agent.save_model(f'models/dueling_dqn_episode_{episode + 1}.pth')
341
+
342
+ # Emit update signal every 10 episodes or at the end
343
+ if (episode + 1) % 10 == 0 or episode == self.episodes - 1:
344
+ self.update_signal.emit(self.stats.copy())
345
+
346
+ self.finished_signal.emit()
347
+
348
+ def stop(self):
349
+ self.running = False
350
+
351
+ def save_training_data(self, episode):
352
+ data = {
353
+ 'episode': episode,
354
+ 'epsilon': self.agent.epsilon,
355
+ 'avg_reward': self.stats['avg_reward'],
356
+ 'wins': self.stats['wins'],
357
+ 'losses': self.stats['losses'],
358
+ 'draws': self.stats['draws'],
359
+ 'win_rate': self.stats['wins'] / episode if episode > 0 else 0,
360
+ 'timestamp': datetime.datetime.now().isoformat()
361
+ }
362
+
363
+ # Save to CSV
364
+ file_exists = os.path.isfile('training_data.csv')
365
+ with open('training_data.csv', 'a', newline='') as f:
366
+ writer = csv.DictWriter(f, fieldnames=data.keys())
367
+ if not file_exists:
368
+ writer.writeheader()
369
+ writer.writerow(data)
370
+
371
+ class TicTacToeGUI(QMainWindow):
372
+ def __init__(self):
373
+ super().__init__()
374
+ self.env = TicTacToeEnvironment()
375
+ self.agent = DuelingDQNAgent(state_size=27, action_size=9) # 9 positions * 3 states each
376
+ self.training_thread = None
377
+ self.is_training = False
378
+
379
+ # Create models directory
380
+ os.makedirs('models', exist_ok=True)
381
+
382
+ self.init_ui()
383
+ self.update_display()
384
+
385
+ def init_ui(self):
386
+ self.setWindowTitle("Dueling DQN Tic-Tac-Toe Trainer")
387
+ self.setGeometry(100, 100, 1200, 800)
388
+
389
+ # Central widget and main layout
390
+ central_widget = QWidget()
391
+ self.setCentralWidget(central_widget)
392
+ main_layout = QHBoxLayout(central_widget)
393
+
394
+ # Left panel - Game board and controls
395
+ left_panel = QVBoxLayout()
396
+
397
+ # Game board
398
+ board_group = QGroupBox("Tic-Tac-Toe Board")
399
+ board_layout = QGridLayout()
400
+ self.board_buttons = []
401
+ for i in range(9):
402
+ btn = QPushButton('')
403
+ btn.setFixedSize(80, 80)
404
+ btn.setFont(QFont('Arial', 24))
405
+ btn.clicked.connect(lambda checked, pos=i: self.human_move(pos))
406
+ board_layout.addWidget(btn, i // 3, i % 3)
407
+ self.board_buttons.append(btn)
408
+ board_group.setLayout(board_layout)
409
+ left_panel.addWidget(board_group)
410
+
411
+ # Game controls
412
+ controls_group = QGroupBox("Game Controls")
413
+ controls_layout = QVBoxLayout()
414
+
415
+ self.status_label = QLabel("Status: Ready")
416
+ controls_layout.addWidget(self.status_label)
417
+
418
+ control_buttons_layout = QHBoxLayout()
419
+ self.reset_btn = QPushButton("New Game")
420
+ self.reset_btn.clicked.connect(self.reset_game)
421
+ control_buttons_layout.addWidget(self.reset_btn)
422
+
423
+ self.auto_play_btn = QPushButton("Auto Play")
424
+ self.auto_play_btn.clicked.connect(self.auto_play)
425
+ control_buttons_layout.addWidget(self.auto_play_btn)
426
+
427
+ controls_layout.addLayout(control_buttons_layout)
428
+ controls_group.setLayout(controls_layout)
429
+ left_panel.addWidget(controls_group)
430
+
431
+ # Right panel - Training and visualization
432
+ right_panel = QVBoxLayout()
433
+
434
+ # Training controls
435
+ training_group = QGroupBox("Training Controls")
436
+ training_layout = QVBoxLayout()
437
+
438
+ # Training parameters
439
+ params_layout = QGridLayout()
440
+
441
+ params_layout.addWidget(QLabel("Episodes:"), 0, 0)
442
+ self.episodes_spin = QSpinBox()
443
+ self.episodes_spin.setRange(100, 100000)
444
+ self.episodes_spin.setValue(5000)
445
+ params_layout.addWidget(self.episodes_spin, 0, 1)
446
+
447
+ params_layout.addWidget(QLabel("Learning Rate:"), 1, 0)
448
+ self.lr_spin = QDoubleSpinBox()
449
+ self.lr_spin.setRange(0.0001, 0.01)
450
+ self.lr_spin.setValue(0.001)
451
+ self.lr_spin.setSingleStep(0.0001)
452
+ self.lr_spin.setDecimals(4)
453
+ params_layout.addWidget(self.lr_spin, 1, 1)
454
+
455
+ params_layout.addWidget(QLabel("Gamma:"), 2, 0)
456
+ self.gamma_spin = QDoubleSpinBox()
457
+ self.gamma_spin.setRange(0.1, 0.999)
458
+ self.gamma_spin.setValue(0.99)
459
+ self.gamma_spin.setSingleStep(0.001)
460
+ self.gamma_spin.setDecimals(3)
461
+ params_layout.addWidget(self.gamma_spin, 2, 1)
462
+
463
+ params_layout.addWidget(QLabel("Device:"), 3, 0)
464
+ self.device_combo = QComboBox()
465
+ self.device_combo.addItems(['auto', 'cpu', 'mps', 'cuda'])
466
+ self.device_combo.setCurrentText('auto')
467
+ params_layout.addWidget(self.device_combo, 3, 1)
468
+
469
+ training_layout.addLayout(params_layout)
470
+
471
+ # Training buttons
472
+ train_buttons_layout = QHBoxLayout()
473
+
474
+ self.train_btn = QPushButton("Start Training")
475
+ self.train_btn.clicked.connect(self.toggle_training)
476
+ train_buttons_layout.addWidget(self.train_btn)
477
+
478
+ self.load_model_btn = QPushButton("Load Model")
479
+ self.load_model_btn.clicked.connect(self.load_model)
480
+ train_buttons_layout.addWidget(self.load_model_btn)
481
+
482
+ self.save_model_btn = QPushButton("Save Model")
483
+ self.save_model_btn.clicked.connect(self.save_model)
484
+ train_buttons_layout.addWidget(self.save_model_btn)
485
+
486
+ training_layout.addLayout(train_buttons_layout)
487
+
488
+ # Progress bar
489
+ self.progress_bar = QProgressBar()
490
+ training_layout.addWidget(self.progress_bar)
491
+
492
+ training_group.setLayout(training_layout)
493
+ right_panel.addWidget(training_group)
494
+
495
+ # Statistics
496
+ stats_group = QGroupBox("Training Statistics")
497
+ stats_layout = QVBoxLayout()
498
+
499
+ self.stats_text = QTextEdit()
500
+ self.stats_text.setReadOnly(True)
501
+ self.stats_text.setMaximumHeight(150)
502
+ stats_layout.addWidget(self.stats_text)
503
+
504
+ stats_group.setLayout(stats_layout)
505
+ right_panel.addWidget(stats_group)
506
+
507
+ # Log output
508
+ log_group = QGroupBox("Training Log")
509
+ log_layout = QVBoxLayout()
510
+
511
+ self.log_text = QTextEdit()
512
+ self.log_text.setReadOnly(True)
513
+ log_layout.addWidget(self.log_text)
514
+
515
+ log_group.setLayout(log_layout)
516
+ right_panel.addWidget(log_group)
517
+
518
+ # Add panels to main layout
519
+ main_layout.addLayout(left_panel, 1)
520
+ main_layout.addLayout(right_panel, 1)
521
+
522
+ # Timer for auto-play
523
+ self.auto_play_timer = QTimer()
524
+ self.auto_play_timer.timeout.connect(self.auto_play_step)
525
+
526
+ def toggle_training(self):
527
+ if self.is_training:
528
+ self.stop_training()
529
+ else:
530
+ self.start_training()
531
+
532
+ def start_training(self):
533
+ device = self.device_combo.currentText()
534
+ self.agent = DuelingDQNAgent(
535
+ state_size=27,
536
+ action_size=9,
537
+ learning_rate=self.lr_spin.value(),
538
+ gamma=self.gamma_spin.value(),
539
+ device=device
540
+ )
541
+
542
+ self.training_thread = TrainingThread(
543
+ self.agent,
544
+ TicTacToeEnvironment(),
545
+ episodes=self.episodes_spin.value(),
546
+ save_interval=100
547
+ )
548
+
549
+ self.training_thread.update_signal.connect(self.update_training_stats)
550
+ self.training_thread.finished_signal.connect(self.training_finished)
551
+
552
+ self.is_training = True
553
+ self.train_btn.setText("Stop Training")
554
+ self.status_label.setText("Status: Training...")
555
+ self.progress_bar.setRange(0, self.episodes_spin.value())
556
+
557
+ self.training_thread.start()
558
+
559
+ def stop_training(self):
560
+ if self.training_thread:
561
+ self.training_thread.stop()
562
+ self.training_thread.wait()
563
+ self.is_training = False
564
+ self.train_btn.setText("Start Training")
565
+ self.status_label.setText("Status: Training Stopped")
566
+
567
+ def training_finished(self):
568
+ self.is_training = False
569
+ self.train_btn.setText("Start Training")
570
+ self.status_label.setText("Status: Training Completed")
571
+ self.log_message("Training completed!")
572
+
573
+ def update_training_stats(self, stats):
574
+ self.progress_bar.setValue(stats['episodes'])
575
+
576
+ stats_text = f"""
577
+ Episodes: {stats['episodes']}
578
+ Wins: {stats['wins']} | Losses: {stats['losses']} | Draws: {stats['draws']}
579
+ Win Rate: {stats['wins']/stats['episodes']*100:.1f}%
580
+ Average Reward: {stats['avg_reward']:.3f}
581
+ Epsilon: {stats['epsilon']:.4f}
582
+ Current Loss: {stats['loss']:.4f}
583
+ """.strip()
584
+
585
+ self.stats_text.setText(stats_text)
586
+
587
+ def log_message(self, message):
588
+ timestamp = datetime.datetime.now().strftime("%H:%M:%S")
589
+ self.log_text.append(f"[{timestamp}] {message}")
590
+ # Auto-scroll to bottom
591
+ self.log_text.verticalScrollBar().setValue(
592
+ self.log_text.verticalScrollBar().maximum()
593
+ )
594
+
595
+ def reset_game(self):
596
+ self.env.reset()
597
+ self.update_display()
598
+ self.status_label.setText("Status: New Game Started")
599
+ self.auto_play_timer.stop()
600
+
601
+ def human_move(self, position):
602
+ if self.env.done or self.env.current_player != 1:
603
+ return
604
+
605
+ valid_moves = self.env.get_valid_moves()
606
+ if position in valid_moves:
607
+ state, reward, done, info = self.env.step(position)
608
+ self.update_display()
609
+
610
+ if done:
611
+ self.game_over(info)
612
+ else:
613
+ # Agent's turn
614
+ QTimer.singleShot(500, self.agent_move)
615
+
616
+ def agent_move(self):
617
+ if self.env.done or self.env.current_player != 2:
618
+ return
619
+
620
+ state = self.env.get_state()
621
+ valid_moves = self.env.get_valid_moves()
622
+
623
+ if valid_moves:
624
+ action = self.agent.act(state, valid_moves, training=False)
625
+ next_state, reward, done, info = self.env.step(action)
626
+ self.update_display()
627
+
628
+ if done:
629
+ self.game_over(info)
630
+
631
+ def auto_play(self):
632
+ if self.env.done:
633
+ self.reset_game()
634
+
635
+ self.auto_play_timer.start(1000) # 1 second between moves
636
+ self.status_label.setText("Status: Auto-playing...")
637
+
638
+ def auto_play_step(self):
639
+ if self.env.done:
640
+ self.auto_play_timer.stop()
641
+ self.status_label.setText("Status: Game Over - Auto-play")
642
+ return
643
+
644
+ if self.env.current_player == 1:
645
+ # Human player (random move for demo)
646
+ valid_moves = self.env.get_valid_moves()
647
+ if valid_moves:
648
+ action = random.choice(valid_moves)
649
+ self.env.step(action)
650
+ else:
651
+ # Agent player
652
+ state = self.env.get_state()
653
+ valid_moves = self.env.get_valid_moves()
654
+ if valid_moves:
655
+ action = self.agent.act(state, valid_moves, training=False)
656
+ self.env.step(action)
657
+
658
+ self.update_display()
659
+
660
+ if self.env.done:
661
+ self.auto_play_timer.stop()
662
+ self.game_over({'winner': self.env.winner})
663
+
664
+ def game_over(self, info):
665
+ winner = info.get('winner', 0)
666
+ if winner == 1:
667
+ self.status_label.setText("Status: You Win!")
668
+ elif winner == 2:
669
+ self.status_label.setText("Status: AI Wins!")
670
+ else:
671
+ self.status_label.setText("Status: Draw!")
672
+
673
+ def update_display(self):
674
+ symbols = {0: '', 1: 'X', 2: 'O'}
675
+ colors = {0: 'black', 1: 'blue', 2: 'red'}
676
+
677
+ for i in range(9):
678
+ symbol = symbols[self.env.board[i]]
679
+ color = colors[self.env.board[i]]
680
+ self.board_buttons[i].setText(symbol)
681
+ self.board_buttons[i].setStyleSheet(f"color: {color}; font-weight: bold;")
682
+
683
+ def load_model(self):
684
+ try:
685
+ if self.agent.load_model('models/dueling_dqn_latest.pth'):
686
+ self.log_message("Model loaded successfully!")
687
+ self.status_label.setText("Status: Model Loaded")
688
+ else:
689
+ self.log_message("No saved model found!")
690
+ except Exception as e:
691
+ self.log_message(f"Error loading model: {str(e)}")
692
+
693
+ def save_model(self):
694
+ try:
695
+ self.agent.save_model('models/dueling_dqn_latest.pth')
696
+ self.log_message("Model saved successfully!")
697
+ except Exception as e:
698
+ self.log_message(f"Error saving model: {str(e)}")
699
+
700
+ def main():
701
+ # Create necessary directories
702
+ os.makedirs('models', exist_ok=True)
703
+
704
+ app = QApplication(sys.argv)
705
+
706
+ # Set application style
707
+ app.setStyle('Fusion')
708
+
709
+ # Create and show main window
710
+ window = TicTacToeGUI()
711
+ window.show()
712
+
713
+ # Log startup message
714
+ window.log_message("Dueling DQN Tic-Tac-Toe Application Started")
715
+ window.log_message(f"Using PyTorch {torch.__version__}")
716
+ window.log_message(f"Available devices: CPU: True, CUDA: {torch.cuda.is_available()}, MPS: {torch.backends.mps.is_available()}")
717
+
718
+ sys.exit(app.exec_())
719
+
720
+ if __name__ == '__main__':
721
+ main()
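
A quick aside before the next file: the GUI above only assumes two entry points on the agent, the constructor `DuelingDQNAgent(state_size=27, action_size=9, learning_rate=..., gamma=..., device=...)` and `agent.act(state, valid_moves, training=False)`. Below is a hypothetical headless sketch of the same loop, using only those calls plus the environment methods exercised above (`reset`, `get_state`, `get_valid_moves`, `step`); the hyperparameter values are placeholders, not the app's defaults:

    # Hypothetical headless loop, mirroring what auto_play_step does above.
    env = TicTacToeEnvironment()
    agent = DuelingDQNAgent(state_size=27, action_size=9,
                            learning_rate=0.001, gamma=0.99, device='cpu')  # placeholder values
    env.reset()
    state = env.get_state()
    while not env.done:
        action = agent.act(state, env.get_valid_moves(), training=False)
        state, reward, done, info = env.step(action)
    print(info.get('winner', 0))  # 1 = X, 2 = O, 0 = draw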
Tic Tac Toe RL/eval_models.py ADDED
@@ -0,0 +1,464 @@
1
+ # eval_models.py
2
+ import os
3
+ import glob
4
+ import numpy as np
5
+ import torch
6
+ import matplotlib.pyplot as plt
7
+ import pandas as pd
8
+ from collections import deque
9
+ import logging
10
+
11
+ # Configure logging
12
+ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
13
+ logger = logging.getLogger(__name__)
14
+
15
+ class DuelingDQN(torch.nn.Module):
16
+ def __init__(self, input_size, output_size, hidden_size=256):
17
+ super(DuelingDQN, self).__init__()
18
+ self.input_size = input_size
19
+ self.output_size = output_size
20
+
21
+ # Feature layer
22
+ self.feature = torch.nn.Sequential(
23
+ torch.nn.Linear(input_size, hidden_size),
24
+ torch.nn.ReLU(),
25
+ torch.nn.Linear(hidden_size, hidden_size),
26
+ torch.nn.ReLU(),
27
+ torch.nn.Linear(hidden_size, hidden_size // 2),
28
+ torch.nn.ReLU()
29
+ )
30
+
31
+ # Value stream
32
+ self.value_stream = torch.nn.Sequential(
33
+ torch.nn.Linear(hidden_size // 2, hidden_size // 4),
34
+ torch.nn.ReLU(),
35
+ torch.nn.Linear(hidden_size // 4, 1)
36
+ )
37
+
38
+ # Advantage stream
39
+ self.advantage_stream = torch.nn.Sequential(
40
+ torch.nn.Linear(hidden_size // 2, hidden_size // 4),
41
+ torch.nn.ReLU(),
42
+ torch.nn.Linear(hidden_size // 4, output_size)
43
+ )
44
+
45
+ def forward(self, state):
46
+ features = self.feature(state)
47
+ value = self.value_stream(features)
48
+ advantage = self.advantage_stream(features)
49
+ q_values = value + (advantage - advantage.mean(dim=1, keepdim=True))
50
+ return q_values
51
+
52
+ class TicTacToeEnvironment:
53
+ def __init__(self):
54
+ self.reset()
55
+
56
+ def reset(self):
57
+ self.board = np.zeros(9, dtype=int)
58
+ self.current_player = 1
59
+ self.done = False
60
+ self.winner = 0
61
+ return self.get_state()
62
+
63
+ def get_state(self):
64
+ state = np.zeros(9 * 3, dtype=np.float32)
65
+ for i in range(9):
66
+ if self.board[i] == 0:
67
+ state[i * 3] = 1.0
68
+ elif self.board[i] == 1:
69
+ state[i * 3 + 1] = 1.0
70
+ else:
71
+ state[i * 3 + 2] = 1.0
72
+ return state
73
+
74
+ def get_valid_moves(self):
75
+ return [i for i in range(9) if self.board[i] == 0]
76
+
77
+ def step(self, action):
78
+ if self.done:
79
+ return self.get_state(), 0, True, {}
80
+
81
+ if self.board[action] != 0:
82
+ return self.get_state(), -5, True, {}
83
+
84
+ self.board[action] = self.current_player
85
+
86
+ if self.check_win(self.current_player):
87
+ self.done = True
88
+ self.winner = self.current_player
89
+ reward = 10
90
+ elif len(self.get_valid_moves()) == 0:
91
+ self.done = True
92
+ reward = 2
93
+ else:
94
+ reward = 0.1
95
+ self.current_player = 3 - self.current_player
96
+
97
+ return self.get_state(), reward, self.done, {'winner': self.winner}
98
+
99
+ def check_win(self, player):
100
+ winning_combinations = [
101
+ [0, 1, 2], [3, 4, 5], [6, 7, 8],
102
+ [0, 3, 6], [1, 4, 7], [2, 5, 8],
103
+ [0, 4, 8], [2, 4, 6]
104
+ ]
105
+
106
+ for combo in winning_combinations:
107
+ if all(self.board[i] == player for i in combo):
108
+ return True
109
+ return False
110
+
111
+ def render(self):
112
+ symbols = {0: ' ', 1: 'X', 2: 'O'}
113
+ board_str = "\n"
114
+ for i in range(3):
115
+ row = [symbols[self.board[i*3 + j]] for j in range(3)]
116
+ board_str += " " + " | ".join(row) + " \n"
117
+ if i < 2:
118
+ board_str += "-----------\n"
119
+ return board_str
120
+
121
+ class ModelEvaluator:
122
+ def __init__(self, models_dir='models'):
123
+ self.models_dir = models_dir
124
+ self.device = torch.device("mps" if torch.backends.mps.is_available() else
125
+ "cuda" if torch.cuda.is_available() else "cpu")
126
+ logger.info(f"Using device: {self.device}")
127
+
128
+ def load_model(self, model_path):
129
+ """Load model with compatibility for different PyTorch versions"""
130
+ try:
131
+ # Try weights_only=True first (the default from PyTorch 2.6 on)
132
+ checkpoint = torch.load(model_path, map_location=self.device, weights_only=True)
133
+ except Exception:
134
+ try:
135
+ # Fallback to weights_only=False
136
+ checkpoint = torch.load(model_path, map_location=self.device, weights_only=False)
137
+ except Exception as e:
138
+ logger.error(f"Failed to load model {model_path}: {e}")
139
+ return None
140
+
141
+ state_size = 27
142
+ action_size = 9
143
+ model = DuelingDQN(state_size, action_size).to(self.device)
144
+ model.load_state_dict(checkpoint['policy_net_state_dict'])
145
+ model.eval()
146
+
147
+ return model
148
+
149
+ def evaluate_model(self, model, num_games=100, agent_player=1):
150
+ """Evaluate model performance against random opponent"""
151
+ env = TicTacToeEnvironment()
152
+ wins = 0
153
+ losses = 0
154
+ draws = 0
155
+ total_reward = 0
156
+
157
+ for game in range(num_games):
158
+ state = env.reset()
159
+ game_reward = 0
160
+ steps = 0
161
+
162
+ while not env.done:
163
+ valid_moves = env.get_valid_moves()
164
+
165
+ if env.current_player == agent_player:
166
+ # Agent's turn
167
+ state_tensor = torch.FloatTensor(state).unsqueeze(0).to(self.device)
168
+ with torch.no_grad():
169
+ q_values = model(state_tensor)
170
+
171
+ # Mask invalid moves
172
+ q_values_np = q_values.cpu().numpy()[0]
173
+ for i in range(9):
174
+ if i not in valid_moves:
175
+ q_values_np[i] = -float('inf')
176
+
177
+ action = np.argmax(q_values_np)
178
+ else:
179
+ # Random opponent
180
+ action = np.random.choice(valid_moves)
181
+
182
+ acting_player = env.current_player  # who makes this move
+ state, reward, done, info = env.step(action)
183
+ 
184
+ # env.step switches current_player on non-terminal moves, so credit
+ # the step reward to the player who actually acted.
+ if acting_player == agent_player:
185
+ game_reward += reward
186
+
187
+ steps += 1
188
+
189
+ # Determine game outcome from agent's perspective
190
+ if info['winner'] == agent_player:
191
+ wins += 1
192
+ elif info['winner'] == 0:
193
+ draws += 1
194
+ else:
195
+ losses += 1
196
+
197
+ total_reward += game_reward
198
+
199
+ win_rate = wins / num_games
200
+ avg_reward = total_reward / num_games
201
+
202
+ return {
203
+ 'wins': wins,
204
+ 'losses': losses,
205
+ 'draws': draws,
206
+ 'win_rate': win_rate,
207
+ 'avg_reward': avg_reward
208
+ }
209
+
210
+ def play_interactive_game(self, model):
211
+ """Play an interactive game against the model"""
212
+ env = TicTacToeEnvironment()
213
+
214
+ print("\n🎮 Interactive Game Mode")
215
+ print("You are 'X', AI is 'O'")
216
+ print("Enter moves as numbers 0-8 (left to right, top to bottom):")
217
+ print("0 | 1 | 2")
218
+ print("---------")
219
+ print("3 | 4 | 5")
220
+ print("---------")
221
+ print("6 | 7 | 8")
222
+
223
+ while not env.done:
224
+ print(env.render())
225
+
226
+ if env.current_player == 1: # Human turn
227
+ valid_moves = env.get_valid_moves()
228
+ print(f"Your turn. Valid moves: {valid_moves}")
229
+
230
+ try:
231
+ move = int(input("Enter your move (0-8): "))
232
+ if move not in valid_moves:
233
+ print("Invalid move! Try again.")
234
+ continue
235
+ except ValueError:
236
+ print("Please enter a number between 0-8")
237
+ continue
238
+
239
+ env.step(move)
240
+
241
+ else: # AI turn
242
+ print("AI is thinking...")
243
+ state = env.get_state()
244
+ valid_moves = env.get_valid_moves()
245
+
246
+ state_tensor = torch.FloatTensor(state).unsqueeze(0).to(self.device)
247
+ with torch.no_grad():
248
+ q_values = model(state_tensor)
249
+
250
+ # Mask invalid moves
251
+ q_values_np = q_values.cpu().numpy()[0]
252
+ for i in range(9):
253
+ if i not in valid_moves:
254
+ q_values_np[i] = -float('inf')
255
+
256
+ action = np.argmax(q_values_np)
257
+ env.step(action)
258
+ print(f"AI played move: {action}")
259
+
260
+ print(env.render())
261
+ if env.winner == 1:
262
+ print("🎉 You win!")
263
+ elif env.winner == 2:
264
+ print("🤖 AI wins!")
265
+ else:
266
+ print("🤝 It's a draw!")
267
+
268
+ def run_comprehensive_evaluation(self, num_games_per_model=50):
269
+ """Evaluate all models in the models directory"""
270
+ model_files = glob.glob(os.path.join(self.models_dir, "*.pth"))
271
+ model_files.sort(key=lambda p: self._extract_episode_number(os.path.basename(p)))  # numeric episode order (a plain name sort puts episode_1000 before episode_200)
272
+
273
+ if not model_files:
274
+ logger.error("No model files found in models directory!")
275
+ return
276
+
277
+ results = []
278
+
279
+ print(f"\n🔍 Evaluating {len(model_files)} models with {num_games_per_model} games each...")
280
+
281
+ for model_file in model_files:
282
+ model_name = os.path.basename(model_file)
283
+ print(f"\nEvaluating: {model_name}")
284
+
285
+ model = self.load_model(model_file)
286
+ if model is None:
287
+ continue
288
+
289
+ # Evaluate as both player 1 and player 2
290
+ stats_p1 = self.evaluate_model(model, num_games_per_model, agent_player=1)
291
+ stats_p2 = self.evaluate_model(model, num_games_per_model, agent_player=2)
292
+
293
+ # Extract episode number from filename
294
+ episode_num = self._extract_episode_number(model_name)
295
+
296
+ result = {
297
+ 'model_file': model_name,
298
+ 'episode': episode_num,
299
+ 'win_rate_p1': stats_p1['win_rate'],
300
+ 'win_rate_p2': stats_p2['win_rate'],
301
+ 'avg_win_rate': (stats_p1['win_rate'] + stats_p2['win_rate']) / 2,
302
+ 'wins_p1': stats_p1['wins'],
303
+ 'losses_p1': stats_p1['losses'],
304
+ 'draws_p1': stats_p1['draws'],
305
+ 'wins_p2': stats_p2['wins'],
306
+ 'losses_p2': stats_p2['losses'],
307
+ 'draws_p2': stats_p2['draws'],
308
+ 'avg_reward_p1': stats_p1['avg_reward'],
309
+ 'avg_reward_p2': stats_p2['avg_reward']
310
+ }
311
+
312
+ results.append(result)
313
+
314
+ print(f" As Player 1: Win Rate: {stats_p1['win_rate']:.1%}")
315
+ print(f" As Player 2: Win Rate: {stats_p2['win_rate']:.1%}")
316
+ print(f" Average Win Rate: {result['avg_win_rate']:.1%}")
317
+
318
+ # Save results to CSV
319
+ self._save_results_to_csv(results)
320
+
321
+ # Create visualization
322
+ self._create_visualizations(results)
323
+
324
+ # Find and test the best model
325
+ best_model_info = max(results, key=lambda x: x['avg_win_rate'])
326
+ best_model_path = os.path.join(self.models_dir, best_model_info['model_file'])
327
+ best_model = self.load_model(best_model_path)
328
+
329
+ print(f"\n🏆 Best Model: {best_model_info['model_file']}")
330
+ print(f" Average Win Rate: {best_model_info['avg_win_rate']:.1%}")
331
+
332
+ # Interactive game with best model
333
+ while True:
334
+ play = input("\nWould you like to play against the best model? (y/n): ").lower().strip()
335
+ if play == 'y':
336
+ self.play_interactive_game(best_model)
337
+
338
+ play_again = input("\nPlay again? (y/n): ").lower().strip()
339
+ if play_again != 'y':
340
+ break
341
+ else:
342
+ break
343
+
344
+ def _extract_episode_number(self, filename):
345
+ """Extract episode number from filename"""
346
+ import re
347
+ match = re.search(r'episode_(\d+)', filename)
348
+ return int(match.group(1)) if match else 0
349
+
350
+ def _save_results_to_csv(self, results):
351
+ """Save evaluation results to CSV"""
352
+ df = pd.DataFrame(results)
353
+ csv_path = 'model_evaluation_results.csv'
354
+ df.to_csv(csv_path, index=False)
355
+ print(f"\n📊 Results saved to: {csv_path}")
356
+
357
+ # Print summary statistics
358
+ print(f"\n📈 Summary Statistics:")
359
+ print(f" Models evaluated: {len(results)}")
360
+ print(f" Best win rate: {df['avg_win_rate'].max():.1%}")
361
+ print(f" Worst win rate: {df['avg_win_rate'].min():.1%}")
362
+ print(f" Average win rate: {df['avg_win_rate'].mean():.1%}")
363
+
364
+ def _create_visualizations(self, results):
365
+ """Create visualization plots for model performance"""
366
+ episodes = [r['episode'] for r in results]
367
+ win_rates_p1 = [r['win_rate_p1'] for r in results]
368
+ win_rates_p2 = [r['win_rate_p2'] for r in results]
369
+ avg_win_rates = [r['avg_win_rate'] for r in results]
370
+
371
+ plt.figure(figsize=(12, 8))
372
+
373
+ plt.subplot(2, 2, 1)
374
+ plt.plot(episodes, win_rates_p1, 'b-', label='As Player 1', alpha=0.7)
375
+ plt.plot(episodes, win_rates_p2, 'r-', label='As Player 2', alpha=0.7)
376
+ plt.plot(episodes, avg_win_rates, 'g-', label='Average', linewidth=2)
377
+ plt.xlabel('Training Episode')
378
+ plt.ylabel('Win Rate')
379
+ plt.title('Model Performance vs Random Opponent')
380
+ plt.legend()
381
+ plt.grid(True, alpha=0.3)
382
+
383
+ plt.subplot(2, 2, 2)
384
+ plt.scatter(episodes, avg_win_rates, c=avg_win_rates, cmap='viridis', alpha=0.6)
385
+ plt.colorbar(label='Win Rate')
386
+ plt.xlabel('Training Episode')
387
+ plt.ylabel('Average Win Rate')
388
+ plt.title('Learning Progress')
389
+ plt.grid(True, alpha=0.3)
390
+
391
+ plt.subplot(2, 2, 3)
392
+ outcomes_p1 = np.array([(r['wins_p1'], r['losses_p1'], r['draws_p1']) for r in results])
393
+ outcomes_p1 = outcomes_p1 / outcomes_p1.sum(axis=1, keepdims=True)
394
+ plt.stackplot(episodes, outcomes_p1.T, labels=['Wins', 'Losses', 'Draws'], alpha=0.7)
395
+ plt.xlabel('Training Episode')
396
+ plt.ylabel('Proportion')
397
+ plt.title('Outcomes as Player 1')
398
+ plt.legend()
399
+ plt.grid(True, alpha=0.3)
400
+
401
+ plt.subplot(2, 2, 4)
402
+ outcomes_p2 = np.array([(r['wins_p2'], r['losses_p2'], r['draws_p2']) for r in results])
403
+ outcomes_p2 = outcomes_p2 / outcomes_p2.sum(axis=1, keepdims=True)
404
+ plt.stackplot(episodes, outcomes_p2.T, labels=['Wins', 'Losses', 'Draws'], alpha=0.7)
405
+ plt.xlabel('Training Episode')
406
+ plt.ylabel('Proportion')
407
+ plt.title('Outcomes as Player 2')
408
+ plt.legend()
409
+ plt.grid(True, alpha=0.3)
410
+
411
+ plt.tight_layout()
412
+ plt.savefig('model_performance_analysis.png', dpi=300, bbox_inches='tight')
413
+ print("📈 Visualization saved as: model_performance_analysis.png")
414
+
415
+ def main():
416
+ evaluator = ModelEvaluator()
417
+
418
+ print("=" * 60)
419
+ print("🤖 Dueling DQN Tic-Tac-Toe Model Evaluator")
420
+ print("=" * 60)
421
+
422
+ while True:
423
+ print("\nOptions:")
424
+ print("1. Comprehensive evaluation of all models")
425
+ print("2. Interactive game with specific model")
426
+ print("3. Exit")
427
+
428
+ choice = input("\nEnter your choice (1-3): ").strip()
429
+
430
+ if choice == '1':
431
+ num_games = input("Enter number of games per model (default 50): ").strip()
432
+ num_games = int(num_games) if num_games.isdigit() else 50
433
+ evaluator.run_comprehensive_evaluation(num_games)
434
+
435
+ elif choice == '2':
436
+ model_files = glob.glob(os.path.join(evaluator.models_dir, "*.pth"))
437
+ if not model_files:
438
+ print("No model files found in models directory!")
439
+ continue
440
+
441
+ print("\nAvailable models:")
442
+ for i, model_file in enumerate(model_files, 1):
443
+ print(f"{i}. {os.path.basename(model_file)}")
444
+
445
+ try:
446
+ model_choice = int(input(f"\nSelect model (1-{len(model_files)}): ")) - 1
447
+ if 0 <= model_choice < len(model_files):
448
+ model = evaluator.load_model(model_files[model_choice])
449
+ if model:
450
+ evaluator.play_interactive_game(model)
451
+ else:
452
+ print("Invalid selection!")
453
+ except ValueError:
454
+ print("Please enter a valid number!")
455
+
456
+ elif choice == '3':
457
+ print("Goodbye!")
458
+ break
459
+
460
+ else:
461
+ print("Invalid choice! Please enter 1, 2, or 3.")
462
+
463
+ if __name__ == '__main__':
464
+ main()
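
One detail in `DuelingDQN.forward` above worth calling out: Q-values are assembled as Q(s, a) = V(s) + (A(s, a) - mean_a' A(s, a')). Subtracting the mean advantage pins down the otherwise unidentifiable split between the value and advantage streams. A self-contained sanity check of that arithmetic (plain PyTorch, no project code required):

    import torch

    # With a zero-mean advantage vector, Q is just V shifted by each action's advantage.
    value = torch.tensor([[1.0]])                  # V(s)
    advantage = torch.tensor([[0.5, -0.5, 0.0]])   # A(s, a) for 3 actions
    q = value + (advantage - advantage.mean(dim=1, keepdim=True))
    assert torch.allclose(q, torch.tensor([[1.5, 0.5, 1.0]]))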
Tic Tac Toe RL/model_evaluation_results.csv ADDED
@@ -0,0 +1,52 @@
1
+ model_file,episode,win_rate_p1,win_rate_p2,avg_win_rate,wins_p1,losses_p1,draws_p1,wins_p2,losses_p2,draws_p2,avg_reward_p1,avg_reward_p2
2
+ dueling_dqn_episode_100.pth,100,0.76,0.44,0.6,38,10,2,22,23,5,7.982000000000002,4.752000000000004
3
+ dueling_dqn_episode_1000.pth,1000,0.58,0.14,0.36,29,17,4,7,30,13,6.278000000000002,1.76
4
+ dueling_dqn_episode_1100.pth,1100,0.52,0.12,0.32,26,12,12,6,35,9,6.004000000000003,1.5460000000000003
5
+ dueling_dqn_episode_1200.pth,1200,0.48,0.22,0.35,24,17,9,11,29,10,5.488000000000003,2.5500000000000007
6
+ dueling_dqn_episode_1300.pth,1300,0.64,0.16,0.4,32,12,6,8,34,8,6.958000000000001,1.9580000000000015
7
+ dueling_dqn_episode_1400.pth,1400,0.52,0.24,0.38,26,11,13,12,33,5,6.072,2.7560000000000016
8
+ dueling_dqn_episode_1500.pth,1500,0.56,0.28,0.42000000000000004,28,14,8,14,30,6,6.238000000000002,3.150000000000003
9
+ dueling_dqn_episode_1600.pth,1600,0.4,0.14,0.27,20,27,3,7,39,4,4.438000000000004,1.7460000000000002
10
+ dueling_dqn_episode_1700.pth,1700,0.36,0.16,0.26,18,26,6,8,39,3,4.160000000000003,1.942000000000001
11
+ dueling_dqn_episode_1800.pth,1800,0.34,0.04,0.19,17,28,5,2,46,2,3.912000000000004,0.7399999999999993
12
+ dueling_dqn_episode_1900.pth,1900,0.38,0.06,0.22,19,26,5,3,43,4,4.320000000000003,0.938
13
+ dueling_dqn_episode_200.pth,200,0.7,0.32,0.51,35,8,7,16,24,10,7.602,3.542000000000002
14
+ dueling_dqn_episode_2000.pth,2000,0.44,0.12,0.28,22,24,4,6,40,4,4.888000000000004,1.5539999999999998
15
+ dueling_dqn_episode_2100.pth,2100,0.32,0.26,0.29000000000000004,16,32,2,13,33,4,3.5820000000000034,2.9460000000000006
16
+ dueling_dqn_episode_2200.pth,2200,0.36,0.14,0.25,18,30,2,7,38,5,3.992000000000003,1.734
17
+ dueling_dqn_episode_2300.pth,2300,0.52,0.14,0.33,26,20,4,7,36,7,5.682,1.743999999999999
18
+ dueling_dqn_episode_2400.pth,2400,0.56,0.22,0.39,28,18,4,11,36,3,6.100000000000001,2.5480000000000014
19
+ dueling_dqn_episode_2500.pth,2500,0.26,0.14,0.2,13,25,12,7,37,6,3.4080000000000026,1.7520000000000002
20
+ dueling_dqn_episode_2600.pth,2600,0.46,0.18,0.32,23,21,6,9,36,5,5.174000000000003,2.134000000000001
21
+ dueling_dqn_episode_2700.pth,2700,0.48,0.2,0.33999999999999997,24,15,11,10,37,3,5.586000000000001,2.3320000000000003
22
+ dueling_dqn_episode_2800.pth,2800,0.3,0.14,0.22,15,22,13,7,42,1,3.8620000000000014,1.7440000000000004
23
+ dueling_dqn_episode_2900.pth,2900,0.4,0.12,0.26,20,23,7,6,42,2,4.600000000000003,1.534
24
+ dueling_dqn_episode_300.pth,300,0.6,0.4,0.5,30,17,3,20,27,3,6.379999999999999,4.330000000000002
25
+ dueling_dqn_episode_3000.pth,3000,0.4,0.18,0.29000000000000004,20,23,7,9,40,1,4.610000000000003,2.1280000000000006
26
+ dueling_dqn_episode_3100.pth,3100,0.48,0.16,0.32,24,20,6,8,40,2,5.3720000000000026,1.9460000000000008
27
+ dueling_dqn_episode_3200.pth,3200,0.24,0.14,0.19,12,26,12,7,42,1,3.2040000000000015,1.7420000000000002
28
+ dueling_dqn_episode_3300.pth,3300,0.34,0.22,0.28,17,27,6,11,37,2,3.960000000000002,2.5380000000000007
29
+ dueling_dqn_episode_3400.pth,3400,0.52,0.1,0.31,26,15,9,5,43,2,5.8980000000000015,1.3279999999999992
30
+ dueling_dqn_episode_3500.pth,3500,0.44,0.2,0.32,22,24,4,10,37,3,4.890000000000004,2.3420000000000005
31
+ dueling_dqn_episode_3600.pth,3600,0.42,0.16,0.29,21,19,10,8,42,0,4.946000000000002,1.9320000000000008
32
+ dueling_dqn_episode_3700.pth,3700,0.28,0.32,0.30000000000000004,14,26,10,16,34,0,3.534000000000002,3.5420000000000016
33
+ dueling_dqn_episode_3800.pth,3800,0.42,0.08,0.25,21,23,6,4,46,0,4.760000000000004,1.1299999999999992
34
+ dueling_dqn_episode_3900.pth,3900,0.36,0.2,0.28,18,24,8,10,37,3,4.250000000000002,2.3280000000000003
35
+ dueling_dqn_episode_400.pth,400,0.54,0.32,0.43000000000000005,27,19,4,16,27,7,5.882000000000002,3.5400000000000036
36
+ dueling_dqn_episode_4000.pth,4000,0.46,0.08,0.27,23,17,10,4,40,6,5.3480000000000025,1.1399999999999995
37
+ dueling_dqn_episode_4100.pth,4100,0.48,0.22,0.35,24,19,7,11,36,3,5.424000000000002,2.5400000000000005
38
+ dueling_dqn_episode_4200.pth,4200,0.44,0.22,0.33,22,18,10,11,31,8,5.130000000000003,2.564000000000001
39
+ dueling_dqn_episode_4300.pth,4300,0.42,0.16,0.29,21,21,8,8,36,6,4.852000000000003,1.9479999999999993
40
+ dueling_dqn_episode_4400.pth,4400,0.64,0.24,0.44,32,9,9,12,30,8,7.092,2.7540000000000013
41
+ dueling_dqn_episode_4500.pth,4500,0.62,0.24,0.43,31,11,8,12,33,5,6.85,2.7500000000000004
42
+ dueling_dqn_episode_4600.pth,4600,0.54,0.26,0.4,27,16,7,13,32,5,6.016000000000003,2.9560000000000013
43
+ dueling_dqn_episode_4700.pth,4700,0.62,0.22,0.42,31,13,6,11,32,7,6.7760000000000025,2.5400000000000005
44
+ dueling_dqn_episode_4800.pth,4800,0.56,0.3,0.43000000000000005,28,16,6,15,30,5,6.172000000000004,3.352000000000002
45
+ dueling_dqn_episode_4900.pth,4900,0.44,0.44,0.44,22,24,4,22,23,5,4.8720000000000026,4.7520000000000024
46
+ dueling_dqn_episode_500.pth,500,0.6,0.16,0.38,30,14,6,8,32,10,6.560000000000003,1.9440000000000008
47
+ dueling_dqn_episode_5000.pth,5000,0.54,0.3,0.42000000000000004,27,18,5,15,26,9,5.928000000000003,3.354000000000002
48
+ dueling_dqn_episode_600.pth,600,0.5,0.16,0.33,25,22,3,8,39,3,5.422000000000002,1.9399999999999995
49
+ dueling_dqn_episode_700.pth,700,0.52,0.12,0.32,26,16,8,6,34,10,5.846000000000003,1.56
50
+ dueling_dqn_episode_800.pth,800,0.42,0.06,0.24,21,23,6,3,37,10,4.752000000000003,0.9339999999999997
51
+ dueling_dqn_episode_900.pth,900,0.4,0.12,0.26,20,22,8,6,39,5,4.646000000000005,1.5459999999999996
52
+ dueling_dqn_latest.pth,0,0.44,0.22,0.33,22,14,14,11,33,6,5.320000000000004,2.5620000000000016
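
The CSV above is exactly what `_save_results_to_csv` writes; by these numbers the early checkpoints (episodes 100-300) score best against the random opponent, with dueling_dqn_episode_100.pth on top at a 0.60 average win rate. A hypothetical two-liner for ranking the checkpoints yourself (column names as in the header row):

    import pandas as pd

    df = pd.read_csv('model_evaluation_results.csv')
    print(df.nlargest(5, 'avg_win_rate')[['model_file', 'episode', 'avg_win_rate']])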
Tic Tac Toe RL/model_performance_analysis.png ADDED

Git LFS Details

  • SHA256: 6e803844070c372d8111e48cca8e77494e15733098c2d08bbcfa2e0936c9ae8f
  • Pointer size: 131 Bytes
  • Size of remote file: 759 kB
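
Everything from here down is checkpoint weights stored through Git LFS: each `.pth` file appears as a three-line pointer (spec version, SHA-256 oid, byte size) rather than the tensor data itself, so after cloning the repository the real checkpoints have to be fetched with `git lfs pull`.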
Tic Tac Toe RL/models/dueling_dqn_episode_100.pth ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a7889831812ac7b14064856f34ee5ab01ee87dee643e92ac7f69c17fa90edd81
3
+ size 2197905
Tic Tac Toe RL/models/dueling_dqn_episode_1000.pth ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa605d7d49dad36efe8306934fa7113b4b228e7cce90ed8f4b6435f6e941dc2a
3
+ size 4454493
Tic Tac Toe RL/models/dueling_dqn_episode_1100.pth ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d803d2395adece1d6400332108be84ba422cb12a84ebcb2215a676945f5a215e
3
+ size 4644317
Tic Tac Toe RL/models/dueling_dqn_episode_1200.pth ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a88625a6137c3739c1e82e59ecf81d8f28b86658b0f6b32deb7ceffd29cc3db2
3
+ size 4851101
Tic Tac Toe RL/models/dueling_dqn_episode_1300.pth ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9384217041ff8518fe68cba9eb04021149c964cb1add5f9de10b66efd1c701c2
3
+ size 5065565
Tic Tac Toe RL/models/dueling_dqn_episode_1400.pth ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7f74e2a0d223abc6bf0960cc34192a764a11d3e902dfcb286affb7d1ea3214ab
3
+ size 5130525
Tic Tac Toe RL/models/dueling_dqn_episode_1500.pth ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f29380fab72c71b4ee476e6a314125cb9bb451c397d20b04376fa98cd09afb5d
3
+ size 5146717
Tic Tac Toe RL/models/dueling_dqn_episode_1600.pth ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1f7eb75fd7b88b7cbb4adec1e0071928dfd67ea69cba879fa24ce40e6e3bac39
3
+ size 5152861
Tic Tac Toe RL/models/dueling_dqn_episode_1700.pth ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b418935e207e9eaa70bb1779ecc165ba65ffcec8142b77abdb724ff5abc96653
3
+ size 5156765
Tic Tac Toe RL/models/dueling_dqn_episode_1800.pth ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:50edd4d7243009e1d5c1e813ad87efc34bc7c01cc5165b81390639e58fd2d8ac
3
+ size 5153437
Tic Tac Toe RL/models/dueling_dqn_episode_1900.pth ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:be548035d7be186486838d2265b1c3d95f8fa2b11eda0a8f8c8642223fe5ee48
3
+ size 5152733
Tic Tac Toe RL/models/dueling_dqn_episode_200.pth ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:19050edfa22a03f7e0443f695500c1a68307aadf58f2199438ef95a289dc2eec
3
+ size 2417233
Tic Tac Toe RL/models/dueling_dqn_episode_2000.pth ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ec3e4ed4d208f5a75da8e58c70cb83464d9037719a358bebea76b69786d928b9
3
+ size 5152221
Tic Tac Toe RL/models/dueling_dqn_episode_2100.pth ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6be461c61433f8516fcf60b1f9b91a81f9492698b76c831e5dec7748e7fb0914
3
+ size 5152157
Tic Tac Toe RL/models/dueling_dqn_episode_2200.pth ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1f61d3fb28f2e81314bebb4375afc14a71b5861cabc5876160e41dc0f581b4aa
3
+ size 5154077
Tic Tac Toe RL/models/dueling_dqn_episode_2300.pth ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ddae05e38057f556a29cb233e37ace40274cb3a03b7e0b29319be844cf38870c
3
+ size 5149469
Tic Tac Toe RL/models/dueling_dqn_episode_2400.pth ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b4109bcc122f27be544e91d46efb9ac0d2f9769c2ce7ac6c38689672f42deacc
3
+ size 5150621
Tic Tac Toe RL/models/dueling_dqn_episode_2500.pth ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8d8807d7202152b6b65a158ca4385529eccb34943657b1349d1fe7b4ff332c3f
3
+ size 5153629
Tic Tac Toe RL/models/dueling_dqn_episode_2600.pth ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:11b052c2ff7adc88fd510639db8e4a96eeaab48fcf2ea5b24b7343532766f1c1
3
+ size 5150877
Tic Tac Toe RL/models/dueling_dqn_episode_2700.pth ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:015cd27b690ab04191663a5ceb67e29ace9d36b3c65204360dc38c3acd14f531
3
+ size 5150173
Tic Tac Toe RL/models/dueling_dqn_episode_2800.pth ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:91b10e1a94413e0c6fe284d706b5a735c7480025285a80906c2f08c46b9b72bc
3
+ size 5149789
Tic Tac Toe RL/models/dueling_dqn_episode_2900.pth ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3cba87fbbd59e713bc4e6d217762ea09ee2c3b9c06b2d09f1ceb40efe513e459
3
+ size 5146397
Tic Tac Toe RL/models/dueling_dqn_episode_300.pth ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:766075d03432a67b90d07f5bae5aae62741318d78717c1f5191a3563ff3801a8
3
+ size 2659281
Tic Tac Toe RL/models/dueling_dqn_episode_3000.pth ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:40f78912717adff486ef96d9f410e696fdce3af785a3907ddad83f458db60be5
3
+ size 5145501
Tic Tac Toe RL/models/dueling_dqn_episode_3100.pth ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:216edbd69bf3c21d246e4a82566b3a9eea7e9b2a989a81413079da9404dd6a50
3
+ size 5144285
Tic Tac Toe RL/models/dueling_dqn_episode_3200.pth ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b7e0e306d3ed614aa648a650e174e3baad4dcfeb7f0e3855cb4bdc8942eebeab
3
+ size 5145629
Tic Tac Toe RL/models/dueling_dqn_episode_3300.pth ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8b1f1f8404693a759e62b1f985bd5a7b7f2c81ade12935ab95f5d0ce07ed0675
3
+ size 5149661
Tic Tac Toe RL/models/dueling_dqn_episode_3400.pth ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e925beaad67ac5a006ee6f74a7db596640c1217ac1a540e311e80ca50798427c
3
+ size 5151965
Tic Tac Toe RL/models/dueling_dqn_episode_3500.pth ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:213d2e33f51609a0b0442065eafb77c4316280f78c059bce99c55aec2645c193
3
+ size 5151581
Tic Tac Toe RL/models/dueling_dqn_episode_3600.pth ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bae1cb761c8b82b183f2b057dc06006b58e5e08763d106b4d2fc91b7eee66916
3
+ size 5144733
Tic Tac Toe RL/models/dueling_dqn_episode_3700.pth ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:85721bf70830cce1615d227ad69238e6b045104b68c8c4c2105e6509d112223d
3
+ size 5136349