#!/usr/bin/env python3
"""
Helion-2.5-Rnd Python Client

Easy-to-use client for interacting with the Helion inference server.
"""

import json
from typing import Dict, Generator, List, Optional, Union

import requests


class HelionClient:
    """Client for the Helion-2.5-Rnd inference API."""

    def __init__(
        self,
        base_url: str = "http://localhost:8000",
        api_key: Optional[str] = None,
        timeout: int = 300
    ):
        """
        Initialize the Helion client.

        Args:
            base_url: Base URL of the inference server
            api_key: Optional API key for authentication
            timeout: Request timeout in seconds
        """
        self.base_url = base_url.rstrip('/')
        self.timeout = timeout
        self.headers = {
            "Content-Type": "application/json"
        }
        if api_key:
            self.headers["Authorization"] = f"Bearer {api_key}"

    def chat(
        self,
        messages: List[Dict[str, str]],
        temperature: float = 0.7,
        max_tokens: int = 4096,
        stream: bool = False,
        **kwargs
    ) -> Union[str, Generator[str, None, None]]:
        """
        Send a chat completion request.

        Args:
            messages: List of message dicts with 'role' and 'content' keys
            temperature: Sampling temperature (0.0 to 2.0)
            max_tokens: Maximum number of tokens to generate
            stream: Whether to stream the response
            **kwargs: Additional request parameters

        Returns:
            Generated text, or a generator of text chunks when streaming
        """
        payload = {
            "messages": messages,
            "temperature": temperature,
            "max_tokens": max_tokens,
            "stream": stream,
            **kwargs
        }

        if stream:
            return self._stream_chat(payload)
        return self._complete_chat(payload)

    def _complete_chat(self, payload: Dict) -> str:
        """Non-streaming chat completion."""
        response = requests.post(
            f"{self.base_url}/v1/chat/completions",
            headers=self.headers,
            json=payload,
            timeout=self.timeout
        )
        response.raise_for_status()
        data = response.json()
        return data["choices"][0]["message"]["content"]

    def _stream_chat(self, payload: Dict) -> Generator[str, None, None]:
        """Streaming chat completion via server-sent events."""
        response = requests.post(
            f"{self.base_url}/v1/chat/completions",
            headers=self.headers,
            json=payload,
            stream=True,
            timeout=self.timeout
        )
        response.raise_for_status()

        for line in response.iter_lines():
            if not line:
                continue
            line = line.decode('utf-8')
            if line.startswith('data: '):
                data_str = line[6:]
                if data_str == '[DONE]':
                    break
                try:
                    data = json.loads(data_str)
                    delta = data["choices"][0]["delta"].get("content", "")
                    if delta:
                        yield delta
                except json.JSONDecodeError:
                    # Skip malformed or partial SSE lines rather than aborting the stream
                    continue

    def complete(
        self,
        prompt: str,
        temperature: float = 0.7,
        max_tokens: int = 4096,
        stream: bool = False,
        **kwargs
    ) -> Union[str, Generator[str, None, None]]:
        """
        Send a text completion request.

        Args:
            prompt: Input text prompt
            temperature: Sampling temperature
            max_tokens: Maximum number of tokens to generate
            stream: Whether to stream the response
            **kwargs: Additional request parameters

        Returns:
            Generated text, or a generator of text chunks when streaming
        """
        messages = [{"role": "user", "content": prompt}]
        return self.chat(
            messages=messages,
            temperature=temperature,
            max_tokens=max_tokens,
            stream=stream,
            **kwargs
        )

    def health_check(self) -> Dict:
        """Check server health."""
        response = requests.get(
            f"{self.base_url}/health",
            headers=self.headers,
            timeout=10
        )
        response.raise_for_status()
        return response.json()

    def list_models(self) -> List[Dict]:
        """List available models."""
        response = requests.get(
            f"{self.base_url}/v1/models",
            headers=self.headers,
            timeout=10
        )
        response.raise_for_status()
        return response.json()["data"]
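
# --- Optional: transport-level retries ---------------------------------------
# A minimal sketch of how transient-failure retries could be layered under
# HelionClient using requests' standard Session/HTTPAdapter machinery. This is
# illustrative, not part of the Helion API: `make_retrying_session`, the
# `retries` and `backoff_factor` values, and the status-code list are all
# assumptions, and HelionClient would need to be adapted to route its calls
# through the returned session for this to take effect. `Retry.allowed_methods`
# requires urllib3 >= 1.26 (older versions used `method_whitelist`).

def make_retrying_session(retries: int = 3, backoff_factor: float = 0.5) -> requests.Session:
    """Build a requests.Session that retries transient failures (hypothetical helper)."""
    from requests.adapters import HTTPAdapter
    from urllib3.util.retry import Retry

    session = requests.Session()
    retry = Retry(
        total=retries,
        backoff_factor=backoff_factor,          # exponential backoff between attempts
        status_forcelist=(429, 500, 502, 503),  # retry rate limits and server errors
        allowed_methods=("GET", "POST"),        # retrying POST assumes idempotent handling server-side
    )
    adapter = HTTPAdapter(max_retries=retry)
    session.mount("http://", adapter)
    session.mount("https://", adapter)
    return session
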
class HelionAssistant:
    """High-level assistant interface for Helion."""

    def __init__(
        self,
        base_url: str = "http://localhost:8000",
        system_prompt: Optional[str] = None,
        **client_kwargs
    ):
        """
        Initialize the Helion assistant.

        Args:
            base_url: Base URL of the inference server
            system_prompt: System prompt to use for all conversations
            **client_kwargs: Additional arguments passed to HelionClient
        """
        self.client = HelionClient(base_url=base_url, **client_kwargs)
        self.system_prompt = system_prompt or (
            "You are Helion, an advanced AI assistant developed by DeepXR. "
            "You are helpful, harmless, and honest."
        )
        self.conversation_history: List[Dict[str, str]] = []

    def chat(
        self,
        message: str,
        temperature: float = 0.7,
        max_tokens: int = 4096,
        stream: bool = False,
        reset_history: bool = False
    ) -> Union[str, Generator[str, None, None]]:
        """
        Chat with the assistant.

        Args:
            message: User message
            temperature: Sampling temperature
            max_tokens: Maximum number of tokens to generate
            stream: Whether to stream the response
            reset_history: Whether to reset the conversation history first

        Returns:
            Assistant response, or a generator of text chunks when streaming
        """
        if reset_history:
            self.conversation_history = []

        # Build messages: system prompt, then prior turns, then the new user turn
        messages = [{"role": "system", "content": self.system_prompt}]
        messages.extend(self.conversation_history)
        messages.append({"role": "user", "content": message})

        # Get response
        if stream:
            return self._stream_and_store(messages, temperature, max_tokens, message)

        response = self.client.chat(
            messages=messages,
            temperature=temperature,
            max_tokens=max_tokens,
            stream=False
        )

        # Update history
        self.conversation_history.append({"role": "user", "content": message})
        self.conversation_history.append({"role": "assistant", "content": response})

        return response

    def _stream_and_store(
        self,
        messages: List[Dict],
        temperature: float,
        max_tokens: int,
        user_message: str
    ) -> Generator[str, None, None]:
        """Stream the response, then store it in the conversation history."""
        full_response = ""
        for chunk in self.client.chat(
            messages=messages,
            temperature=temperature,
            max_tokens=max_tokens,
            stream=True
        ):
            full_response += chunk
            yield chunk

        # Update history only after streaming completes
        self.conversation_history.append({"role": "user", "content": user_message})
        self.conversation_history.append({"role": "assistant", "content": full_response})

    def reset(self):
        """Reset the conversation history."""
        self.conversation_history = []

    def get_history(self) -> List[Dict[str, str]]:
        """Get a copy of the conversation history."""
        return self.conversation_history.copy()


# Example usage
def example_usage():
    """Example usage of the Helion client."""
    # Initialize client
    client = HelionClient(base_url="http://localhost:8000")

    # Check health
    health = client.health_check()
    print(f"Server status: {health['status']}")

    # Simple completion
    response = client.complete(
        "Explain quantum computing in simple terms:",
        temperature=0.7,
        max_tokens=500
    )
    print(f"\nResponse: {response}")

    # Chat with conversation
    messages = [
        {"role": "system", "content": "You are a helpful coding assistant."},
        {"role": "user", "content": "Write a Python function to calculate Fibonacci numbers"}
    ]
    response = client.chat(messages=messages, temperature=0.3)
    print(f"\nCode: {response}")

    # Streaming example
    print("\nStreaming response:")
    for chunk in client.complete("Tell me a short story about AI:", stream=True):
        print(chunk, end='', flush=True)
    print()

    # Using the assistant interface
    assistant = HelionAssistant()
    response = assistant.chat("What is machine learning?")
    print(f"\nAssistant: {response}")

    # Continue the conversation
    response = assistant.chat("Can you give me an example?")
    print(f"\nAssistant: {response}")


if __name__ == "__main__":
    example_usage()
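

# --- Error-handling sketch ----------------------------------------------------
# A minimal sketch of defensive usage, assuming only the client defined above.
# The exception types are standard `requests` exceptions surfaced by
# raise_for_status(); nothing here is Helion-specific, and `run_safely` is a
# hypothetical helper, not part of the client API.

def run_safely():
    """Demonstrate catching the common failure modes of a remote client."""
    client = HelionClient(base_url="http://localhost:8000", timeout=30)
    try:
        reply = client.complete("Say hello in one sentence.", max_tokens=50)
        print(reply)
    except requests.exceptions.ConnectionError:
        print("Server unreachable -- is the inference server running?")
    except requests.exceptions.Timeout:
        print("Request timed out; consider raising the client timeout.")
    except requests.exceptions.HTTPError as exc:
        # raise_for_status() in the client turns 4xx/5xx responses into HTTPError
        print(f"Server returned an error: {exc.response.status_code}")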