Trouter-Library committed
Commit ef0c6e1 · verified · 1 Parent(s): dccc9c1

Create inference/client.py

Files changed (1):
  1. inference/client.py  +309 -0
inference/client.py ADDED
@@ -0,0 +1,309 @@
#!/usr/bin/env python3
"""
Helion-2.5-Rnd Python Client
Easy-to-use client for interacting with Helion inference server
"""

import json
import requests
from typing import Dict, Generator, List, Optional, Union


class HelionClient:
    """Client for Helion-2.5-Rnd inference API"""

    def __init__(
        self,
        base_url: str = "http://localhost:8000",
        api_key: Optional[str] = None,
        timeout: int = 300
    ):
        """
        Initialize Helion client

        Args:
            base_url: Base URL of the inference server
            api_key: Optional API key for authentication
            timeout: Request timeout in seconds
        """
        self.base_url = base_url.rstrip('/')
        self.timeout = timeout
        self.headers = {
            "Content-Type": "application/json"
        }
        if api_key:
            self.headers["Authorization"] = f"Bearer {api_key}"

    def chat(
        self,
        messages: List[Dict[str, str]],
        temperature: float = 0.7,
        max_tokens: int = 4096,
        stream: bool = False,
        **kwargs
    ) -> Union[str, Generator[str, None, None]]:
        """
        Send a chat completion request

        Args:
            messages: List of message dicts with 'role' and 'content'
            temperature: Sampling temperature (0.0 to 2.0)
            max_tokens: Maximum tokens to generate
            stream: Whether to stream the response
            **kwargs: Additional parameters

        Returns:
            Generated text or generator for streaming
        """
        payload = {
            "messages": messages,
            "temperature": temperature,
            "max_tokens": max_tokens,
            "stream": stream,
            **kwargs
        }

        if stream:
            return self._stream_chat(payload)
        else:
            return self._complete_chat(payload)

    def _complete_chat(self, payload: Dict) -> str:
        """Non-streaming chat completion"""
        response = requests.post(
            f"{self.base_url}/v1/chat/completions",
            headers=self.headers,
            json=payload,
            timeout=self.timeout
        )
        response.raise_for_status()

        data = response.json()
        return data["choices"][0]["message"]["content"]

    def _stream_chat(self, payload: Dict) -> Generator[str, None, None]:
        """Streaming chat completion"""
        response = requests.post(
            f"{self.base_url}/v1/chat/completions",
            headers=self.headers,
            json=payload,
            stream=True,
            timeout=self.timeout
        )
        response.raise_for_status()

        for line in response.iter_lines():
            if line:
                line = line.decode('utf-8')
                if line.startswith('data: '):
                    data_str = line[6:]
                    if data_str == '[DONE]':
                        break

                    try:
                        data = json.loads(data_str)
                        delta = data["choices"][0]["delta"].get("content", "")
                        if delta:
                            yield delta
                    except json.JSONDecodeError:
                        continue

    def complete(
        self,
        prompt: str,
        temperature: float = 0.7,
        max_tokens: int = 4096,
        stream: bool = False,
        **kwargs
    ) -> Union[str, Generator[str, None, None]]:
        """
        Send a text completion request

        Args:
            prompt: Input text prompt
            temperature: Sampling temperature
            max_tokens: Maximum tokens to generate
            stream: Whether to stream the response
            **kwargs: Additional parameters

        Returns:
            Generated text or generator for streaming
        """
        messages = [{"role": "user", "content": prompt}]
        return self.chat(
            messages=messages,
            temperature=temperature,
            max_tokens=max_tokens,
            stream=stream,
            **kwargs
        )

    def health_check(self) -> Dict:
        """Check server health"""
        response = requests.get(
            f"{self.base_url}/health",
            headers=self.headers,
            timeout=10
        )
        response.raise_for_status()
        return response.json()

    def list_models(self) -> List[Dict]:
        """List available models"""
        response = requests.get(
            f"{self.base_url}/v1/models",
            headers=self.headers,
            timeout=10
        )
        response.raise_for_status()
        return response.json()["data"]


class HelionAssistant:
    """High-level assistant interface for Helion"""

    def __init__(
        self,
        base_url: str = "http://localhost:8000",
        system_prompt: Optional[str] = None,
        **client_kwargs
    ):
        """
        Initialize Helion assistant

        Args:
            base_url: Base URL of inference server
            system_prompt: System prompt to use for all conversations
            **client_kwargs: Additional arguments for HelionClient
        """
        self.client = HelionClient(base_url=base_url, **client_kwargs)
        self.system_prompt = system_prompt or (
            "You are Helion, an advanced AI assistant developed by DeepXR. "
            "You are helpful, harmless, and honest."
        )
        self.conversation_history: List[Dict[str, str]] = []

    def chat(
        self,
        message: str,
        temperature: float = 0.7,
        max_tokens: int = 4096,
        stream: bool = False,
        reset_history: bool = False
    ) -> Union[str, Generator[str, None, None]]:
        """
        Chat with the assistant

        Args:
            message: User message
            temperature: Sampling temperature
            max_tokens: Maximum tokens to generate
            stream: Whether to stream the response
            reset_history: Whether to reset conversation history

        Returns:
            Assistant response
        """
        if reset_history:
            self.conversation_history = []

        # Build messages
        messages = [{"role": "system", "content": self.system_prompt}]
        messages.extend(self.conversation_history)
        messages.append({"role": "user", "content": message})

        # Get response
        if stream:
            return self._stream_and_store(messages, temperature, max_tokens, message)
        else:
            response = self.client.chat(
                messages=messages,
                temperature=temperature,
                max_tokens=max_tokens,
                stream=False
            )

            # Update history
            self.conversation_history.append({"role": "user", "content": message})
            self.conversation_history.append({"role": "assistant", "content": response})

            return response

    def _stream_and_store(
        self,
        messages: List[Dict],
        temperature: float,
        max_tokens: int,
        user_message: str
    ) -> Generator[str, None, None]:
        """Stream response and store in history"""
        full_response = ""

        for chunk in self.client.chat(
            messages=messages,
            temperature=temperature,
            max_tokens=max_tokens,
            stream=True
        ):
            full_response += chunk
            yield chunk

        # Update history after streaming complete
        self.conversation_history.append({"role": "user", "content": user_message})
        self.conversation_history.append({"role": "assistant", "content": full_response})

    def reset(self):
        """Reset conversation history"""
        self.conversation_history = []

    def get_history(self) -> List[Dict[str, str]]:
        """Get conversation history"""
        return self.conversation_history.copy()


# Example usage
def example_usage():
    """Example usage of Helion client"""

    # Initialize client
    client = HelionClient(base_url="http://localhost:8000")

    # Check health
    health = client.health_check()
    print(f"Server status: {health['status']}")

    # Simple completion
    response = client.complete(
        "Explain quantum computing in simple terms:",
        temperature=0.7,
        max_tokens=500
    )
    print(f"\nResponse: {response}")

    # Chat with conversation
    messages = [
        {"role": "system", "content": "You are a helpful coding assistant."},
        {"role": "user", "content": "Write a Python function to calculate fibonacci numbers"}
    ]

    response = client.chat(messages=messages, temperature=0.3)
    print(f"\nCode: {response}")

    # Streaming example
    print("\nStreaming response:")
    for chunk in client.complete("Tell me a short story about AI:", stream=True):
        print(chunk, end='', flush=True)
    print()

    # Using assistant interface
    assistant = HelionAssistant()
    response = assistant.chat("What is machine learning?")
    print(f"\nAssistant: {response}")

    # Continue conversation
    response = assistant.chat("Can you give me an example?")
    print(f"\nAssistant: {response}")


if __name__ == "__main__":
    example_usage()
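
A minimal usage sketch of the new client with basic error handling, assuming `inference` is importable as a package (i.e. it has an `__init__.py`) and a server is listening at the default URL; the snippet is illustrative and not part of the committed file:

# Illustrative only: exercises HelionClient with basic error handling.
# The import path and server URL are assumptions based on the file layout above.
import requests

from inference.client import HelionClient

client = HelionClient(base_url="http://localhost:8000", timeout=60)

try:
    print(client.health_check())
    reply = client.complete("Say hello in one sentence.", max_tokens=64)
    print(reply)
except requests.exceptions.ConnectionError:
    print("Inference server is not reachable at the configured base_url.")
except requests.exceptions.Timeout:
    print("Request timed out; consider raising the client timeout.")
except requests.exceptions.HTTPError as err:
    print(f"Server returned an HTTP error: {err.response.status_code}")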