#!/usr/bin/env python3
"""
Helion-2.5-Rnd Python Client

Easy-to-use client for interacting with the Helion inference server.
"""

import json
from typing import Dict, Generator, List, Optional, Union

import requests


class HelionClient:
    """Client for the Helion-2.5-Rnd inference API."""

    def __init__(
        self,
        base_url: str = "http://localhost:8000",
        api_key: Optional[str] = None,
        timeout: int = 300
    ):
        """
        Initialize the Helion client.

        Args:
            base_url: Base URL of the inference server
            api_key: Optional API key for authentication
            timeout: Request timeout in seconds
        """
        self.base_url = base_url.rstrip('/')
        self.timeout = timeout
        self.headers = {
            "Content-Type": "application/json"
        }
        if api_key:
            self.headers["Authorization"] = f"Bearer {api_key}"

    def chat(
        self,
        messages: List[Dict[str, str]],
        temperature: float = 0.7,
        max_tokens: int = 4096,
        stream: bool = False,
        **kwargs
    ) -> Union[str, Generator[str, None, None]]:
        """
        Send a chat completion request.

        Args:
            messages: List of message dicts with 'role' and 'content' keys
            temperature: Sampling temperature (0.0 to 2.0)
            max_tokens: Maximum number of tokens to generate
            stream: Whether to stream the response
            **kwargs: Additional request parameters

        Returns:
            Generated text, or a generator of text chunks when streaming
        """
        payload = {
            "messages": messages,
            "temperature": temperature,
            "max_tokens": max_tokens,
            "stream": stream,
            **kwargs
        }

        if stream:
            return self._stream_chat(payload)
        return self._complete_chat(payload)

    def _complete_chat(self, payload: Dict) -> str:
        """Non-streaming chat completion."""
        response = requests.post(
            f"{self.base_url}/v1/chat/completions",
            headers=self.headers,
            json=payload,
            timeout=self.timeout
        )
        response.raise_for_status()
        data = response.json()
        return data["choices"][0]["message"]["content"]

    def _stream_chat(self, payload: Dict) -> Generator[str, None, None]:
        """Streaming chat completion via server-sent events."""
        response = requests.post(
            f"{self.base_url}/v1/chat/completions",
            headers=self.headers,
            json=payload,
            stream=True,
            timeout=self.timeout
        )
        response.raise_for_status()

        for line in response.iter_lines():
            if not line:
                continue
            line = line.decode('utf-8')
            if line.startswith('data: '):
                data_str = line[6:]
                if data_str == '[DONE]':
                    break
                try:
                    data = json.loads(data_str)
                    delta = data["choices"][0]["delta"].get("content", "")
                    if delta:
                        yield delta
                except json.JSONDecodeError:
                    # Skip malformed or partial SSE lines rather than aborting the stream
                    continue

    def complete(
        self,
        prompt: str,
        temperature: float = 0.7,
        max_tokens: int = 4096,
        stream: bool = False,
        **kwargs
    ) -> Union[str, Generator[str, None, None]]:
        """
        Send a text completion request.

        Args:
            prompt: Input text prompt
            temperature: Sampling temperature
            max_tokens: Maximum number of tokens to generate
            stream: Whether to stream the response
            **kwargs: Additional request parameters

        Returns:
            Generated text, or a generator of text chunks when streaming
        """
        messages = [{"role": "user", "content": prompt}]
        return self.chat(
            messages=messages,
            temperature=temperature,
            max_tokens=max_tokens,
            stream=stream,
            **kwargs
        )

    def health_check(self) -> Dict:
        """Check server health."""
        response = requests.get(
            f"{self.base_url}/health",
            headers=self.headers,
            timeout=10
        )
        response.raise_for_status()
        return response.json()

    def list_models(self) -> List[Dict]:
        """List available models."""
        response = requests.get(
            f"{self.base_url}/v1/models",
            headers=self.headers,
            timeout=10
        )
        response.raise_for_status()
        return response.json()["data"]
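
# --- Optional: transport-level retries ---------------------------------------
# A minimal sketch of how transient-failure retries could be layered under
# HelionClient using requests' standard Session/HTTPAdapter machinery. This is
# illustrative, not part of the Helion API: `make_retrying_session`, the
# `retries` and `backoff_factor` values, and the status-code list are all
# assumptions, and HelionClient would need to be adapted to route its calls
# through the returned session for this to take effect. `Retry.allowed_methods`
# requires urllib3 >= 1.26 (older versions used `method_whitelist`).

def make_retrying_session(retries: int = 3, backoff_factor: float = 0.5) -> requests.Session:
    """Build a requests.Session that retries transient failures (hypothetical helper)."""
    from requests.adapters import HTTPAdapter
    from urllib3.util.retry import Retry

    session = requests.Session()
    retry = Retry(
        total=retries,
        backoff_factor=backoff_factor,          # exponential backoff between attempts
        status_forcelist=(429, 500, 502, 503),  # retry rate limits and server errors
        allowed_methods=("GET", "POST"),        # retrying POST assumes idempotent handling server-side
    )
    adapter = HTTPAdapter(max_retries=retry)
    session.mount("http://", adapter)
    session.mount("https://", adapter)
    return session
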
class HelionAssistant:
    """High-level assistant interface for Helion."""

    def __init__(
        self,
        base_url: str = "http://localhost:8000",
        system_prompt: Optional[str] = None,
        **client_kwargs
    ):
        """
        Initialize the Helion assistant.

        Args:
            base_url: Base URL of the inference server
            system_prompt: System prompt to use for all conversations
            **client_kwargs: Additional arguments passed to HelionClient
        """
        self.client = HelionClient(base_url=base_url, **client_kwargs)
        self.system_prompt = system_prompt or (
            "You are Helion, an advanced AI assistant developed by DeepXR. "
            "You are helpful, harmless, and honest."
        )
        self.conversation_history: List[Dict[str, str]] = []

    def chat(
        self,
        message: str,
        temperature: float = 0.7,
        max_tokens: int = 4096,
        stream: bool = False,
        reset_history: bool = False
    ) -> Union[str, Generator[str, None, None]]:
        """
        Chat with the assistant.

        Args:
            message: User message
            temperature: Sampling temperature
            max_tokens: Maximum number of tokens to generate
            stream: Whether to stream the response
            reset_history: Whether to reset the conversation history first

        Returns:
            Assistant response, or a generator of text chunks when streaming
        """
        if reset_history:
            self.conversation_history = []

        # Build messages: system prompt, then prior turns, then the new user turn
        messages = [{"role": "system", "content": self.system_prompt}]
        messages.extend(self.conversation_history)
        messages.append({"role": "user", "content": message})

        # Get response
        if stream:
            return self._stream_and_store(messages, temperature, max_tokens, message)

        response = self.client.chat(
            messages=messages,
            temperature=temperature,
            max_tokens=max_tokens,
            stream=False
        )

        # Update history
        self.conversation_history.append({"role": "user", "content": message})
        self.conversation_history.append({"role": "assistant", "content": response})

        return response

    def _stream_and_store(
        self,
        messages: List[Dict],
        temperature: float,
        max_tokens: int,
        user_message: str
    ) -> Generator[str, None, None]:
        """Stream the response, then store it in the conversation history."""
        full_response = ""
        for chunk in self.client.chat(
            messages=messages,
            temperature=temperature,
            max_tokens=max_tokens,
            stream=True
        ):
            full_response += chunk
            yield chunk

        # Update history only after streaming completes
        self.conversation_history.append({"role": "user", "content": user_message})
        self.conversation_history.append({"role": "assistant", "content": full_response})

    def reset(self):
        """Reset the conversation history."""
        self.conversation_history = []

    def get_history(self) -> List[Dict[str, str]]:
        """Get a copy of the conversation history."""
        return self.conversation_history.copy()


# Example usage
def example_usage():
    """Example usage of the Helion client."""
    # Initialize client
    client = HelionClient(base_url="http://localhost:8000")

    # Check health
    health = client.health_check()
    print(f"Server status: {health['status']}")

    # Simple completion
    response = client.complete(
        "Explain quantum computing in simple terms:",
        temperature=0.7,
        max_tokens=500
    )
    print(f"\nResponse: {response}")

    # Chat with conversation
    messages = [
        {"role": "system", "content": "You are a helpful coding assistant."},
        {"role": "user", "content": "Write a Python function to calculate Fibonacci numbers"}
    ]
    response = client.chat(messages=messages, temperature=0.3)
    print(f"\nCode: {response}")

    # Streaming example
    print("\nStreaming response:")
    for chunk in client.complete("Tell me a short story about AI:", stream=True):
        print(chunk, end='', flush=True)
    print()

    # Using the assistant interface
    assistant = HelionAssistant()
    response = assistant.chat("What is machine learning?")
    print(f"\nAssistant: {response}")

    # Continue the conversation
    response = assistant.chat("Can you give me an example?")
    print(f"\nAssistant: {response}")


if __name__ == "__main__":
    example_usage()
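

# --- Error-handling sketch ----------------------------------------------------
# A minimal sketch of defensive usage, assuming only the client defined above.
# The exception types are standard `requests` exceptions surfaced by
# raise_for_status(); nothing here is Helion-specific, and `run_safely` is a
# hypothetical helper, not part of the client API.

def run_safely():
    """Demonstrate catching the common failure modes of a remote client."""
    client = HelionClient(base_url="http://localhost:8000", timeout=30)
    try:
        reply = client.complete("Say hello in one sentence.", max_tokens=50)
        print(reply)
    except requests.exceptions.ConnectionError:
        print("Server unreachable -- is the inference server running?")
    except requests.exceptions.Timeout:
        print("Request timed out; consider raising the client timeout.")
    except requests.exceptions.HTTPError as exc:
        # raise_for_status() in the client turns 4xx/5xx responses into HTTPError
        print(f"Server returned an error: {exc.response.status_code}")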