"""
Helion-2.5-Rnd Python Client

Easy-to-use client for interacting with the Helion inference server.
"""

import json
from typing import Dict, Generator, List, Optional, Union

import requests


class HelionClient:
    """Client for Helion-2.5-Rnd inference API"""

    def __init__(
        self,
        base_url: str = "http://localhost:8000",
        api_key: Optional[str] = None,
        timeout: int = 300
    ):
        """
        Initialize Helion client

        Args:
            base_url: Base URL of the inference server
            api_key: Optional API key for authentication
            timeout: Request timeout in seconds
        """
        self.base_url = base_url.rstrip('/')
        self.timeout = timeout
        self.headers = {
            "Content-Type": "application/json"
        }
        if api_key:
            self.headers["Authorization"] = f"Bearer {api_key}"

    def chat(
        self,
        messages: List[Dict[str, str]],
        temperature: float = 0.7,
        max_tokens: int = 4096,
        stream: bool = False,
        **kwargs
    ) -> Union[str, Generator[str, None, None]]:
        """
        Send a chat completion request

        Args:
            messages: List of message dicts with 'role' and 'content' keys
            temperature: Sampling temperature (0.0 to 2.0)
            max_tokens: Maximum tokens to generate
            stream: Whether to stream the response
            **kwargs: Additional parameters passed through to the server

        Returns:
            Generated text, or a generator of text chunks when streaming
        """
        payload = {
            "messages": messages,
            "temperature": temperature,
            "max_tokens": max_tokens,
            "stream": stream,
            **kwargs
        }

        if stream:
            return self._stream_chat(payload)
        else:
            return self._complete_chat(payload)

    def _complete_chat(self, payload: Dict) -> str:
        """Non-streaming chat completion"""
        response = requests.post(
            f"{self.base_url}/v1/chat/completions",
            headers=self.headers,
            json=payload,
            timeout=self.timeout
        )
        response.raise_for_status()

        data = response.json()
        return data["choices"][0]["message"]["content"]

    def _stream_chat(self, payload: Dict) -> Generator[str, None, None]:
        """Streaming chat completion; yields text chunks as they arrive"""
        response = requests.post(
            f"{self.base_url}/v1/chat/completions",
            headers=self.headers,
            json=payload,
            stream=True,
            timeout=self.timeout
        )
        response.raise_for_status()

        for line in response.iter_lines():
            if line:
                line = line.decode('utf-8')
                if line.startswith('data: '):
                    data_str = line[6:]
                    if data_str == '[DONE]':
                        break
                    try:
                        data = json.loads(data_str)
                        delta = data["choices"][0]["delta"].get("content", "")
                        if delta:
                            yield delta
                    except json.JSONDecodeError:
                        # Skip malformed or partial event lines
                        continue
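
        # The loop above assumes OpenAI-style server-sent events, e.g.:
        #   data: {"choices": [{"delta": {"content": "Hel"}}], ...}
        #   data: [DONE]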

    def complete(
        self,
        prompt: str,
        temperature: float = 0.7,
        max_tokens: int = 4096,
        stream: bool = False,
        **kwargs
    ) -> Union[str, Generator[str, None, None]]:
        """
        Send a single-prompt completion request (a thin wrapper around chat)

        Args:
            prompt: Input text prompt
            temperature: Sampling temperature
            max_tokens: Maximum tokens to generate
            stream: Whether to stream the response
            **kwargs: Additional parameters passed through to the server

        Returns:
            Generated text, or a generator of text chunks when streaming
        """
        messages = [{"role": "user", "content": prompt}]
        return self.chat(
            messages=messages,
            temperature=temperature,
            max_tokens=max_tokens,
            stream=stream,
            **kwargs
        )

    def health_check(self) -> Dict:
        """Check server health"""
        response = requests.get(
            f"{self.base_url}/health",
            headers=self.headers,
            timeout=10
        )
        response.raise_for_status()
        return response.json()

    def list_models(self) -> List[Dict]:
        """List available models"""
        response = requests.get(
            f"{self.base_url}/v1/models",
            headers=self.headers,
            timeout=10
        )
        response.raise_for_status()
        return response.json()["data"]


class HelionAssistant:
    """High-level assistant interface for Helion"""

    def __init__(
        self,
        base_url: str = "http://localhost:8000",
        system_prompt: Optional[str] = None,
        **client_kwargs
    ):
        """
        Initialize Helion assistant

        Args:
            base_url: Base URL of the inference server
            system_prompt: System prompt to use for all conversations
            **client_kwargs: Additional arguments for HelionClient
        """
        self.client = HelionClient(base_url=base_url, **client_kwargs)
        self.system_prompt = system_prompt or (
            "You are Helion, an advanced AI assistant developed by DeepXR. "
            "You are helpful, harmless, and honest."
        )
        self.conversation_history: List[Dict[str, str]] = []

    def chat(
        self,
        message: str,
        temperature: float = 0.7,
        max_tokens: int = 4096,
        stream: bool = False,
        reset_history: bool = False
    ) -> Union[str, Generator[str, None, None]]:
        """
        Chat with the assistant, keeping multi-turn conversation history

        Args:
            message: User message
            temperature: Sampling temperature
            max_tokens: Maximum tokens to generate
            stream: Whether to stream the response
            reset_history: Whether to reset the conversation history first

        Returns:
            Assistant response, or a generator of text chunks when streaming
        """
        if reset_history:
            self.conversation_history = []

        # Prepend the system prompt, then replay the stored conversation
        messages = [{"role": "system", "content": self.system_prompt}]
        messages.extend(self.conversation_history)
        messages.append({"role": "user", "content": message})

        if stream:
            return self._stream_and_store(messages, temperature, max_tokens, message)
        else:
            response = self.client.chat(
                messages=messages,
                temperature=temperature,
                max_tokens=max_tokens,
                stream=False
            )

            self.conversation_history.append({"role": "user", "content": message})
            self.conversation_history.append({"role": "assistant", "content": response})

            return response

    def _stream_and_store(
        self,
        messages: List[Dict],
        temperature: float,
        max_tokens: int,
        user_message: str
    ) -> Generator[str, None, None]:
        """Stream the response, then store the full text in history"""
        full_response = ""

        for chunk in self.client.chat(
            messages=messages,
            temperature=temperature,
            max_tokens=max_tokens,
            stream=True
        ):
            full_response += chunk
            yield chunk

        # History is updated only after the stream completes, so an aborted
        # stream leaves the conversation history unchanged.
        self.conversation_history.append({"role": "user", "content": user_message})
        self.conversation_history.append({"role": "assistant", "content": full_response})

    def reset(self):
        """Reset conversation history"""
        self.conversation_history = []

    def get_history(self) -> List[Dict[str, str]]:
        """Return a copy of the conversation history"""
        return self.conversation_history.copy()
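
    # Note: the stored history grows without bound across turns; call reset()
    # or pass reset_history=True to chat() to start a fresh conversation.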


def example_usage():
    """Example usage of the Helion client"""
    client = HelionClient(base_url="http://localhost:8000")

    # Check that the server is up
    health = client.health_check()
    print(f"Server status: {health['status']}")

    # Simple one-shot completion
    response = client.complete(
        "Explain quantum computing in simple terms:",
        temperature=0.7,
        max_tokens=500
    )
    print(f"\nResponse: {response}")

    # Chat completion with an explicit system prompt
    messages = [
        {"role": "system", "content": "You are a helpful coding assistant."},
        {"role": "user", "content": "Write a Python function to calculate fibonacci numbers"}
    ]
    response = client.chat(messages=messages, temperature=0.3)
    print(f"\nCode: {response}")

    # Streaming completion, printed chunk by chunk
    print("\nStreaming response:")
    for chunk in client.complete("Tell me a short story about AI:", stream=True):
        print(chunk, end='', flush=True)
    print()

    # Multi-turn conversation through the high-level assistant
    assistant = HelionAssistant()
    response = assistant.chat("What is machine learning?")
    print(f"\nAssistant: {response}")

    # The follow-up question reuses the stored conversation history
    response = assistant.chat("Can you give me an example?")
    print(f"\nAssistant: {response}")


if __name__ == "__main__":
    example_usage()