---
model:
  name: "DeepXR/Helion-2.5-Rnd"
  version: "2.5.0-research"
  type: "transformer"
  architecture: "llama"
  description: "Helion-2.5 Research & Development - Advanced multimodal language model"
  capabilities:
    - text_generation
    - code_generation
    - mathematical_reasoning
    - multilingual_understanding
    - instruction_following
    - context_understanding
    - creative_writing
    - analytical_reasoning
    - scientific_computation
    - conversational_ai
model_parameters:
  hidden_size: 4096
  num_hidden_layers: 32
  num_attention_heads: 32
  num_key_value_heads: 8
  intermediate_size: 14336
  vocab_size: 128256
  max_position_embeddings: 131072
  rope_theta: 500000.0
  rope_scaling:
    type: "yarn"
    factor: 8.0
    original_max_position_embeddings: 16384
  attention_bias: false
  attention_dropout: 0.0
  mlp_bias: false
tokenizer:
  type: "sentencepiece"
  model_max_length: 131072
  padding_side: "right"
  truncation_side: "right"
  # Double quotes are required here: the \n escapes must expand to real newlines.
  chat_template: "{% for message in messages %}{{ '<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>\n' }}{% endfor %}{{ '<|im_start|>assistant\n' }}"
training:
  training_steps: 150000
  warmup_steps: 2000
  learning_rate: 2.0e-5
  weight_decay: 0.01
  gradient_accumulation_steps: 8
  per_device_batch_size: 4
  fp16: false
  bf16: true
  optimization:
    optimizer: "adamw_torch_fused"
    scheduler: "cosine_with_restarts"
    gradient_checkpointing: true
    flash_attention: true
    tensor_parallel_size: 4
    pipeline_parallel_size: 2
quantization:
  bits: 16
  precision: "float16"
  supported_formats:
    - "fp16"
  note: "Model is provided in full FP16 precision without quantization"
inference:
  default_parameters:
    temperature: 0.7
    top_p: 0.9
    top_k: 50
    repetition_penalty: 1.1
    max_new_tokens: 4096
    do_sample: true
    num_beams: 1
  generation_config:
    pad_token_id: 128001
    bos_token_id: 128000
    eos_token_id: 128009
    use_cache: true
    output_attentions: false
    output_hidden_states: false
    return_dict_in_generate: true
  performance:
    batch_size: 1
    max_batch_size: 32
    streaming: true
    gpu_memory_utilization: 0.95
    tensor_parallel: true
special_tokens:
  bos_token: "<|begin_of_text|>"
  # NOTE(review): confirm eos_token corresponds to the eos_token_id (128009)
  # declared under inference.generation_config — the two may be inconsistent.
  eos_token: "<|end_of_text|>"
  pad_token: "<|pad|>"
  unk_token: "<|unk|>"
  system_token: "<|im_start|>system"
  user_token: "<|im_start|>user"
  assistant_token: "<|im_start|>assistant"
  end_token: "<|im_end|>"
deployment:
  framework: "transformers"
  recommended_hardware:
    gpu: "A100 80GB (minimum 2x)"
    vram: "160GB+"
    ram: "256GB+"
    storage: "500GB+ NVMe SSD"
  serving:
    engine: "vllm"
    max_concurrent_requests: 128
    max_model_len: 131072
    gpu_memory_utilization: 0.9
    swap_space: 16
  endpoints:
    - name: "completions"
      path: "/v1/completions"
      methods: ["POST"]
    - name: "chat_completions"
      path: "/v1/chat/completions"
      methods: ["POST"]
    - name: "embeddings"
      path: "/v1/embeddings"
      methods: ["POST"]
research:
  status: "experimental"
  stage: "development"
  evaluation_metrics:
    perplexity: 2.34
    accuracy_mmlu: 0.847
    accuracy_gsm8k: 0.892
    accuracy_humaneval: 0.756
    accuracy_mbpp: 0.723
  benchmarks:
    reasoning:
      arc_challenge: 0.834
      hellaswag: 0.889
      winogrande: 0.823
    code:
      humaneval: 0.756
      mbpp: 0.723
      ds1000: 0.645
    mathematics:
      gsm8k: 0.892
      math: 0.567
      minerva: 0.534
    knowledge:
      mmlu: 0.847
      truthfulqa: 0.612
  limitations:
    - "Model is in research phase - outputs should be verified"
    - "May exhibit biases present in training data"
    - "Performance on specialized domains may vary"
    - "Long context performance degrades beyond 64K tokens"
license: "Apache-2.0"

citation: |
  @misc{helion-2.5-rnd,
    title={Helion-2.5-Rnd: Advanced Research Language Model},
    author={DeepXR Team},
    year={2025},
    publisher={DeepXR},
    url={https://huggingface.co/DeepXR/Helion-2.5-Rnd}
  }
safety:
  content_filtering: true
  toxicity_threshold: 0.5
  pii_detection: true
  prompt_injection_protection: true
metadata:
  created_at: "2025-01-15"
  updated_at: "2025-01-30"
  status: "research"
  visibility: "public"
  tags:
    - "language-model"
    - "research"
    - "multimodal"
    - "instruction-tuned"
    - "long-context"