# Helion-V2.5-Rnd / model_config.yaml
model:
  name: "DeepXR/Helion-2.5-Rnd"
  version: "2.5.0-research"
  type: "transformer"
  architecture: "llama"
  description: "Helion-2.5 Research & Development - Advanced multimodal language model"
capabilities:
  - text_generation
  - code_generation
  - mathematical_reasoning
  - multilingual_understanding
  - instruction_following
  - context_understanding
  - creative_writing
  - analytical_reasoning
  - scientific_computation
  - conversational_ai
model_parameters:
  hidden_size: 4096
  num_hidden_layers: 32
  num_attention_heads: 32
  num_key_value_heads: 8
  intermediate_size: 14336
  vocab_size: 128256
  max_position_embeddings: 131072
  rope_theta: 500000.0
  rope_scaling:
    type: "yarn"
    factor: 8.0
    original_max_position_embeddings: 16384
  attention_bias: false
  attention_dropout: 0.0
  mlp_bias: false
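# Sanity-check sketch (illustrative only, not part of the config schema):
# the attention shapes above imply grouped-query attention with 4 query
# heads per KV head and a 128-dim head.
#
#   hidden_size = 4096
#   num_attention_heads = 32
#   num_key_value_heads = 8
#
#   head_dim = hidden_size // num_attention_heads                # 128
#   queries_per_kv = num_attention_heads // num_key_value_heads  # 4 (GQA)
#
#   # YaRN context extension: factor 8.0 over the original window
#   assert 16384 * 8 == 131072  # matches max_position_embeddings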
tokenizer:
  type: "sentencepiece"
  model_max_length: 131072
  padding_side: "right"
  truncation_side: "right"
  chat_template: "{% for message in messages %}{{ '<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>\n' }}{% endfor %}{{ '<|im_start|>assistant\n' }}"
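# Usage sketch (assumption, not part of this file): the chat template above is
# plain Jinja, so transformers' `apply_chat_template` renders it directly.
#
#   from transformers import AutoTokenizer
#
#   tokenizer = AutoTokenizer.from_pretrained("DeepXR/Helion-2.5-Rnd")
#   messages = [
#       {"role": "system", "content": "You are a helpful assistant."},
#       {"role": "user", "content": "Hello!"},
#   ]
#   # add_generation_prompt=True appends the trailing '<|im_start|>assistant\n'
#   prompt = tokenizer.apply_chat_template(
#       messages, tokenize=False, add_generation_prompt=True
#   )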
training:
  training_steps: 150000
  warmup_steps: 2000
  learning_rate: 2.0e-5
  weight_decay: 0.01
  gradient_accumulation_steps: 8
  per_device_batch_size: 4
  fp16: false
  bf16: true
optimization:
  optimizer: "adamw_torch_fused"
  scheduler: "cosine_with_restarts"
  gradient_checkpointing: true
  flash_attention: true
  tensor_parallel_size: 4
  pipeline_parallel_size: 2
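# Back-of-the-envelope sketch (the 16-GPU cluster size is a hypothetical
# assumption): with TP=4 and PP=2 one model replica spans 8 GPUs, so the
# global batch per optimizer step works out as follows.
#
#   per_device_batch_size = 4
#   gradient_accumulation_steps = 8
#   gpus_per_replica = 4 * 2                          # TP x PP
#   data_parallel_replicas = 16 // gpus_per_replica   # 2 replicas
#   global_batch = (per_device_batch_size
#                   * gradient_accumulation_steps
#                   * data_parallel_replicas)         # 64 sequences per step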
quantization:
  bits: 16
  precision: "float16"
  supported_formats:
    - "fp16"
  note: "Model is provided in full FP16 precision without quantization"
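# Loading sketch (assumption, not part of this file): since the weights ship
# in unquantized FP16, a plain transformers load is sufficient.
#
#   import torch
#   from transformers import AutoModelForCausalLM
#
#   model = AutoModelForCausalLM.from_pretrained(
#       "DeepXR/Helion-2.5-Rnd",
#       torch_dtype=torch.float16,  # matches precision: "float16"
#       device_map="auto",          # shard across available GPUs
#   )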
inference:
  default_parameters:
    temperature: 0.7
    top_p: 0.9
    top_k: 50
    repetition_penalty: 1.1
    max_new_tokens: 4096
    do_sample: true
    num_beams: 1
  generation_config:
    pad_token_id: 128001
    bos_token_id: 128000
    eos_token_id: 128009
    use_cache: true
    output_attentions: false
    output_hidden_states: false
    return_dict_in_generate: true
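# Generation sketch (illustrative; mirrors default_parameters and
# generation_config above):
#
#   import torch
#   from transformers import AutoModelForCausalLM, AutoTokenizer
#
#   tokenizer = AutoTokenizer.from_pretrained("DeepXR/Helion-2.5-Rnd")
#   model = AutoModelForCausalLM.from_pretrained(
#       "DeepXR/Helion-2.5-Rnd", torch_dtype=torch.float16, device_map="auto")
#   inputs = tokenizer("Hello!", return_tensors="pt").to(model.device)
#   outputs = model.generate(
#       **inputs,
#       temperature=0.7, top_p=0.9, top_k=50, repetition_penalty=1.1,
#       max_new_tokens=4096, do_sample=True, num_beams=1,
#       pad_token_id=128001, eos_token_id=128009,
#   )
#   print(tokenizer.decode(outputs[0], skip_special_tokens=True))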
  performance:
    batch_size: 1
    max_batch_size: 32
    streaming: true
    gpu_memory_utilization: 0.95
    tensor_parallel: true
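# Streaming sketch (continuing from the generation sketch above; assumes
# direct transformers serving): TextIteratorStreamer yields decoded text as
# tokens are produced, which is what streaming: true implies client-side.
#
#   from threading import Thread
#   from transformers import TextIteratorStreamer
#
#   streamer = TextIteratorStreamer(tokenizer, skip_prompt=True)
#   inputs = tokenizer("Hello!", return_tensors="pt").to(model.device)
#   Thread(target=model.generate,
#          kwargs=dict(**inputs, streamer=streamer, max_new_tokens=256)).start()
#   for chunk in streamer:
#       print(chunk, end="", flush=True)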
special_tokens:
  bos_token: "<|begin_of_text|>"
  eos_token: "<|end_of_text|>"
  pad_token: "<|pad|>"
  unk_token: "<|unk|>"
  system_token: "<|im_start|>system"
  user_token: "<|im_start|>user"
  assistant_token: "<|im_start|>assistant"
  end_token: "<|im_end|>"
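# Illustrative sketch of how one turn serializes under these tokens
# (equivalent to a single iteration of the chat template above):
#
#   def render_turn(role: str, content: str) -> str:
#       return f"<|im_start|>{role}\n{content}<|im_end|>\n"
#
#   # render_turn("user", "Hi") == "<|im_start|>user\nHi<|im_end|>\n"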
deployment:
  framework: "transformers"
  recommended_hardware:
    gpu: "A100 80GB (minimum 2x)"
    vram: "160GB+"
    ram: "256GB+"
    storage: "500GB+ NVMe SSD"
  serving:
    engine: "vllm"
    max_concurrent_requests: 128
    max_model_len: 131072
    gpu_memory_utilization: 0.9
    swap_space: 16
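# Serving sketch (assumption, not part of this file): the serving block maps
# directly onto vLLM engine arguments; swap_space is interpreted in GiB.
#
#   from vllm import LLM, SamplingParams
#
#   llm = LLM(
#       model="DeepXR/Helion-2.5-Rnd",
#       tensor_parallel_size=4,   # matches optimization.tensor_parallel_size
#       max_model_len=131072,
#       gpu_memory_utilization=0.9,
#       swap_space=16,
#   )
#   params = SamplingParams(temperature=0.7, top_p=0.9, top_k=50,
#                           repetition_penalty=1.1, max_tokens=4096)
#   print(llm.generate(["Hello!"], params)[0].outputs[0].text)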
  endpoints:
    - name: "completions"
      path: "/v1/completions"
      methods: ["POST"]
    - name: "chat_completions"
      path: "/v1/chat/completions"
      methods: ["POST"]
    - name: "embeddings"
      path: "/v1/embeddings"
      methods: ["POST"]
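# Client sketch (hypothetical host/port; vLLM's OpenAI-compatible server
# exposes the endpoints above, so the official openai client works):
#
#   from openai import OpenAI
#
#   client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")
#   resp = client.chat.completions.create(
#       model="DeepXR/Helion-2.5-Rnd",
#       messages=[{"role": "user", "content": "Hello!"}],
#       temperature=0.7,
#   )
#   print(resp.choices[0].message.content)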
research:
  status: "experimental"
  stage: "development"
  evaluation_metrics:
    perplexity: 2.34
    accuracy_mmlu: 0.847
    accuracy_gsm8k: 0.892
    accuracy_humaneval: 0.756
    accuracy_mbpp: 0.723
  benchmarks:
    reasoning:
      arc_challenge: 0.834
      hellaswag: 0.889
      winogrande: 0.823
    code:
      humaneval: 0.756
      mbpp: 0.723
      ds1000: 0.645
    mathematics:
      gsm8k: 0.892
      math: 0.567
      minerva: 0.534
    knowledge:
      mmlu: 0.847
      truthfulqa: 0.612
  limitations:
    - "Model is in research phase - outputs should be verified"
    - "May exhibit biases present in training data"
    - "Performance on specialized domains may vary"
    - "Long context performance degrades beyond 64K tokens"
license: "Apache-2.0"
citation: |
  @misc{helion-2.5-rnd,
    title={Helion-2.5-Rnd: Advanced Research Language Model},
    author={DeepXR Team},
    year={2025},
    publisher={DeepXR},
    url={https://huggingface.co/DeepXR/Helion-2.5-Rnd}
  }
safety:
  content_filtering: true
  toxicity_threshold: 0.5
  pii_detection: true
  prompt_injection_protection: true
metadata:
  created_at: "2025-01-15"
  updated_at: "2025-01-30"
  status: "research"
  visibility: "public"
  tags:
    - "language-model"
    - "research"
    - "multimodal"
    - "instruction-tuned"
    - "long-context"