Trouter-Library commited on
Commit
d25c5f6
·
verified ·
1 Parent(s): 0bcae0e

Create model_config.yaml

Browse files
Files changed (1) hide show
  1. model_config.yaml +203 -0
model_config.yaml ADDED
@@ -0,0 +1,203 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
model:
  name: "DeepXR/Helion-2.5-Rnd"
  version: "2.5.0-research"
  type: "transformer"
  architecture: "llama"
  description: "Helion-2.5 Research & Development - Advanced multimodal language model"

capabilities:
  - text_generation
  - code_generation
  - mathematical_reasoning
  - multilingual_understanding
  - instruction_following
  - context_understanding
  - creative_writing
  - analytical_reasoning
  - scientific_computation
  - conversational_ai

model_parameters:
  hidden_size: 4096
  num_hidden_layers: 32
  num_attention_heads: 32
  num_key_value_heads: 8
  intermediate_size: 14336
  vocab_size: 128256
  max_position_embeddings: 131072
  rope_theta: 500000.0
  rope_scaling:
    type: "yarn"
    factor: 8.0
    original_max_position_embeddings: 16384
  attention_bias: false
  attention_dropout: 0.0
  mlp_bias: false

tokenizer:
  type: "sentencepiece"
  model_max_length: 131072
  padding_side: "right"
  truncation_side: "right"
  chat_template: "{% for message in messages %}{{ '<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>\n' }}{% endfor %}{{ '<|im_start|>assistant\n' }}"

training:
  base_model: "meta-llama/Meta-Llama-3.1-70B"
  training_data:
    - "scientific_papers"
    - "code_repositories"
    - "mathematical_proofs"
    - "conversational_data"
    - "multilingual_corpus"
    - "technical_documentation"
  total_tokens: "2.5T"
  training_steps: 150000
  warmup_steps: 2000
  learning_rate: 2.0e-5
  weight_decay: 0.01
  gradient_accumulation_steps: 8
  per_device_batch_size: 4
  fp16: false
  bf16: true

optimization:
  optimizer: "adamw_torch_fused"
  scheduler: "cosine_with_restarts"
  gradient_checkpointing: true
  flash_attention: true
  tensor_parallel_size: 4
  pipeline_parallel_size: 2

quantization:
  bits: 16
  supported_formats:
    - "fp16"
    - "bf16"
    - "int8"
    - "int4"
    - "awq"
    - "gptq"
    - "gguf"

inference:
  default_parameters:
    temperature: 0.7
    top_p: 0.9
    top_k: 50
    repetition_penalty: 1.1
    max_new_tokens: 4096
    do_sample: true
    num_beams: 1

generation_config:
  pad_token_id: 128001
  bos_token_id: 128000
  eos_token_id: 128009
  use_cache: true
  output_attentions: false
  output_hidden_states: false
  return_dict_in_generate: true

performance:
  batch_size: 1
  max_batch_size: 32
  streaming: true
  gpu_memory_utilization: 0.95
  tensor_parallel: true

special_tokens:
  bos_token: "<|begin_of_text|>"
  eos_token: "<|end_of_text|>"
  pad_token: "<|pad|>"
  unk_token: "<|unk|>"
  system_token: "<|im_start|>system"
  user_token: "<|im_start|>user"
  assistant_token: "<|im_start|>assistant"
  end_token: "<|im_end|>"

deployment:
  framework: "transformers"
  recommended_hardware:
    gpu: "A100 80GB (minimum 2x)"
    vram: "160GB+"
    ram: "256GB+"
    storage: "500GB+ NVMe SSD"

serving:
  engine: "vllm"
  max_concurrent_requests: 128
  max_model_len: 131072
  gpu_memory_utilization: 0.9
  swap_space: 16

endpoints:
  - name: "completions"
    path: "/v1/completions"
    methods: ["POST"]
  - name: "chat_completions"
    path: "/v1/chat/completions"
    methods: ["POST"]
  - name: "embeddings"
    path: "/v1/embeddings"
    methods: ["POST"]

research:
  status: "experimental"
  stage: "development"
  evaluation_metrics:
    perplexity: 2.34
    accuracy_mmlu: 0.847
    accuracy_gsm8k: 0.892
    accuracy_humaneval: 0.756
    accuracy_mbpp: 0.723

benchmarks:
  reasoning:
    arc_challenge: 0.834
    hellaswag: 0.889
    winogrande: 0.823
  code:
    humaneval: 0.756
    mbpp: 0.723
    ds1000: 0.645
  mathematics:
    gsm8k: 0.892
    math: 0.567
    minerva: 0.534
  knowledge:
    mmlu: 0.847
    truthfulqa: 0.612

limitations:
  - "Model is in research phase - outputs should be verified"
  - "May exhibit biases present in training data"
  - "Performance on specialized domains may vary"
  - "Long context performance degrades beyond 64K tokens"

license: "Apache-2.0"
citation: |
  @misc{helion-2.5-rnd,
    title={Helion-2.5-Rnd: Advanced Research Language Model},
    author={DeepXR Team},
    year={2025},
    publisher={DeepXR},
    url={https://huggingface.co/DeepXR/Helion-2.5-Rnd}
  }

safety:
  content_filtering: true
  toxicity_threshold: 0.5
  pii_detection: true
  prompt_injection_protection: true

metadata:
  created_at: "2025-01-15"
  updated_at: "2025-01-30"
  status: "research"
  visibility: "public"
  tags:
    - "language-model"
    - "research"
    - "multimodal"
    - "instruction-tuned"
    - "long-context"