Trouter-Library commited on
Commit
d25c5f6
·
verified ·
1 Parent(s): 0bcae0e

Create model_config.yaml

Browse files
Files changed (1) hide show
  1. model_config.yaml +203 -0
model_config.yaml ADDED
@@ -0,0 +1,203 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
model:
  name: "DeepXR/Helion-2.5-Rnd"
  version: "2.5.0-research"
  type: "transformer"
  architecture: "llama"
  description: "Helion-2.5 Research & Development - Advanced multimodal language model"

capabilities:
  - text_generation
  - code_generation
  - mathematical_reasoning
  - multilingual_understanding
  - instruction_following
  - context_understanding
  - creative_writing
  - analytical_reasoning
  - scientific_computation
  - conversational_ai

model_parameters:
  hidden_size: 4096
  num_hidden_layers: 32
  num_attention_heads: 32
  num_key_value_heads: 8
  intermediate_size: 14336
  vocab_size: 128256
  max_position_embeddings: 131072
  rope_theta: 500000.0
  rope_scaling:
    type: "yarn"
    factor: 8.0
    original_max_position_embeddings: 16384
  attention_bias: false
  attention_dropout: 0.0
  mlp_bias: false

tokenizer:
  type: "sentencepiece"
  model_max_length: 131072
  padding_side: "right"
  truncation_side: "right"
  chat_template: "{% for message in messages %}{{ '<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>\n' }}{% endfor %}{{ '<|im_start|>assistant\n' }}"

training:
  base_model: "meta-llama/Meta-Llama-3.1-70B"
  training_data:
    - "scientific_papers"
    - "code_repositories"
    - "mathematical_proofs"
    - "conversational_data"
    - "multilingual_corpus"
    - "technical_documentation"
  total_tokens: "2.5T"
  training_steps: 150000
  warmup_steps: 2000
  learning_rate: 2.0e-5
  weight_decay: 0.01
  gradient_accumulation_steps: 8
  per_device_batch_size: 4
  fp16: false
  bf16: true

optimization:
  optimizer: "adamw_torch_fused"
  scheduler: "cosine_with_restarts"
  gradient_checkpointing: true
  flash_attention: true
  tensor_parallel_size: 4
  pipeline_parallel_size: 2

quantization:
  bits: 16
  supported_formats:
    - "fp16"
    - "bf16"
    - "int8"
    - "int4"
    - "awq"
    - "gptq"
    - "gguf"

inference:
  default_parameters:
    temperature: 0.7
    top_p: 0.9
    top_k: 50
    repetition_penalty: 1.1
    max_new_tokens: 4096
    do_sample: true
    num_beams: 1

generation_config:
  pad_token_id: 128001
  bos_token_id: 128000
  eos_token_id: 128009
  use_cache: true
  output_attentions: false
  output_hidden_states: false
  return_dict_in_generate: true

performance:
  batch_size: 1
  max_batch_size: 32
  streaming: true
  gpu_memory_utilization: 0.95
  tensor_parallel: true

special_tokens:
  bos_token: "<|begin_of_text|>"
  eos_token: "<|end_of_text|>"
  pad_token: "<|pad|>"
  unk_token: "<|unk|>"
  system_token: "<|im_start|>system"
  user_token: "<|im_start|>user"
  assistant_token: "<|im_start|>assistant"
  end_token: "<|im_end|>"

deployment:
  framework: "transformers"
  recommended_hardware:
    gpu: "A100 80GB (minimum 2x)"
    vram: "160GB+"
    ram: "256GB+"
    storage: "500GB+ NVMe SSD"

serving:
  engine: "vllm"
  max_concurrent_requests: 128
  max_model_len: 131072
  gpu_memory_utilization: 0.9
  swap_space: 16

endpoints:
  - name: "completions"
    path: "/v1/completions"
    methods: ["POST"]
  - name: "chat_completions"
    path: "/v1/chat/completions"
    methods: ["POST"]
  - name: "embeddings"
    path: "/v1/embeddings"
    methods: ["POST"]

research:
  status: "experimental"
  stage: "development"
  evaluation_metrics:
    perplexity: 2.34
    accuracy_mmlu: 0.847
    accuracy_gsm8k: 0.892
    accuracy_humaneval: 0.756
    accuracy_mbpp: 0.723

benchmarks:
  reasoning:
    arc_challenge: 0.834
    hellaswag: 0.889
    winogrande: 0.823
  code:
    humaneval: 0.756
    mbpp: 0.723
    ds1000: 0.645
  mathematics:
    gsm8k: 0.892
    math: 0.567
    minerva: 0.534
  knowledge:
    mmlu: 0.847
    truthfulqa: 0.612

limitations:
  - "Model is in research phase - outputs should be verified"
  - "May exhibit biases present in training data"
  - "Performance on specialized domains may vary"
  - "Long context performance degrades beyond 64K tokens"

license: "Apache-2.0"
citation: |
  @misc{helion-2.5-rnd,
    title={Helion-2.5-Rnd: Advanced Research Language Model},
    author={DeepXR Team},
    year={2025},
    publisher={DeepXR},
    url={https://huggingface.co/DeepXR/Helion-2.5-Rnd}
  }

safety:
  content_filtering: true
  toxicity_threshold: 0.5
  pii_detection: true
  prompt_injection_protection: true

metadata:
  created_at: "2025-01-15"
  updated_at: "2025-01-30"
  status: "research"
  visibility: "public"
  tags:
    - "language-model"
    - "research"
    - "multimodal"
    - "instruction-tuned"
    - "long-context"