|
|
--- |
|
|
library_name: transformers |
|
|
license: apache-2.0 |
|
|
datasets: |
|
|
- meta-math/MetaMathQA |
|
|
language: |
|
|
- en |
|
|
base_model: |
|
|
- Qwen/Qwen3-4B |
|
|
pipeline_tag: text-generation |
|
|
--- |
|
|
|
|
|
# Model Card for Qwen3-4B LoRA Fine-Tuned on MetaMathQA |
|
|
|
|
|
|
|
|
### Training Data |
|
|
|
|
|
The model was fine-tuned on the [MetaMathQA](https://huggingface.co/datasets/meta-math/MetaMathQA) dataset. |
|
|
|
|
|
#### Training Hyperparameters |
|
|
|
|
|
- `batch_size` = 8 |
|
|
- `epoch` = 1 |
|
|
- `learning_rate` = 1e-4 |
|
|
|
|
|
LoRA configuration: |
|
|
- `r` = 16 |
|
|
- `lora_alpha` = 32 |
|
|
- `lora_dropout` = 0.05 |
|
|
|
|
|
#### Metrics |
|
|
|
|
|
Training-run metrics reported after 1 epoch: `metrics={'train_runtime': 729.5559, 'train_samples_per_second': 9.746, 'train_steps_per_second': 0.306, 'total_flos': 7.949170591137792e+16, 'train_loss': 2.817356810976037, 'epoch': 1.0}` |
|
|
|
|
|
|