ImNotTam committed on
Commit
cbd18bc
·
verified ·
1 Parent(s): 07cced4

Upload full training folder with all checkpoints

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +5 -0
  2. README.md +179 -0
  3. checkpoint-150/README.md +210 -0
  4. checkpoint-150/adapter_config.json +38 -0
  5. checkpoint-150/adapter_model.safetensors +3 -0
  6. checkpoint-150/added_tokens.json +3 -0
  7. checkpoint-150/chat_template.jinja +47 -0
  8. checkpoint-150/optimizer.pt +3 -0
  9. checkpoint-150/preprocessor_config.json +29 -0
  10. checkpoint-150/processor_config.json +4 -0
  11. checkpoint-150/rng_state.pth +3 -0
  12. checkpoint-150/scheduler.pt +3 -0
  13. checkpoint-150/special_tokens_map.json +33 -0
  14. checkpoint-150/tokenizer.json +3 -0
  15. checkpoint-150/tokenizer.model +3 -0
  16. checkpoint-150/tokenizer_config.json +0 -0
  17. checkpoint-150/trainer_state.json +172 -0
  18. checkpoint-150/training_args.bin +3 -0
  19. checkpoint-200/README.md +210 -0
  20. checkpoint-200/adapter_config.json +38 -0
  21. checkpoint-200/adapter_model.safetensors +3 -0
  22. checkpoint-200/added_tokens.json +3 -0
  23. checkpoint-200/chat_template.jinja +47 -0
  24. checkpoint-200/optimizer.pt +3 -0
  25. checkpoint-200/preprocessor_config.json +29 -0
  26. checkpoint-200/processor_config.json +4 -0
  27. checkpoint-200/rng_state.pth +3 -0
  28. checkpoint-200/scheduler.pt +3 -0
  29. checkpoint-200/special_tokens_map.json +33 -0
  30. checkpoint-200/tokenizer.json +3 -0
  31. checkpoint-200/tokenizer.model +3 -0
  32. checkpoint-200/tokenizer_config.json +0 -0
  33. checkpoint-200/trainer_state.json +215 -0
  34. checkpoint-200/training_args.bin +3 -0
  35. checkpoint-210/README.md +210 -0
  36. checkpoint-210/adapter_config.json +38 -0
  37. checkpoint-210/adapter_model.safetensors +3 -0
  38. checkpoint-210/added_tokens.json +3 -0
  39. checkpoint-210/chat_template.jinja +47 -0
  40. checkpoint-210/optimizer.pt +3 -0
  41. checkpoint-210/preprocessor_config.json +29 -0
  42. checkpoint-210/processor_config.json +4 -0
  43. checkpoint-210/rng_state.pth +3 -0
  44. checkpoint-210/scheduler.pt +3 -0
  45. checkpoint-210/special_tokens_map.json +33 -0
  46. checkpoint-210/tokenizer.json +3 -0
  47. checkpoint-210/tokenizer.model +3 -0
  48. checkpoint-210/tokenizer_config.json +0 -0
  49. checkpoint-210/trainer_state.json +222 -0
  50. checkpoint-210/training_args.bin +3 -0
.gitattributes CHANGED
@@ -33,3 +33,8 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ checkpoint-150/tokenizer.json filter=lfs diff=lfs merge=lfs -text
37
+ checkpoint-200/tokenizer.json filter=lfs diff=lfs merge=lfs -text
38
+ checkpoint-210/tokenizer.json filter=lfs diff=lfs merge=lfs -text
39
+ final_model/tokenizer.json filter=lfs diff=lfs merge=lfs -text
40
+ lora_adapters/tokenizer.json filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,179 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ language:
3
+ - vi
4
+ - en
5
+ license: apache-2.0
6
+ tags:
7
+ - llm-judge
8
+ - training-checkpoint
9
+ - lora
10
+ - unsloth
11
+ ---
12
+
13
+ # llm-judge-full-backup
14
+
15
+ Full training folder backup - Toàn bộ checkpoints và models.
16
+
17
+ ## 📂 Cấu trúc Folder
18
+ ```
19
+ train_llm_judge_v2/
20
+ ├── checkpoint-150/ # Checkpoint tại step 150
21
+ ├── checkpoint-200/ # Checkpoint tại step 200
22
+ ├── checkpoint-210/ # Checkpoint tại step 210
23
+ ├── final_model/ # Model cuối cùng (merged)
24
+ ├── lora_adapters/ # LoRA adapters
25
+ ├── README.md
26
+ ├── zero_shot_metrics.json
27
+ └── zero_shot_results.csv
28
+ ```
29
+
30
+ ## 🚀 Sử Dụng
31
+
32
+ ### 1️⃣ Clone Repo
33
+ ```bash
34
+ git lfs install
35
+ git clone https://huggingface.co/ImNotTam/llm-judge-full-backup
36
+ cd llm-judge-full-backup
37
+ ```
38
+
39
+ ### 2️⃣ Load LoRA Adapters (Nhẹ nhất - khuyến nghị)
40
+ ```python
41
+ from unsloth import FastLanguageModel
42
+
43
+ model, tokenizer = FastLanguageModel.from_pretrained(
44
+ model_name="ImNotTam/llm-judge-full-backup",
45
+ subfolder="lora_adapters",
46
+ max_seq_length=2048,
47
+ dtype=None,
48
+ load_in_4bit=True,
49
+ )
50
+
51
+ # Enable inference mode
52
+ FastLanguageModel.for_inference(model)
53
+
54
+ # Test
55
+ prompt = "Đánh giá response này..."
56
+ inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
57
+ outputs = model.generate(**inputs, max_new_tokens=256)
58
+ print(tokenizer.decode(outputs[0], skip_special_tokens=True))
59
+ ```
60
+
61
+ ### 3️⃣ Load Final Model
62
+ ```python
63
+ from transformers import AutoModelForCausalLM, AutoTokenizer
64
+
65
+ model = AutoModelForCausalLM.from_pretrained(
66
+ "ImNotTam/llm-judge-full-backup",
67
+ subfolder="final_model",
68
+ device_map="auto",
69
+ torch_dtype="auto"
70
+ )
71
+ tokenizer = AutoTokenizer.from_pretrained("ImNotTam/llm-judge-full-backup", subfolder="final_model")
72
+
73
+ # Inference
74
+ inputs = tokenizer("Your prompt", return_tensors="pt").to("cuda")
75
+ outputs = model.generate(**inputs)
76
+ print(tokenizer.decode(outputs[0]))
77
+ ```
78
+
79
+ ### 4️⃣ Resume Training từ Checkpoint
80
+ ```python
81
+ from transformers import AutoModelForCausalLM, Trainer, TrainingArguments
82
+
83
+ # Load checkpoint muốn resume
84
+ model = AutoModelForCausalLM.from_pretrained(
85
+ "ImNotTam/llm-judge-full-backup",
86
+ subfolder="checkpoint-210", # Chọn checkpoint
87
+ device_map="auto"
88
+ )
89
+
90
+ # Continue training
91
+ trainer = Trainer(
92
+ model=model,
93
+ args=TrainingArguments(
94
+ output_dir="./continue_training",
95
+ # ... your training args
96
+ ),
97
+ )
98
+ trainer.train(resume_from_checkpoint="checkpoint-210")  # local path to the cloned checkpoint folder; True would only search output_dir
99
+ ```
100
+
101
+ ### 5️⃣ Fine-tune Tiếp từ LoRA Adapter
102
+ ```python
103
+ from unsloth import FastLanguageModel
104
+ from trl import SFTTrainer
105
+
106
+ # Load LoRA adapter
107
+ model, tokenizer = FastLanguageModel.from_pretrained(
108
+ model_name="ImNotTam/llm-judge-full-backup",
109
+ subfolder="lora_adapters",
110
+ max_seq_length=2048,
111
+ dtype=None,
112
+ load_in_4bit=True,
113
+ )
114
+
115
+ # Add LoRA config để train tiếp
116
+ model = FastLanguageModel.get_peft_model(
117
+ model,
118
+ r=16,
119
+ target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
120
+ "gate_proj", "up_proj", "down_proj"],
121
+ lora_alpha=16,
122
+ lora_dropout=0,
123
+ bias="none",
124
+ use_gradient_checkpointing="unsloth",
125
+ )
126
+
127
+ # Train với data mới
128
+ trainer = SFTTrainer(
129
+ model=model,
130
+ tokenizer=tokenizer,
131
+ train_dataset=your_new_dataset,
132
+ # ... training args
133
+ )
134
+ trainer.train()
135
+ ```
136
+
137
+ ### 6️⃣ Xem Metrics và Results
138
+ ```python
139
+ import json
140
+ import pandas as pd
141
+
142
+ # Load metrics
143
+ with open("zero_shot_metrics.json", "r") as f:
144
+ metrics = json.load(f)
145
+ print("📊 Metrics:", metrics)
146
+
147
+ # Load results
148
+ results = pd.read_csv("zero_shot_results.csv")
149
+ print("\n📈 Results:")
150
+ print(results.head())
151
+ ```
152
+
153
+ ## 📋 Nội Dung Repo
154
+
155
+ | Folder/File | Mô tả | Kích thước |
156
+ |-------------|-------|------------|
157
+ | `lora_adapters/` | LoRA adapters (nhẹ) | ~50-100 MB |
158
+ | `final_model/` | Model merged đầy đủ | ~4-8 GB |
159
+ | `checkpoint-150/` | Training checkpoint (LoRA adapter + optimizer state) | ~1 GB |
160
+ | `checkpoint-200/` | Training checkpoint (LoRA adapter + optimizer state) | ~1 GB |
161
+ | `checkpoint-210/` | Training checkpoint (LoRA adapter + optimizer state) | ~1 GB |
162
+ | `zero_shot_metrics.json` | Evaluation metrics | <1 MB |
163
+ | `zero_shot_results.csv` | Detailed results | <1 MB |
164
+
165
+ ## 💡 Khuyến Nghị
166
+
167
+ - **Inference nhanh:** Dùng `lora_adapters/`
168
+ - **Production:** Dùng `final_model/`
169
+ - **Train tiếp:** Load `lora_adapters/` + add LoRA config
170
+ - **Resume training:** Load checkpoint cụ thể
171
+
172
+ ## 📦 Requirements
173
+ ```bash
174
+ pip install unsloth transformers torch trl
175
+ ```
176
+
177
+ ## 📄 License
178
+
179
+ Apache 2.0
checkpoint-150/README.md ADDED
@@ -0,0 +1,210 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: unsloth/medgemma-4b-it-unsloth-bnb-4bit
3
+ library_name: peft
4
+ pipeline_tag: text-generation
5
+ tags:
6
+ - base_model:adapter:unsloth/medgemma-4b-it-unsloth-bnb-4bit
7
+ - lora
8
+ - sft
9
+ - transformers
10
+ - trl
11
+ - unsloth
12
+ ---
13
+
14
+ # Model Card for Model ID
15
+
16
+ <!-- Provide a quick summary of what the model is/does. -->
17
+
18
+
19
+
20
+ ## Model Details
21
+
22
+ ### Model Description
23
+
24
+ <!-- Provide a longer summary of what this model is. -->
25
+
26
+
27
+
28
+ - **Developed by:** [More Information Needed]
29
+ - **Funded by [optional]:** [More Information Needed]
30
+ - **Shared by [optional]:** [More Information Needed]
31
+ - **Model type:** [More Information Needed]
32
+ - **Language(s) (NLP):** [More Information Needed]
33
+ - **License:** [More Information Needed]
34
+ - **Finetuned from model [optional]:** [More Information Needed]
35
+
36
+ ### Model Sources [optional]
37
+
38
+ <!-- Provide the basic links for the model. -->
39
+
40
+ - **Repository:** [More Information Needed]
41
+ - **Paper [optional]:** [More Information Needed]
42
+ - **Demo [optional]:** [More Information Needed]
43
+
44
+ ## Uses
45
+
46
+ <!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
47
+
48
+ ### Direct Use
49
+
50
+ <!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
51
+
52
+ [More Information Needed]
53
+
54
+ ### Downstream Use [optional]
55
+
56
+ <!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
57
+
58
+ [More Information Needed]
59
+
60
+ ### Out-of-Scope Use
61
+
62
+ <!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
63
+
64
+ [More Information Needed]
65
+
66
+ ## Bias, Risks, and Limitations
67
+
68
+ <!-- This section is meant to convey both technical and sociotechnical limitations. -->
69
+
70
+ [More Information Needed]
71
+
72
+ ### Recommendations
73
+
74
+ <!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
75
+
76
+ Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
77
+
78
+ ## How to Get Started with the Model
79
+
80
+ Use the code below to get started with the model.
81
+
82
+ [More Information Needed]
83
+
84
+ ## Training Details
85
+
86
+ ### Training Data
87
+
88
+ <!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
89
+
90
+ [More Information Needed]
91
+
92
+ ### Training Procedure
93
+
94
+ <!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
95
+
96
+ #### Preprocessing [optional]
97
+
98
+ [More Information Needed]
99
+
100
+
101
+ #### Training Hyperparameters
102
+
103
+ - **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
104
+
105
+ #### Speeds, Sizes, Times [optional]
106
+
107
+ <!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
108
+
109
+ [More Information Needed]
110
+
111
+ ## Evaluation
112
+
113
+ <!-- This section describes the evaluation protocols and provides the results. -->
114
+
115
+ ### Testing Data, Factors & Metrics
116
+
117
+ #### Testing Data
118
+
119
+ <!-- This should link to a Dataset Card if possible. -->
120
+
121
+ [More Information Needed]
122
+
123
+ #### Factors
124
+
125
+ <!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
126
+
127
+ [More Information Needed]
128
+
129
+ #### Metrics
130
+
131
+ <!-- These are the evaluation metrics being used, ideally with a description of why. -->
132
+
133
+ [More Information Needed]
134
+
135
+ ### Results
136
+
137
+ [More Information Needed]
138
+
139
+ #### Summary
140
+
141
+
142
+
143
+ ## Model Examination [optional]
144
+
145
+ <!-- Relevant interpretability work for the model goes here -->
146
+
147
+ [More Information Needed]
148
+
149
+ ## Environmental Impact
150
+
151
+ <!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
152
+
153
+ Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
154
+
155
+ - **Hardware Type:** [More Information Needed]
156
+ - **Hours used:** [More Information Needed]
157
+ - **Cloud Provider:** [More Information Needed]
158
+ - **Compute Region:** [More Information Needed]
159
+ - **Carbon Emitted:** [More Information Needed]
160
+
161
+ ## Technical Specifications [optional]
162
+
163
+ ### Model Architecture and Objective
164
+
165
+ [More Information Needed]
166
+
167
+ ### Compute Infrastructure
168
+
169
+ [More Information Needed]
170
+
171
+ #### Hardware
172
+
173
+ [More Information Needed]
174
+
175
+ #### Software
176
+
177
+ [More Information Needed]
178
+
179
+ ## Citation [optional]
180
+
181
+ <!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
182
+
183
+ **BibTeX:**
184
+
185
+ [More Information Needed]
186
+
187
+ **APA:**
188
+
189
+ [More Information Needed]
190
+
191
+ ## Glossary [optional]
192
+
193
+ <!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
194
+
195
+ [More Information Needed]
196
+
197
+ ## More Information [optional]
198
+
199
+ [More Information Needed]
200
+
201
+ ## Model Card Authors [optional]
202
+
203
+ [More Information Needed]
204
+
205
+ ## Model Card Contact
206
+
207
+ [More Information Needed]
208
+ ### Framework versions
209
+
210
+ - PEFT 0.17.1
checkpoint-150/adapter_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": {
4
+ "base_model_class": "Gemma3ForConditionalGeneration",
5
+ "parent_library": "transformers.models.gemma3.modeling_gemma3",
6
+ "unsloth_fixed": true
7
+ },
8
+ "base_model_name_or_path": "unsloth/medgemma-4b-it-unsloth-bnb-4bit",
9
+ "bias": "none",
10
+ "corda_config": null,
11
+ "eva_config": null,
12
+ "exclude_modules": null,
13
+ "fan_in_fan_out": false,
14
+ "inference_mode": true,
15
+ "init_lora_weights": true,
16
+ "layer_replication": null,
17
+ "layers_pattern": null,
18
+ "layers_to_transform": null,
19
+ "loftq_config": {},
20
+ "lora_alpha": 64,
21
+ "lora_bias": false,
22
+ "lora_dropout": 0.05,
23
+ "megatron_config": null,
24
+ "megatron_core": "megatron.core",
25
+ "modules_to_save": null,
26
+ "peft_type": "LORA",
27
+ "qalora_group_size": 16,
28
+ "r": 64,
29
+ "rank_pattern": {},
30
+ "revision": null,
31
+ "target_modules": "(?:.*?(?:vision|image|visual|patch|language|text).*?(?:self_attn|attention|attn|mlp|feed_forward|ffn|dense).*?(?:k_proj|v_proj|q_proj|out_proj|fc1|fc2|o_proj|gate_proj|up_proj|down_proj).*?)|(?:\\bmodel\\.layers\\.[\\d]{1,}\\.(?:self_attn|attention|attn|mlp|feed_forward|ffn|dense)\\.(?:(?:k_proj|v_proj|q_proj|out_proj|fc1|fc2|o_proj|gate_proj|up_proj|down_proj)))",
32
+ "target_parameters": null,
33
+ "task_type": "CAUSAL_LM",
34
+ "trainable_token_indices": null,
35
+ "use_dora": false,
36
+ "use_qalora": false,
37
+ "use_rslora": true
38
+ }
checkpoint-150/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b4bb0508789a06cf240bca6ab8f61b455bcf61321e2c28d3d364fd1cabe8a9c
3
+ size 616090488
checkpoint-150/added_tokens.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "<image_soft_token>": 262144
3
+ }
checkpoint-150/chat_template.jinja ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {{ bos_token }}
2
+ {%- if messages[0]['role'] == 'system' -%}
3
+ {%- if messages[0]['content'] is string -%}
4
+ {%- set first_user_prefix = messages[0]['content'] + '
5
+
6
+ ' -%}
7
+ {%- else -%}
8
+ {%- set first_user_prefix = messages[0]['content'][0]['text'] + '
9
+
10
+ ' -%}
11
+ {%- endif -%}
12
+ {%- set loop_messages = messages[1:] -%}
13
+ {%- else -%}
14
+ {%- set first_user_prefix = "" -%}
15
+ {%- set loop_messages = messages -%}
16
+ {%- endif -%}
17
+ {%- for message in loop_messages -%}
18
+ {%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) -%}
19
+ {{ raise_exception("Conversation roles must alternate user/assistant/user/assistant/...") }}
20
+ {%- endif -%}
21
+ {%- if (message['role'] == 'assistant') -%}
22
+ {%- set role = "model" -%}
23
+ {%- else -%}
24
+ {%- set role = message['role'] -%}
25
+ {%- endif -%}
26
+ {{ '<start_of_turn>' + role + '
27
+ ' + (first_user_prefix if loop.first else "") }}
28
+ {%- if message['content'] is string -%}
29
+ {{ message['content'] | trim }}
30
+ {%- elif message['content'] is iterable -%}
31
+ {%- for item in message['content'] -%}
32
+ {%- if item['type'] == 'image' -%}
33
+ {{ '<start_of_image>' }}
34
+ {%- elif item['type'] == 'text' -%}
35
+ {{ item['text'] | trim }}
36
+ {%- endif -%}
37
+ {%- endfor -%}
38
+ {%- else -%}
39
+ {{ raise_exception("Invalid content type") }}
40
+ {%- endif -%}
41
+ {{ '<end_of_turn>
42
+ ' }}
43
+ {%- endfor -%}
44
+ {%- if add_generation_prompt -%}
45
+ {{'<start_of_turn>model
46
+ '}}
47
+ {%- endif -%}
checkpoint-150/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c1369d7f5c9ec4d0befb1fcf94e468e6cc3ef5a776c8cf4dc5aa5468709f4401
3
+ size 313733861
checkpoint-150/preprocessor_config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_convert_rgb": null,
3
+ "do_normalize": true,
4
+ "do_pan_and_scan": null,
5
+ "do_rescale": true,
6
+ "do_resize": true,
7
+ "image_mean": [
8
+ 0.5,
9
+ 0.5,
10
+ 0.5
11
+ ],
12
+ "image_processor_type": "Gemma3ImageProcessor",
13
+ "image_seq_length": 256,
14
+ "image_std": [
15
+ 0.5,
16
+ 0.5,
17
+ 0.5
18
+ ],
19
+ "pan_and_scan_max_num_crops": null,
20
+ "pan_and_scan_min_crop_size": null,
21
+ "pan_and_scan_min_ratio_to_activate": null,
22
+ "processor_class": "Gemma3Processor",
23
+ "resample": 2,
24
+ "rescale_factor": 0.00392156862745098,
25
+ "size": {
26
+ "height": 896,
27
+ "width": 896
28
+ }
29
+ }
checkpoint-150/processor_config.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "image_seq_length": 256,
3
+ "processor_class": "Gemma3Processor"
4
+ }
checkpoint-150/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:288897675cdba8d6a7b9d99e80c351a4f0b61b0872bed353f0af03b57d468e05
3
+ size 14645
checkpoint-150/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ea170cafb2d13b7eb55e5efbd68b89bd8609391610031fd2f3d1014bef160c4f
3
+ size 1465
checkpoint-150/special_tokens_map.json ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "boi_token": "<start_of_image>",
3
+ "bos_token": {
4
+ "content": "<bos>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false
9
+ },
10
+ "eoi_token": "<end_of_image>",
11
+ "eos_token": {
12
+ "content": "<end_of_turn>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false
17
+ },
18
+ "image_token": "<image_soft_token>",
19
+ "pad_token": {
20
+ "content": "<pad>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false
25
+ },
26
+ "unk_token": {
27
+ "content": "<unk>",
28
+ "lstrip": false,
29
+ "normalized": false,
30
+ "rstrip": false,
31
+ "single_word": false
32
+ }
33
+ }
checkpoint-150/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7666402c0617d170e6b0a985b3130c3fb0795393aa0970600994a5d9aae12351
3
+ size 33384822
checkpoint-150/tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1299c11d7cf632ef3b4e11937501358ada021bbdf7c47638d13c0ee982f2e79c
3
+ size 4689074
checkpoint-150/tokenizer_config.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-150/trainer_state.json ADDED
@@ -0,0 +1,172 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 100,
3
+ "best_metric": 0.19296656548976898,
4
+ "best_model_checkpoint": "/teamspace/studios/this_studio/DATN/output/train_llm_judge_v2/checkpoint-100",
5
+ "epoch": 3.581818181818182,
6
+ "eval_steps": 50,
7
+ "global_step": 150,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.24242424242424243,
14
+ "grad_norm": 5.080691337585449,
15
+ "learning_rate": 8.571428571428571e-05,
16
+ "loss": 0.3263,
17
+ "step": 10
18
+ },
19
+ {
20
+ "epoch": 0.48484848484848486,
21
+ "grad_norm": 1.2092622518539429,
22
+ "learning_rate": 0.00018095238095238095,
23
+ "loss": 0.0425,
24
+ "step": 20
25
+ },
26
+ {
27
+ "epoch": 0.7272727272727273,
28
+ "grad_norm": 0.8844823837280273,
29
+ "learning_rate": 0.0001991171517679013,
30
+ "loss": 0.0253,
31
+ "step": 30
32
+ },
33
+ {
34
+ "epoch": 0.9696969696969697,
35
+ "grad_norm": 0.8625118732452393,
36
+ "learning_rate": 0.0001955572805786141,
37
+ "loss": 0.0199,
38
+ "step": 40
39
+ },
40
+ {
41
+ "epoch": 1.1939393939393939,
42
+ "grad_norm": 0.5813594460487366,
43
+ "learning_rate": 0.00018936326403234125,
44
+ "loss": 0.0265,
45
+ "step": 50
46
+ },
47
+ {
48
+ "epoch": 1.1939393939393939,
49
+ "eval_loss": 0.22386720776557922,
50
+ "eval_runtime": 55.0529,
51
+ "eval_samples_per_second": 5.958,
52
+ "eval_steps_per_second": 0.381,
53
+ "step": 50
54
+ },
55
+ {
56
+ "epoch": 1.4363636363636363,
57
+ "grad_norm": 0.4559573531150818,
58
+ "learning_rate": 0.00018070584725522762,
59
+ "loss": 0.0195,
60
+ "step": 60
61
+ },
62
+ {
63
+ "epoch": 1.6787878787878787,
64
+ "grad_norm": 0.4941273033618927,
65
+ "learning_rate": 0.00016982368180860728,
66
+ "loss": 0.0189,
67
+ "step": 70
68
+ },
69
+ {
70
+ "epoch": 1.9212121212121214,
71
+ "grad_norm": 4.042570114135742,
72
+ "learning_rate": 0.0001570167469866962,
73
+ "loss": 0.0187,
74
+ "step": 80
75
+ },
76
+ {
77
+ "epoch": 2.1454545454545455,
78
+ "grad_norm": 0.38869237899780273,
79
+ "learning_rate": 0.0001426380805454254,
80
+ "loss": 0.0227,
81
+ "step": 90
82
+ },
83
+ {
84
+ "epoch": 2.3878787878787877,
85
+ "grad_norm": 0.2846849858760834,
86
+ "learning_rate": 0.00012708404681430053,
87
+ "loss": 0.0172,
88
+ "step": 100
89
+ },
90
+ {
91
+ "epoch": 2.3878787878787877,
92
+ "eval_loss": 0.19296656548976898,
93
+ "eval_runtime": 35.4935,
94
+ "eval_samples_per_second": 9.241,
95
+ "eval_steps_per_second": 0.592,
96
+ "step": 100
97
+ },
98
+ {
99
+ "epoch": 2.6303030303030304,
100
+ "grad_norm": 0.4179554581642151,
101
+ "learning_rate": 0.00011078341046187589,
102
+ "loss": 0.0164,
103
+ "step": 110
104
+ },
105
+ {
106
+ "epoch": 2.8727272727272726,
107
+ "grad_norm": 0.2789813280105591,
108
+ "learning_rate": 9.418551710895243e-05,
109
+ "loss": 0.0165,
110
+ "step": 120
111
+ },
112
+ {
113
+ "epoch": 3.096969696969697,
114
+ "grad_norm": 0.21186885237693787,
115
+ "learning_rate": 7.774790660436858e-05,
116
+ "loss": 0.021,
117
+ "step": 130
118
+ },
119
+ {
120
+ "epoch": 3.3393939393939394,
121
+ "grad_norm": 0.27075108885765076,
122
+ "learning_rate": 6.192370041755505e-05,
123
+ "loss": 0.0154,
124
+ "step": 140
125
+ },
126
+ {
127
+ "epoch": 3.581818181818182,
128
+ "grad_norm": 0.2492239624261856,
129
+ "learning_rate": 4.7149110828754464e-05,
130
+ "loss": 0.0153,
131
+ "step": 150
132
+ },
133
+ {
134
+ "epoch": 3.581818181818182,
135
+ "eval_loss": 0.19950562715530396,
136
+ "eval_runtime": 35.5167,
137
+ "eval_samples_per_second": 9.235,
138
+ "eval_steps_per_second": 0.591,
139
+ "step": 150
140
+ }
141
+ ],
142
+ "logging_steps": 10,
143
+ "max_steps": 210,
144
+ "num_input_tokens_seen": 0,
145
+ "num_train_epochs": 5,
146
+ "save_steps": 50,
147
+ "stateful_callbacks": {
148
+ "EarlyStoppingCallback": {
149
+ "args": {
150
+ "early_stopping_patience": 5,
151
+ "early_stopping_threshold": 0.001
152
+ },
153
+ "attributes": {
154
+ "early_stopping_patience_counter": 1
155
+ }
156
+ },
157
+ "TrainerControl": {
158
+ "args": {
159
+ "should_epoch_stop": false,
160
+ "should_evaluate": false,
161
+ "should_log": false,
162
+ "should_save": true,
163
+ "should_training_stop": false
164
+ },
165
+ "attributes": {}
166
+ }
167
+ },
168
+ "total_flos": 6.786801248895418e+16,
169
+ "train_batch_size": 8,
170
+ "trial_name": null,
171
+ "trial_params": null
172
+ }
checkpoint-150/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:448416956fd033ecbacc0301c2347c6e7da44e6d59e19be42ee84d5a73ac1775
3
+ size 6417
checkpoint-200/README.md ADDED
@@ -0,0 +1,210 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: unsloth/medgemma-4b-it-unsloth-bnb-4bit
3
+ library_name: peft
4
+ pipeline_tag: text-generation
5
+ tags:
6
+ - base_model:adapter:unsloth/medgemma-4b-it-unsloth-bnb-4bit
7
+ - lora
8
+ - sft
9
+ - transformers
10
+ - trl
11
+ - unsloth
12
+ ---
13
+
14
+ # Model Card for Model ID
15
+
16
+ <!-- Provide a quick summary of what the model is/does. -->
17
+
18
+
19
+
20
+ ## Model Details
21
+
22
+ ### Model Description
23
+
24
+ <!-- Provide a longer summary of what this model is. -->
25
+
26
+
27
+
28
+ - **Developed by:** [More Information Needed]
29
+ - **Funded by [optional]:** [More Information Needed]
30
+ - **Shared by [optional]:** [More Information Needed]
31
+ - **Model type:** [More Information Needed]
32
+ - **Language(s) (NLP):** [More Information Needed]
33
+ - **License:** [More Information Needed]
34
+ - **Finetuned from model [optional]:** [More Information Needed]
35
+
36
+ ### Model Sources [optional]
37
+
38
+ <!-- Provide the basic links for the model. -->
39
+
40
+ - **Repository:** [More Information Needed]
41
+ - **Paper [optional]:** [More Information Needed]
42
+ - **Demo [optional]:** [More Information Needed]
43
+
44
+ ## Uses
45
+
46
+ <!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
47
+
48
+ ### Direct Use
49
+
50
+ <!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
51
+
52
+ [More Information Needed]
53
+
54
+ ### Downstream Use [optional]
55
+
56
+ <!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
57
+
58
+ [More Information Needed]
59
+
60
+ ### Out-of-Scope Use
61
+
62
+ <!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
63
+
64
+ [More Information Needed]
65
+
66
+ ## Bias, Risks, and Limitations
67
+
68
+ <!-- This section is meant to convey both technical and sociotechnical limitations. -->
69
+
70
+ [More Information Needed]
71
+
72
+ ### Recommendations
73
+
74
+ <!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
75
+
76
+ Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
77
+
78
+ ## How to Get Started with the Model
79
+
80
+ Use the code below to get started with the model.
81
+
82
+ [More Information Needed]
83
+
84
+ ## Training Details
85
+
86
+ ### Training Data
87
+
88
+ <!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
89
+
90
+ [More Information Needed]
91
+
92
+ ### Training Procedure
93
+
94
+ <!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
95
+
96
+ #### Preprocessing [optional]
97
+
98
+ [More Information Needed]
99
+
100
+
101
+ #### Training Hyperparameters
102
+
103
+ - **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
104
+
105
+ #### Speeds, Sizes, Times [optional]
106
+
107
+ <!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
108
+
109
+ [More Information Needed]
110
+
111
+ ## Evaluation
112
+
113
+ <!-- This section describes the evaluation protocols and provides the results. -->
114
+
115
+ ### Testing Data, Factors & Metrics
116
+
117
+ #### Testing Data
118
+
119
+ <!-- This should link to a Dataset Card if possible. -->
120
+
121
+ [More Information Needed]
122
+
123
+ #### Factors
124
+
125
+ <!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
126
+
127
+ [More Information Needed]
128
+
129
+ #### Metrics
130
+
131
+ <!-- These are the evaluation metrics being used, ideally with a description of why. -->
132
+
133
+ [More Information Needed]
134
+
135
+ ### Results
136
+
137
+ [More Information Needed]
138
+
139
+ #### Summary
140
+
141
+
142
+
143
+ ## Model Examination [optional]
144
+
145
+ <!-- Relevant interpretability work for the model goes here -->
146
+
147
+ [More Information Needed]
148
+
149
+ ## Environmental Impact
150
+
151
+ <!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
152
+
153
+ Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
154
+
155
+ - **Hardware Type:** [More Information Needed]
156
+ - **Hours used:** [More Information Needed]
157
+ - **Cloud Provider:** [More Information Needed]
158
+ - **Compute Region:** [More Information Needed]
159
+ - **Carbon Emitted:** [More Information Needed]
160
+
161
+ ## Technical Specifications [optional]
162
+
163
+ ### Model Architecture and Objective
164
+
165
+ [More Information Needed]
166
+
167
+ ### Compute Infrastructure
168
+
169
+ [More Information Needed]
170
+
171
+ #### Hardware
172
+
173
+ [More Information Needed]
174
+
175
+ #### Software
176
+
177
+ [More Information Needed]
178
+
179
+ ## Citation [optional]
180
+
181
+ <!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
182
+
183
+ **BibTeX:**
184
+
185
+ [More Information Needed]
186
+
187
+ **APA:**
188
+
189
+ [More Information Needed]
190
+
191
+ ## Glossary [optional]
192
+
193
+ <!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
194
+
195
+ [More Information Needed]
196
+
197
+ ## More Information [optional]
198
+
199
+ [More Information Needed]
200
+
201
+ ## Model Card Authors [optional]
202
+
203
+ [More Information Needed]
204
+
205
+ ## Model Card Contact
206
+
207
+ [More Information Needed]
208
+ ### Framework versions
209
+
210
+ - PEFT 0.17.1
checkpoint-200/adapter_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": {
4
+ "base_model_class": "Gemma3ForConditionalGeneration",
5
+ "parent_library": "transformers.models.gemma3.modeling_gemma3",
6
+ "unsloth_fixed": true
7
+ },
8
+ "base_model_name_or_path": "unsloth/medgemma-4b-it-unsloth-bnb-4bit",
9
+ "bias": "none",
10
+ "corda_config": null,
11
+ "eva_config": null,
12
+ "exclude_modules": null,
13
+ "fan_in_fan_out": false,
14
+ "inference_mode": true,
15
+ "init_lora_weights": true,
16
+ "layer_replication": null,
17
+ "layers_pattern": null,
18
+ "layers_to_transform": null,
19
+ "loftq_config": {},
20
+ "lora_alpha": 64,
21
+ "lora_bias": false,
22
+ "lora_dropout": 0.05,
23
+ "megatron_config": null,
24
+ "megatron_core": "megatron.core",
25
+ "modules_to_save": null,
26
+ "peft_type": "LORA",
27
+ "qalora_group_size": 16,
28
+ "r": 64,
29
+ "rank_pattern": {},
30
+ "revision": null,
31
+ "target_modules": "(?:.*?(?:vision|image|visual|patch|language|text).*?(?:self_attn|attention|attn|mlp|feed_forward|ffn|dense).*?(?:k_proj|v_proj|q_proj|out_proj|fc1|fc2|o_proj|gate_proj|up_proj|down_proj).*?)|(?:\\bmodel\\.layers\\.[\\d]{1,}\\.(?:self_attn|attention|attn|mlp|feed_forward|ffn|dense)\\.(?:(?:k_proj|v_proj|q_proj|out_proj|fc1|fc2|o_proj|gate_proj|up_proj|down_proj)))",
32
+ "target_parameters": null,
33
+ "task_type": "CAUSAL_LM",
34
+ "trainable_token_indices": null,
35
+ "use_dora": false,
36
+ "use_qalora": false,
37
+ "use_rslora": true
38
+ }
checkpoint-200/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5b8ecc862dc6fdc36aad7f1a82d6d0f4fc70ca2950bc903739211c395bf0f975
3
+ size 616090488
checkpoint-200/added_tokens.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "<image_soft_token>": 262144
3
+ }
checkpoint-200/chat_template.jinja ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {{ bos_token }}
2
+ {%- if messages[0]['role'] == 'system' -%}
3
+ {%- if messages[0]['content'] is string -%}
4
+ {%- set first_user_prefix = messages[0]['content'] + '
5
+
6
+ ' -%}
7
+ {%- else -%}
8
+ {%- set first_user_prefix = messages[0]['content'][0]['text'] + '
9
+
10
+ ' -%}
11
+ {%- endif -%}
12
+ {%- set loop_messages = messages[1:] -%}
13
+ {%- else -%}
14
+ {%- set first_user_prefix = "" -%}
15
+ {%- set loop_messages = messages -%}
16
+ {%- endif -%}
17
+ {%- for message in loop_messages -%}
18
+ {%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) -%}
19
+ {{ raise_exception("Conversation roles must alternate user/assistant/user/assistant/...") }}
20
+ {%- endif -%}
21
+ {%- if (message['role'] == 'assistant') -%}
22
+ {%- set role = "model" -%}
23
+ {%- else -%}
24
+ {%- set role = message['role'] -%}
25
+ {%- endif -%}
26
+ {{ '<start_of_turn>' + role + '
27
+ ' + (first_user_prefix if loop.first else "") }}
28
+ {%- if message['content'] is string -%}
29
+ {{ message['content'] | trim }}
30
+ {%- elif message['content'] is iterable -%}
31
+ {%- for item in message['content'] -%}
32
+ {%- if item['type'] == 'image' -%}
33
+ {{ '<start_of_image>' }}
34
+ {%- elif item['type'] == 'text' -%}
35
+ {{ item['text'] | trim }}
36
+ {%- endif -%}
37
+ {%- endfor -%}
38
+ {%- else -%}
39
+ {{ raise_exception("Invalid content type") }}
40
+ {%- endif -%}
41
+ {{ '<end_of_turn>
42
+ ' }}
43
+ {%- endfor -%}
44
+ {%- if add_generation_prompt -%}
45
+ {{'<start_of_turn>model
46
+ '}}
47
+ {%- endif -%}
checkpoint-200/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:78c1ed1d94a0d489b6423262fb5e5adf5412a5763d0a1c6275484348b96a1c95
3
+ size 313733861
checkpoint-200/preprocessor_config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_convert_rgb": null,
3
+ "do_normalize": true,
4
+ "do_pan_and_scan": null,
5
+ "do_rescale": true,
6
+ "do_resize": true,
7
+ "image_mean": [
8
+ 0.5,
9
+ 0.5,
10
+ 0.5
11
+ ],
12
+ "image_processor_type": "Gemma3ImageProcessor",
13
+ "image_seq_length": 256,
14
+ "image_std": [
15
+ 0.5,
16
+ 0.5,
17
+ 0.5
18
+ ],
19
+ "pan_and_scan_max_num_crops": null,
20
+ "pan_and_scan_min_crop_size": null,
21
+ "pan_and_scan_min_ratio_to_activate": null,
22
+ "processor_class": "Gemma3Processor",
23
+ "resample": 2,
24
+ "rescale_factor": 0.00392156862745098,
25
+ "size": {
26
+ "height": 896,
27
+ "width": 896
28
+ }
29
+ }
checkpoint-200/processor_config.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "image_seq_length": 256,
3
+ "processor_class": "Gemma3Processor"
4
+ }
checkpoint-200/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:49054061db08efc799bac5433268d6cc96370b2cc70f22e801f43497a371666d
3
+ size 14645
checkpoint-200/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:de7ae5133658f75536394d841847cc74c73bd68c454de91636008e8318c7a743
3
+ size 1465
checkpoint-200/special_tokens_map.json ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "boi_token": "<start_of_image>",
3
+ "bos_token": {
4
+ "content": "<bos>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false
9
+ },
10
+ "eoi_token": "<end_of_image>",
11
+ "eos_token": {
12
+ "content": "<end_of_turn>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false
17
+ },
18
+ "image_token": "<image_soft_token>",
19
+ "pad_token": {
20
+ "content": "<pad>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false
25
+ },
26
+ "unk_token": {
27
+ "content": "<unk>",
28
+ "lstrip": false,
29
+ "normalized": false,
30
+ "rstrip": false,
31
+ "single_word": false
32
+ }
33
+ }
checkpoint-200/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7666402c0617d170e6b0a985b3130c3fb0795393aa0970600994a5d9aae12351
3
+ size 33384822
checkpoint-200/tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1299c11d7cf632ef3b4e11937501358ada021bbdf7c47638d13c0ee982f2e79c
3
+ size 4689074
checkpoint-200/tokenizer_config.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-200/trainer_state.json ADDED
@@ -0,0 +1,215 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 200,
3
+ "best_metric": 0.19150419533252716,
4
+ "best_model_checkpoint": "/teamspace/studios/this_studio/DATN/output/train_llm_judge_v2/checkpoint-200",
5
+ "epoch": 4.775757575757575,
6
+ "eval_steps": 50,
7
+ "global_step": 200,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.24242424242424243,
14
+ "grad_norm": 5.080691337585449,
15
+ "learning_rate": 8.571428571428571e-05,
16
+ "loss": 0.3263,
17
+ "step": 10
18
+ },
19
+ {
20
+ "epoch": 0.48484848484848486,
21
+ "grad_norm": 1.2092622518539429,
22
+ "learning_rate": 0.00018095238095238095,
23
+ "loss": 0.0425,
24
+ "step": 20
25
+ },
26
+ {
27
+ "epoch": 0.7272727272727273,
28
+ "grad_norm": 0.8844823837280273,
29
+ "learning_rate": 0.0001991171517679013,
30
+ "loss": 0.0253,
31
+ "step": 30
32
+ },
33
+ {
34
+ "epoch": 0.9696969696969697,
35
+ "grad_norm": 0.8625118732452393,
36
+ "learning_rate": 0.0001955572805786141,
37
+ "loss": 0.0199,
38
+ "step": 40
39
+ },
40
+ {
41
+ "epoch": 1.1939393939393939,
42
+ "grad_norm": 0.5813594460487366,
43
+ "learning_rate": 0.00018936326403234125,
44
+ "loss": 0.0265,
45
+ "step": 50
46
+ },
47
+ {
48
+ "epoch": 1.1939393939393939,
49
+ "eval_loss": 0.22386720776557922,
50
+ "eval_runtime": 55.0529,
51
+ "eval_samples_per_second": 5.958,
52
+ "eval_steps_per_second": 0.381,
53
+ "step": 50
54
+ },
55
+ {
56
+ "epoch": 1.4363636363636363,
57
+ "grad_norm": 0.4559573531150818,
58
+ "learning_rate": 0.00018070584725522762,
59
+ "loss": 0.0195,
60
+ "step": 60
61
+ },
62
+ {
63
+ "epoch": 1.6787878787878787,
64
+ "grad_norm": 0.4941273033618927,
65
+ "learning_rate": 0.00016982368180860728,
66
+ "loss": 0.0189,
67
+ "step": 70
68
+ },
69
+ {
70
+ "epoch": 1.9212121212121214,
71
+ "grad_norm": 4.042570114135742,
72
+ "learning_rate": 0.0001570167469866962,
73
+ "loss": 0.0187,
74
+ "step": 80
75
+ },
76
+ {
77
+ "epoch": 2.1454545454545455,
78
+ "grad_norm": 0.38869237899780273,
79
+ "learning_rate": 0.0001426380805454254,
80
+ "loss": 0.0227,
81
+ "step": 90
82
+ },
83
+ {
84
+ "epoch": 2.3878787878787877,
85
+ "grad_norm": 0.2846849858760834,
86
+ "learning_rate": 0.00012708404681430053,
87
+ "loss": 0.0172,
88
+ "step": 100
89
+ },
90
+ {
91
+ "epoch": 2.3878787878787877,
92
+ "eval_loss": 0.19296656548976898,
93
+ "eval_runtime": 35.4935,
94
+ "eval_samples_per_second": 9.241,
95
+ "eval_steps_per_second": 0.592,
96
+ "step": 100
97
+ },
98
+ {
99
+ "epoch": 2.6303030303030304,
100
+ "grad_norm": 0.4179554581642151,
101
+ "learning_rate": 0.00011078341046187589,
102
+ "loss": 0.0164,
103
+ "step": 110
104
+ },
105
+ {
106
+ "epoch": 2.8727272727272726,
107
+ "grad_norm": 0.2789813280105591,
108
+ "learning_rate": 9.418551710895243e-05,
109
+ "loss": 0.0165,
110
+ "step": 120
111
+ },
112
+ {
113
+ "epoch": 3.096969696969697,
114
+ "grad_norm": 0.21186885237693787,
115
+ "learning_rate": 7.774790660436858e-05,
116
+ "loss": 0.021,
117
+ "step": 130
118
+ },
119
+ {
120
+ "epoch": 3.3393939393939394,
121
+ "grad_norm": 0.27075108885765076,
122
+ "learning_rate": 6.192370041755505e-05,
123
+ "loss": 0.0154,
124
+ "step": 140
125
+ },
126
+ {
127
+ "epoch": 3.581818181818182,
128
+ "grad_norm": 0.2492239624261856,
129
+ "learning_rate": 4.7149110828754464e-05,
130
+ "loss": 0.0153,
131
+ "step": 150
132
+ },
133
+ {
134
+ "epoch": 3.581818181818182,
135
+ "eval_loss": 0.19950562715530396,
136
+ "eval_runtime": 35.5167,
137
+ "eval_samples_per_second": 9.235,
138
+ "eval_steps_per_second": 0.591,
139
+ "step": 150
140
+ },
141
+ {
142
+ "epoch": 3.824242424242424,
143
+ "grad_norm": 0.20888838171958923,
144
+ "learning_rate": 3.383141624031408e-05,
145
+ "loss": 0.0151,
146
+ "step": 160
147
+ },
148
+ {
149
+ "epoch": 4.048484848484849,
150
+ "grad_norm": 0.18963822722434998,
151
+ "learning_rate": 2.2337734083302164e-05,
152
+ "loss": 0.0197,
153
+ "step": 170
154
+ },
155
+ {
156
+ "epoch": 4.290909090909091,
157
+ "grad_norm": 0.19225962460041046,
158
+ "learning_rate": 1.2984900807073919e-05,
159
+ "loss": 0.0149,
160
+ "step": 180
161
+ },
162
+ {
163
+ "epoch": 4.533333333333333,
164
+ "grad_norm": 0.22141942381858826,
165
+ "learning_rate": 6.030737921409169e-06,
166
+ "loss": 0.0147,
167
+ "step": 190
168
+ },
169
+ {
170
+ "epoch": 4.775757575757575,
171
+ "grad_norm": 0.2131265550851822,
172
+ "learning_rate": 1.66694485272606e-06,
173
+ "loss": 0.0149,
174
+ "step": 200
175
+ },
176
+ {
177
+ "epoch": 4.775757575757575,
178
+ "eval_loss": 0.19150419533252716,
179
+ "eval_runtime": 35.5433,
180
+ "eval_samples_per_second": 9.228,
181
+ "eval_steps_per_second": 0.591,
182
+ "step": 200
183
+ }
184
+ ],
185
+ "logging_steps": 10,
186
+ "max_steps": 210,
187
+ "num_input_tokens_seen": 0,
188
+ "num_train_epochs": 5,
189
+ "save_steps": 50,
190
+ "stateful_callbacks": {
191
+ "EarlyStoppingCallback": {
192
+ "args": {
193
+ "early_stopping_patience": 5,
194
+ "early_stopping_threshold": 0.001
195
+ },
196
+ "attributes": {
197
+ "early_stopping_patience_counter": 0
198
+ }
199
+ },
200
+ "TrainerControl": {
201
+ "args": {
202
+ "should_epoch_stop": false,
203
+ "should_evaluate": false,
204
+ "should_log": false,
205
+ "should_save": true,
206
+ "should_training_stop": false
207
+ },
208
+ "attributes": {}
209
+ }
210
+ },
211
+ "total_flos": 9.050763942946675e+16,
212
+ "train_batch_size": 8,
213
+ "trial_name": null,
214
+ "trial_params": null
215
+ }
checkpoint-200/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:448416956fd033ecbacc0301c2347c6e7da44e6d59e19be42ee84d5a73ac1775
3
+ size 6417
checkpoint-210/README.md ADDED
@@ -0,0 +1,210 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: unsloth/medgemma-4b-it-unsloth-bnb-4bit
3
+ library_name: peft
4
+ pipeline_tag: text-generation
5
+ tags:
6
+ - base_model:adapter:unsloth/medgemma-4b-it-unsloth-bnb-4bit
7
+ - lora
8
+ - sft
9
+ - transformers
10
+ - trl
11
+ - unsloth
12
+ ---
13
+
14
+ # Model Card for Model ID
15
+
16
+ <!-- Provide a quick summary of what the model is/does. -->
17
+
18
+
19
+
20
+ ## Model Details
21
+
22
+ ### Model Description
23
+
24
+ <!-- Provide a longer summary of what this model is. -->
25
+
26
+
27
+
28
+ - **Developed by:** [More Information Needed]
29
+ - **Funded by [optional]:** [More Information Needed]
30
+ - **Shared by [optional]:** [More Information Needed]
31
+ - **Model type:** [More Information Needed]
32
+ - **Language(s) (NLP):** [More Information Needed]
33
+ - **License:** [More Information Needed]
34
+ - **Finetuned from model [optional]:** [More Information Needed]
35
+
36
+ ### Model Sources [optional]
37
+
38
+ <!-- Provide the basic links for the model. -->
39
+
40
+ - **Repository:** [More Information Needed]
41
+ - **Paper [optional]:** [More Information Needed]
42
+ - **Demo [optional]:** [More Information Needed]
43
+
44
+ ## Uses
45
+
46
+ <!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
47
+
48
+ ### Direct Use
49
+
50
+ <!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
51
+
52
+ [More Information Needed]
53
+
54
+ ### Downstream Use [optional]
55
+
56
+ <!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
57
+
58
+ [More Information Needed]
59
+
60
+ ### Out-of-Scope Use
61
+
62
+ <!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
63
+
64
+ [More Information Needed]
65
+
66
+ ## Bias, Risks, and Limitations
67
+
68
+ <!-- This section is meant to convey both technical and sociotechnical limitations. -->
69
+
70
+ [More Information Needed]
71
+
72
+ ### Recommendations
73
+
74
+ <!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
75
+
76
+ Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
77
+
78
+ ## How to Get Started with the Model
79
+
80
+ Use the code below to get started with the model.
81
+
82
+ [More Information Needed]
83
+
84
+ ## Training Details
85
+
86
+ ### Training Data
87
+
88
+ <!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
89
+
90
+ [More Information Needed]
91
+
92
+ ### Training Procedure
93
+
94
+ <!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
95
+
96
+ #### Preprocessing [optional]
97
+
98
+ [More Information Needed]
99
+
100
+
101
+ #### Training Hyperparameters
102
+
103
+ - **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
104
+
105
+ #### Speeds, Sizes, Times [optional]
106
+
107
+ <!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
108
+
109
+ [More Information Needed]
110
+
111
+ ## Evaluation
112
+
113
+ <!-- This section describes the evaluation protocols and provides the results. -->
114
+
115
+ ### Testing Data, Factors & Metrics
116
+
117
+ #### Testing Data
118
+
119
+ <!-- This should link to a Dataset Card if possible. -->
120
+
121
+ [More Information Needed]
122
+
123
+ #### Factors
124
+
125
+ <!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
126
+
127
+ [More Information Needed]
128
+
129
+ #### Metrics
130
+
131
+ <!-- These are the evaluation metrics being used, ideally with a description of why. -->
132
+
133
+ [More Information Needed]
134
+
135
+ ### Results
136
+
137
+ [More Information Needed]
138
+
139
+ #### Summary
140
+
141
+
142
+
143
+ ## Model Examination [optional]
144
+
145
+ <!-- Relevant interpretability work for the model goes here -->
146
+
147
+ [More Information Needed]
148
+
149
+ ## Environmental Impact
150
+
151
+ <!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
152
+
153
+ Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
154
+
155
+ - **Hardware Type:** [More Information Needed]
156
+ - **Hours used:** [More Information Needed]
157
+ - **Cloud Provider:** [More Information Needed]
158
+ - **Compute Region:** [More Information Needed]
159
+ - **Carbon Emitted:** [More Information Needed]
160
+
161
+ ## Technical Specifications [optional]
162
+
163
+ ### Model Architecture and Objective
164
+
165
+ [More Information Needed]
166
+
167
+ ### Compute Infrastructure
168
+
169
+ [More Information Needed]
170
+
171
+ #### Hardware
172
+
173
+ [More Information Needed]
174
+
175
+ #### Software
176
+
177
+ [More Information Needed]
178
+
179
+ ## Citation [optional]
180
+
181
+ <!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
182
+
183
+ **BibTeX:**
184
+
185
+ [More Information Needed]
186
+
187
+ **APA:**
188
+
189
+ [More Information Needed]
190
+
191
+ ## Glossary [optional]
192
+
193
+ <!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
194
+
195
+ [More Information Needed]
196
+
197
+ ## More Information [optional]
198
+
199
+ [More Information Needed]
200
+
201
+ ## Model Card Authors [optional]
202
+
203
+ [More Information Needed]
204
+
205
+ ## Model Card Contact
206
+
207
+ [More Information Needed]
208
+ ### Framework versions
209
+
210
+ - PEFT 0.17.1
checkpoint-210/adapter_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": {
4
+ "base_model_class": "Gemma3ForConditionalGeneration",
5
+ "parent_library": "transformers.models.gemma3.modeling_gemma3",
6
+ "unsloth_fixed": true
7
+ },
8
+ "base_model_name_or_path": "unsloth/medgemma-4b-it-unsloth-bnb-4bit",
9
+ "bias": "none",
10
+ "corda_config": null,
11
+ "eva_config": null,
12
+ "exclude_modules": null,
13
+ "fan_in_fan_out": false,
14
+ "inference_mode": true,
15
+ "init_lora_weights": true,
16
+ "layer_replication": null,
17
+ "layers_pattern": null,
18
+ "layers_to_transform": null,
19
+ "loftq_config": {},
20
+ "lora_alpha": 64,
21
+ "lora_bias": false,
22
+ "lora_dropout": 0.05,
23
+ "megatron_config": null,
24
+ "megatron_core": "megatron.core",
25
+ "modules_to_save": null,
26
+ "peft_type": "LORA",
27
+ "qalora_group_size": 16,
28
+ "r": 64,
29
+ "rank_pattern": {},
30
+ "revision": null,
31
+ "target_modules": "(?:.*?(?:vision|image|visual|patch|language|text).*?(?:self_attn|attention|attn|mlp|feed_forward|ffn|dense).*?(?:k_proj|v_proj|q_proj|out_proj|fc1|fc2|o_proj|gate_proj|up_proj|down_proj).*?)|(?:\\bmodel\\.layers\\.[\\d]{1,}\\.(?:self_attn|attention|attn|mlp|feed_forward|ffn|dense)\\.(?:(?:k_proj|v_proj|q_proj|out_proj|fc1|fc2|o_proj|gate_proj|up_proj|down_proj)))",
32
+ "target_parameters": null,
33
+ "task_type": "CAUSAL_LM",
34
+ "trainable_token_indices": null,
35
+ "use_dora": false,
36
+ "use_qalora": false,
37
+ "use_rslora": true
38
+ }
checkpoint-210/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a73201bfeb8af80949c5afc09dcc1fba771ce22c8862061d41c33b8135170cd1
3
+ size 616090488
checkpoint-210/added_tokens.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "<image_soft_token>": 262144
3
+ }
checkpoint-210/chat_template.jinja ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {{ bos_token }}
2
+ {%- if messages[0]['role'] == 'system' -%}
3
+ {%- if messages[0]['content'] is string -%}
4
+ {%- set first_user_prefix = messages[0]['content'] + '
5
+
6
+ ' -%}
7
+ {%- else -%}
8
+ {%- set first_user_prefix = messages[0]['content'][0]['text'] + '
9
+
10
+ ' -%}
11
+ {%- endif -%}
12
+ {%- set loop_messages = messages[1:] -%}
13
+ {%- else -%}
14
+ {%- set first_user_prefix = "" -%}
15
+ {%- set loop_messages = messages -%}
16
+ {%- endif -%}
17
+ {%- for message in loop_messages -%}
18
+ {%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) -%}
19
+ {{ raise_exception("Conversation roles must alternate user/assistant/user/assistant/...") }}
20
+ {%- endif -%}
21
+ {%- if (message['role'] == 'assistant') -%}
22
+ {%- set role = "model" -%}
23
+ {%- else -%}
24
+ {%- set role = message['role'] -%}
25
+ {%- endif -%}
26
+ {{ '<start_of_turn>' + role + '
27
+ ' + (first_user_prefix if loop.first else "") }}
28
+ {%- if message['content'] is string -%}
29
+ {{ message['content'] | trim }}
30
+ {%- elif message['content'] is iterable -%}
31
+ {%- for item in message['content'] -%}
32
+ {%- if item['type'] == 'image' -%}
33
+ {{ '<start_of_image>' }}
34
+ {%- elif item['type'] == 'text' -%}
35
+ {{ item['text'] | trim }}
36
+ {%- endif -%}
37
+ {%- endfor -%}
38
+ {%- else -%}
39
+ {{ raise_exception("Invalid content type") }}
40
+ {%- endif -%}
41
+ {{ '<end_of_turn>
42
+ ' }}
43
+ {%- endfor -%}
44
+ {%- if add_generation_prompt -%}
45
+ {{'<start_of_turn>model
46
+ '}}
47
+ {%- endif -%}
checkpoint-210/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7d24ad38b661f84aa63cd7234b7152e2c128e6f90df7c27e44fbde09431cca10
3
+ size 313733861
checkpoint-210/preprocessor_config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_convert_rgb": null,
3
+ "do_normalize": true,
4
+ "do_pan_and_scan": null,
5
+ "do_rescale": true,
6
+ "do_resize": true,
7
+ "image_mean": [
8
+ 0.5,
9
+ 0.5,
10
+ 0.5
11
+ ],
12
+ "image_processor_type": "Gemma3ImageProcessor",
13
+ "image_seq_length": 256,
14
+ "image_std": [
15
+ 0.5,
16
+ 0.5,
17
+ 0.5
18
+ ],
19
+ "pan_and_scan_max_num_crops": null,
20
+ "pan_and_scan_min_crop_size": null,
21
+ "pan_and_scan_min_ratio_to_activate": null,
22
+ "processor_class": "Gemma3Processor",
23
+ "resample": 2,
24
+ "rescale_factor": 0.00392156862745098,
25
+ "size": {
26
+ "height": 896,
27
+ "width": 896
28
+ }
29
+ }
checkpoint-210/processor_config.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "image_seq_length": 256,
3
+ "processor_class": "Gemma3Processor"
4
+ }
checkpoint-210/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:213b51163f9fd8aaeb4eb7f64e6f9ff98fe409d7d7e15a82bdad2b3630a3e3f9
3
+ size 14645
checkpoint-210/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b3a7a69473f81cc3a687073f5b9708c3df78eb456efcf86c1ffbb8db2fd0ae1
3
+ size 1465
checkpoint-210/special_tokens_map.json ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "boi_token": "<start_of_image>",
3
+ "bos_token": {
4
+ "content": "<bos>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false
9
+ },
10
+ "eoi_token": "<end_of_image>",
11
+ "eos_token": {
12
+ "content": "<end_of_turn>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false
17
+ },
18
+ "image_token": "<image_soft_token>",
19
+ "pad_token": {
20
+ "content": "<pad>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false
25
+ },
26
+ "unk_token": {
27
+ "content": "<unk>",
28
+ "lstrip": false,
29
+ "normalized": false,
30
+ "rstrip": false,
31
+ "single_word": false
32
+ }
33
+ }
checkpoint-210/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7666402c0617d170e6b0a985b3130c3fb0795393aa0970600994a5d9aae12351
3
+ size 33384822
checkpoint-210/tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1299c11d7cf632ef3b4e11937501358ada021bbdf7c47638d13c0ee982f2e79c
3
+ size 4689074
checkpoint-210/tokenizer_config.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-210/trainer_state.json ADDED
@@ -0,0 +1,222 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 200,
3
+ "best_metric": 0.19150419533252716,
4
+ "best_model_checkpoint": "/teamspace/studios/this_studio/DATN/output/train_llm_judge_v2/checkpoint-200",
5
+ "epoch": 5.0,
6
+ "eval_steps": 50,
7
+ "global_step": 210,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.24242424242424243,
14
+ "grad_norm": 5.080691337585449,
15
+ "learning_rate": 8.571428571428571e-05,
16
+ "loss": 0.3263,
17
+ "step": 10
18
+ },
19
+ {
20
+ "epoch": 0.48484848484848486,
21
+ "grad_norm": 1.2092622518539429,
22
+ "learning_rate": 0.00018095238095238095,
23
+ "loss": 0.0425,
24
+ "step": 20
25
+ },
26
+ {
27
+ "epoch": 0.7272727272727273,
28
+ "grad_norm": 0.8844823837280273,
29
+ "learning_rate": 0.0001991171517679013,
30
+ "loss": 0.0253,
31
+ "step": 30
32
+ },
33
+ {
34
+ "epoch": 0.9696969696969697,
35
+ "grad_norm": 0.8625118732452393,
36
+ "learning_rate": 0.0001955572805786141,
37
+ "loss": 0.0199,
38
+ "step": 40
39
+ },
40
+ {
41
+ "epoch": 1.1939393939393939,
42
+ "grad_norm": 0.5813594460487366,
43
+ "learning_rate": 0.00018936326403234125,
44
+ "loss": 0.0265,
45
+ "step": 50
46
+ },
47
+ {
48
+ "epoch": 1.1939393939393939,
49
+ "eval_loss": 0.22386720776557922,
50
+ "eval_runtime": 55.0529,
51
+ "eval_samples_per_second": 5.958,
52
+ "eval_steps_per_second": 0.381,
53
+ "step": 50
54
+ },
55
+ {
56
+ "epoch": 1.4363636363636363,
57
+ "grad_norm": 0.4559573531150818,
58
+ "learning_rate": 0.00018070584725522762,
59
+ "loss": 0.0195,
60
+ "step": 60
61
+ },
62
+ {
63
+ "epoch": 1.6787878787878787,
64
+ "grad_norm": 0.4941273033618927,
65
+ "learning_rate": 0.00016982368180860728,
66
+ "loss": 0.0189,
67
+ "step": 70
68
+ },
69
+ {
70
+ "epoch": 1.9212121212121214,
71
+ "grad_norm": 4.042570114135742,
72
+ "learning_rate": 0.0001570167469866962,
73
+ "loss": 0.0187,
74
+ "step": 80
75
+ },
76
+ {
77
+ "epoch": 2.1454545454545455,
78
+ "grad_norm": 0.38869237899780273,
79
+ "learning_rate": 0.0001426380805454254,
80
+ "loss": 0.0227,
81
+ "step": 90
82
+ },
83
+ {
84
+ "epoch": 2.3878787878787877,
85
+ "grad_norm": 0.2846849858760834,
86
+ "learning_rate": 0.00012708404681430053,
87
+ "loss": 0.0172,
88
+ "step": 100
89
+ },
90
+ {
91
+ "epoch": 2.3878787878787877,
92
+ "eval_loss": 0.19296656548976898,
93
+ "eval_runtime": 35.4935,
94
+ "eval_samples_per_second": 9.241,
95
+ "eval_steps_per_second": 0.592,
96
+ "step": 100
97
+ },
98
+ {
99
+ "epoch": 2.6303030303030304,
100
+ "grad_norm": 0.4179554581642151,
101
+ "learning_rate": 0.00011078341046187589,
102
+ "loss": 0.0164,
103
+ "step": 110
104
+ },
105
+ {
106
+ "epoch": 2.8727272727272726,
107
+ "grad_norm": 0.2789813280105591,
108
+ "learning_rate": 9.418551710895243e-05,
109
+ "loss": 0.0165,
110
+ "step": 120
111
+ },
112
+ {
113
+ "epoch": 3.096969696969697,
114
+ "grad_norm": 0.21186885237693787,
115
+ "learning_rate": 7.774790660436858e-05,
116
+ "loss": 0.021,
117
+ "step": 130
118
+ },
119
+ {
120
+ "epoch": 3.3393939393939394,
121
+ "grad_norm": 0.27075108885765076,
122
+ "learning_rate": 6.192370041755505e-05,
123
+ "loss": 0.0154,
124
+ "step": 140
125
+ },
126
+ {
127
+ "epoch": 3.581818181818182,
128
+ "grad_norm": 0.2492239624261856,
129
+ "learning_rate": 4.7149110828754464e-05,
130
+ "loss": 0.0153,
131
+ "step": 150
132
+ },
133
+ {
134
+ "epoch": 3.581818181818182,
135
+ "eval_loss": 0.19950562715530396,
136
+ "eval_runtime": 35.5167,
137
+ "eval_samples_per_second": 9.235,
138
+ "eval_steps_per_second": 0.591,
139
+ "step": 150
140
+ },
141
+ {
142
+ "epoch": 3.824242424242424,
143
+ "grad_norm": 0.20888838171958923,
144
+ "learning_rate": 3.383141624031408e-05,
145
+ "loss": 0.0151,
146
+ "step": 160
147
+ },
148
+ {
149
+ "epoch": 4.048484848484849,
150
+ "grad_norm": 0.18963822722434998,
151
+ "learning_rate": 2.2337734083302164e-05,
152
+ "loss": 0.0197,
153
+ "step": 170
154
+ },
155
+ {
156
+ "epoch": 4.290909090909091,
157
+ "grad_norm": 0.19225962460041046,
158
+ "learning_rate": 1.2984900807073919e-05,
159
+ "loss": 0.0149,
160
+ "step": 180
161
+ },
162
+ {
163
+ "epoch": 4.533333333333333,
164
+ "grad_norm": 0.22141942381858826,
165
+ "learning_rate": 6.030737921409169e-06,
166
+ "loss": 0.0147,
167
+ "step": 190
168
+ },
169
+ {
170
+ "epoch": 4.775757575757575,
171
+ "grad_norm": 0.2131265550851822,
172
+ "learning_rate": 1.66694485272606e-06,
173
+ "loss": 0.0149,
174
+ "step": 200
175
+ },
176
+ {
177
+ "epoch": 4.775757575757575,
178
+ "eval_loss": 0.19150419533252716,
179
+ "eval_runtime": 35.5433,
180
+ "eval_samples_per_second": 9.228,
181
+ "eval_steps_per_second": 0.591,
182
+ "step": 200
183
+ },
184
+ {
185
+ "epoch": 5.0,
186
+ "grad_norm": 0.7240699529647827,
187
+ "learning_rate": 1.3814530889433296e-08,
188
+ "loss": 0.0216,
189
+ "step": 210
190
+ }
191
+ ],
192
+ "logging_steps": 10,
193
+ "max_steps": 210,
194
+ "num_input_tokens_seen": 0,
195
+ "num_train_epochs": 5,
196
+ "save_steps": 50,
197
+ "stateful_callbacks": {
198
+ "EarlyStoppingCallback": {
199
+ "args": {
200
+ "early_stopping_patience": 5,
201
+ "early_stopping_threshold": 0.001
202
+ },
203
+ "attributes": {
204
+ "early_stopping_patience_counter": 0
205
+ }
206
+ },
207
+ "TrainerControl": {
208
+ "args": {
209
+ "should_epoch_stop": false,
210
+ "should_evaluate": false,
211
+ "should_log": false,
212
+ "should_save": true,
213
+ "should_training_stop": true
214
+ },
215
+ "attributes": {}
216
+ }
217
+ },
218
+ "total_flos": 9.476004161723347e+16,
219
+ "train_batch_size": 8,
220
+ "trial_name": null,
221
+ "trial_params": null
222
+ }
checkpoint-210/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:448416956fd033ecbacc0301c2347c6e7da44e6d59e19be42ee84d5a73ac1775
3
+ size 6417