---
ds_cfg:
  train_micro_batch_size_per_gpu: ${per_gpu_train_batch_size}
  gradient_accumulation_steps: ${gradient_accumulation_steps}
  scheduler:
    type: WarmupDecayLR
    params:
      total_num_steps: 989
      warmup_max_lr: ${learning_rate}
      warmup_num_steps: 59
      warmup_type: linear
  optimizer:
    type: AdamW
    params:
      lr: ${learning_rate}
      betas:
      - 0.9
      - 0.95
      eps: 1.0e-06
      weight_decay: ${weight_decay}
  bf16:
    enabled: true
  zero_optimization:
    stage: 1
    stage3_param_persistence_threshold: 100000
    stage3_max_live_parameters: 100000000
    stage3_prefetch_bucket_size: 100000000
    memory_efficient_linear: false
  steps_per_print: 25
  gradient_clipping: 1.0
  prescale_gradients: false
sft_model_dir: experiments/gemma.2b.it.meta_math_distil.H100.w4.v1.0/checkpoint-2000/
train_file: ${sft_model_dir}/meta_math_sub.25k.rap.train.0shot.n10.tem1.0.p0.7.v1.0_clean_fix.prm_cp3200_best_of_10.v1.0.(2,3).pos0.5.neg10.product.full_only_pos2pos.json
dev_file: null
test_file: null
torch_dtype:
  _target_: general_util.training_utils.return_torch_dtype
  dtype: bfloat16
tokenizer_init:
  _target_: general_util.tokenization_utils.init_tokenizer
  tokenizer_path: experiments/gemma.2b.it.meta_math_distil.H100.w4.v1.0/checkpoint-2000/
  padding_side: left
device_map:
  _target_: models.utils.return_single_device_map
model:
  _target_: models.gemma.GemmaForCausalLMDPO.from_pretrained_with_ref_model
  beta: 0.5
  gradient_checkpointing: false
  attn_implementation: flash_attention_2
  torch_dtype: ${torch_dtype}
  device_map: ${device_map}
  ref_model:
    _target_: models.gemma.GemmaForCausalLM.from_pretrained
    pretrained_model_name_or_path: ${model_name_or_path}
    torch_dtype: ${torch_dtype}
    attn_implementation: flash_attention_2
    device_map: ${device_map}
read_tensor_dpo:
  _target_: data.logic_combine.MultiMappingDataset
  file_path: experiments/gemma.2b.it.meta_math_distil.H100.w4.v1.0/checkpoint-2000//meta_math_sub.25k.rap.train.0shot.n10.tem1.0.p0.7.v1.0_clean_fix.json
  aligner:
    _target_: data.input_aligner.concat_aligner
    aligners:
    - _target_: data.input_aligner.dpo_pair_aligner_cleaned
      response_field: response
      id_field: id
      do_sample: false
  template:
    chosen: '{instruction}


      ### Question: {query}


      SubQuestion 1: {pos}<eos>'
    reject: '{instruction}


      ### Question: {query}


      SubQuestion 1: {neg}<eos>'
    prompt: '{instruction}


      ### Question: {query}


      SubQuestion 1:'
  instruction: 'Given a question, please decompose it into sub-questions. For each
    sub-question, please answer it in a complete sentence, ending with "The answer
    is". When the original question is answerable, please start the sub-question with
    "Now we can answer the question: ".'
  kv_mapping:
    chosen: chosen
    reject: reject
    id: index
    prompt: prompt
read_tensor_step_dpo:
  _target_: data.logic_combine.MultiMappingDataset
  aligner:
    _target_: data.logic_combine.field_extract_aligner
    input_index_field: id
    extract_index_field: id
    extract_fields:
    - query
    extra_file: experiments/gemma.2b.it.meta_math_distil.H100.w4.v1.0/checkpoint-2000//meta_math_sub.25k.rap.train.0shot.n10.tem1.0.p0.7.v1.0_clean_fix.json
  template:
    chosen: '{instruction}


      ### Question: {query}


      SubQuestion 1: {chosen}<eos>'
    reject: '{instruction}


      ### Question: {query}


      SubQuestion 1: {reject}<eos>'
    prompt: '{instruction}


      ### Question: {query}


      SubQuestion 1:'
  instruction: 'Given a question, please decompose it into sub-questions. For each
    sub-question, please answer it in a complete sentence, ending with "The answer
    is". When the original question is answerable, please start the sub-question with
    "Now we can answer the question: ".'
  kv_mapping:
    chosen: chosen
    reject: reject
    id: index
    prompt: prompt
read_tensor:
  _target_: data.general.ReplayDataset
  _recursive_: false
  new_dataset_cfg: ${read_tensor_step_dpo}
  old_dataset_cfg: ${read_tensor_dpo}
  replay_ratio: 0.2
dist_load_data_barrier: false
extended_vocab: null
collator:
  _target_: data.dpo.DPOCollator
  tokenizer: ${tokenizer_init}
  max_seq_length: 1024
num_workers: 8
prefetch_factor: 2
model_name_or_path: ${sft_model_dir}
pretrain: null
dp_size: 4
tp_size: 1
pp_size: 1
exp_name: gemma.2b.it.meta_math_rap.step.dpo.H100.w4.v1.0.s${seed}
exp_notes: null
output_dir: experiments/${exp_name}
do_train: true
evaluate_during_training: false
do_eval: false
eval_sub_path: checkpoint-100
per_gpu_train_batch_size: 2
per_gpu_eval_batch_size: 4
learning_rate: 1.0e-06
gradient_accumulation_steps: 8
weight_decay: 0.1
adam_epsilon: 1.0e-06
adam_betas: (0.9, 0.98)
total_dataset_len: 63348
max_grad_norm: 1.0
num_train_epochs: 1
max_steps: 0
warmup_proportion: 0.06
warmup_steps: 0
optimizer: null
use_nvlamb: null
bit_training: null
logging_steps: 5
save_ds_state: false
save_steps: 100
save_best: false
eval_steps: 400
ddp_eval: true
no_cuda: false
seed: 43
local_rank: 0
fp16: true
fp16_opt_level: O1
fp16_bfloat16: true
prediction_cfg:
  metric: loss
  measure: -1
  best_checkpoint: null
  best_result: null
eval_forward_fn:
  _target_: general_util.evaluator.DefaultForwardFn
post_process:
  _target_: post_processors.dpo.DPOEvalPostProcessor
summary_helper:
  _target_: general_util.tensorboard_helper.WandbWriter
  batch_index_or_keys: null
  outputs_index_or_keys:
    train/chosen_reward: chosen_reward
    train/rejected_reward: rejected_reward
n_gpu: 1
device: cuda:0
train_batch_size: 2
eval_batch_size: null
world_size: 4