| data_cfgs: | |
| eval_data_files: null | |
| eval_datasets: null | |
| eval_optional_args: [] | |
| eval_size: null | |
| eval_split: null | |
| eval_subset: null | |
| eval_template: null | |
| ptx_data_files: extracted_panda.json | |
| ptx_datasets: /aifs4su/yaodong/datasets/ShareGPT4Video/extracted | |
| ptx_optional_args: [] | |
| ptx_size: 25000 | |
| ptx_split: train | |
| ptx_subset: null | |
| ptx_template: NExTQA | |
| train_data_files: extracted_preference_10k_washed.json | |
| train_datasets: /aifs4su/yaodong/datasets/aaa_dataset/TV2T-preference/extracted | |
| train_optional_args: [] | |
| train_size: null | |
| train_split: train | |
| train_subset: null | |
| train_template: NExTQA_preference | |
| logger_cfgs: | |
| cache_dir: null | |
| log_project: align-anything | |
| log_run_name: ppo | |
| log_type: wandb | |
| output_dir: ../outputs/ppo_qwen2vl_10k_baseline | |
| save_interval: 300.0 | |
| model_cfgs: | |
| actor_model_name_or_path: /aifs4su/yaodong/models/Qwen2-VL-7B-Instruct | |
| model_max_length: 2048 | |
| repetition_penalty: 1.0 | |
| reward_critic_model_name_or_path: /aifs4su/yaodong/projects/hantao/dev_cham/align-anything/outputs/rm_tiv2t_10k_baseline | |
| reward_model_name_or_path: /aifs4su/yaodong/projects/hantao/dev_cham/align-anything/outputs/rm_tiv2t_10k_baseline | |
| temperature: 1.0 | |
| top_p: 1.0 | |
| trust_remote_code: true | |
| special_tokens: null | |
| train_cfgs: | |
| actor_gradient_checkpointing: true | |
| actor_lr: 5.0e-07 | |
| actor_lr_scheduler_type: cosine | |
| actor_lr_warmup_ratio: 0.03 | |
| actor_weight_decay: 0.0 | |
| adam_betas: | |
| - 0.9 | |
| - 0.95 | |
| bf16: true | |
| clip_range_ratio: 0.2 | |
| clip_range_score: 50.0 | |
| clip_range_value: 5.0 | |
| critic_gradient_checkpointing: true | |
| critic_lr: 5.0e-07 | |
| critic_lr_scheduler_type: constant | |
| critic_lr_warmup_ratio: 0.03 | |
| critic_weight_decay: 0.0 | |
| ds_cfgs: ds_z3_config.json | |
| epochs: 3 | |
| eval_interval: 10 | |
| eval_strategy: epoch | |
| fp16: false | |
| freeze_language_model: false | |
| freeze_mm_proj: false | |
| freeze_vision_tower: true | |
| gae_lambda: 0.95 | |
| gamma: 1.0 | |
| gradient_accumulation_steps: 1 | |
| kl_coeff: 0.02 | |
| normalize_reward: false | |
| per_device_eval_batch_size: 2 | |
| per_device_prompt_batch_size: 2 | |
| per_device_train_batch_size: 2 | |
| ptx_coeff: 16.0 | |
| seed: 42 | |
| update_iters: 1 | |