---
# Training configuration; results are written under "output/cameractrl_model".
#
# NOTE(review): this file was recovered from a table rendering in which every
# row was wrapped in "| ... | |" and all indentation was lost. The page-header
# residue ("Spaces:" / "Runtime error") was not configuration and is dropped.
# The nesting below is reconstructed from key grouping -- confirm it against
# the code that loads this file before relying on it.

output_dir: "output/cameractrl_model"
# Placeholder value -- replace with a real path before use.
pretrained_model_path: "[replace with SVD root path]"
unet_subfolder: "unet"
down_block_types:
  - 'CrossAttnDownBlockSpatioTemporalPoseCond'
  - 'CrossAttnDownBlockSpatioTemporalPoseCond'
  - 'CrossAttnDownBlockSpatioTemporalPoseCond'
  - 'DownBlockSpatioTemporal'
up_block_types:
  - 'UpBlockSpatioTemporal'
  - 'CrossAttnUpBlockSpatioTemporalPoseCond'
  - 'CrossAttnUpBlockSpatioTemporalPoseCond'
  - 'CrossAttnUpBlockSpatioTemporalPoseCond'

train_data:
  # Placeholder value -- replace with a real path before use.
  root_path: "[replace RealEstate10K root path]"
  annotation_json: "annotations/train.json"
  sample_stride: 8
  sample_n_frames: 14
  relative_pose: true
  zero_t_first_frame: true
  sample_size: [320, 576]
  rescale_fxy: true
  shuffle_frames: false
  use_flip: false

# Same keys and values as train_data except for annotation_json and the extra
# return_clip_name flag.
validation_data:
  # Placeholder value -- replace with a real path before use.
  root_path: "[replace RealEstate10K root path]"
  annotation_json: "annotations/validation.json"
  sample_stride: 8
  sample_n_frames: 14
  relative_pose: true
  zero_t_first_frame: true
  sample_size: [320, 576]
  rescale_fxy: true
  shuffle_frames: false
  use_flip: false
  # NOTE(review): presumably the last key of validation_data -- confirm.
  return_clip_name: true

# NOTE(review): placed at top level (not under validation_data) -- confirm.
random_null_image_ratio: 0.15

pose_encoder_kwargs:
  downscale_factor: 8
  channels: [320, 640, 1280, 1280]
  nums_rb: 2
  cin: 384
  ksize: 1
  sk: true
  use_conv: false
  compression_factor: 1
  temporal_attention_nhead: 8
  # Single-element list; the original trailing comma was redundant.
  attention_block_types: ["Temporal_Self"]
  temporal_position_encoding: true
  # Equal to sample_n_frames (14) in both dataset sections of this file.
  temporal_position_encoding_max_len: 14

attention_processor_kwargs:
  add_spatial: false
  add_temporal: true
  attn_processor_name: 'attn1'
  # Same values as pose_encoder_kwargs.channels in this file.
  pose_feature_dimensions: [320, 640, 1280, 1280]
  query_condition: true
  key_value_condition: true
  scale: 1.0

# NOTE(review): everything from here on is treated as top-level -- confirm.
do_sanity_check: true
sample_before_training: false

max_train_epoch: -1
max_train_steps: 50000
validation_steps: 2500
# Single-element list; the original trailing comma was redundant.
validation_steps_tuple: [500]

# Written with an explicit fractional digit; same value as the original 3.e-5.
learning_rate: 3.0e-5

P_mean: 0.7
P_std: 1.6
condition_image_noise_mean: -3.0
condition_image_noise_std: 0.5

sample_latent: true
first_image_cond: true

num_inference_steps: 25
min_guidance_scale: 1.0
max_guidance_scale: 3.0

num_workers: 8
train_batch_size: 1
checkpointing_epochs: -1
checkpointing_steps: 10000

mixed_precision_training: false
enable_xformers_memory_efficient_attention: true

global_seed: 42
logger_interval: 10