# Source: Hugging Face Space 3DAIGC/LHM (running on Zero)
# File: LHM/configs/inference/human-lrm-500M.yaml (4.57 kB)
# Last commit: c614b0f "add wheels" by QZFantasies (8 months before capture)

	# LHM-500M
	experiment:
	type: lrm
	seed: 42
	parent: video_human_benchmark
	child: human-lrm-500M
	model:
	# image encoder
	model_name: SapDinoLRMBHSD3_5
	encoder_type: dinov2_fusion
	encoder_model_name: "dinov2_vitl14_reg"
	encoder_feat_dim: 1024 # dinov2 embeding size 1024
	encoder_freeze: False

	fine_encoder_type: sapiens
	fine_encoder_model_name: "./pretrained_models/sapiens/pretrained/checkpoints/sapiens_1b/sapiens_1b_epoch_173_torchscript.pt2" # sapiens pretrained model path
	fine_encoder_feat_dim: 1536 # sapiens embeding size 1024
	fine_encoder_freeze: True

	use_face_id: True

	# points embeddings
	# num_pcl: 10240
	latent_query_points_type: "e2e_smplx_sub1"
	pcl_dim: 1024
	facesr: True

	transformer_type: "sd3_mm_bh_cond" # multi-modal BH attention.
	transformer_heads: 16 # 30
	transformer_dim: 1024 # 30 * 64=1920
	transformer_layers: 5 # 30
	tf_grad_ckpt: true
	encoder_grad_ckpt: true

	# for gs renderer
	human_model_path: "./pretrained_models/human_model_files"
	smplx_subdivide_num: 1
	smplx_type: "smplx_2"
	gs_query_dim: 1024
	gs_use_rgb: True
	gs_sh: 3
	dense_sample_pts: 40000 # 4,000
	gs_mlp_network_config:
	n_neurons: 512
	n_hidden_layers: 2
	activation: silu
	# gs_xyz_offset_max_step: 0.05625 # 1.8 / 32
	# gs_clip_scaling: 0.2 # avoid too large Sphere
	gs_xyz_offset_max_step: 1. # 1.8 / 32
	gs_clip_scaling: [100, 0.01, 0.05, 3000] # [start, start_v, end_v, end]
	expr_param_dim: 100
	shape_param_dim: 10

	fix_opacity: False
	fix_rotation: False
	cano_pose_type: 1 # 0 means exavatar-pose 1 indicates REC-MV pose

	dataset:
	subsets:
	- name: video_human_flame
	root_dirs: "./train_data/ClothVideo"
	meta_path:
	train: "./train_data/ClothVideo/label/valid_id_with_img_list_clean_30W.json"
	val: "./train_data/ClothVideo/label/valid_id_with_img_list_val.json"
	sample_rate: 1.0
	use_flame: True
	src_head_size: 112
	- name: video_human_flame_v2
	root_dirs: "./train_data/ClothVideo"
	meta_path:
	train: "./train_data/ClothVideo/label/valid_synthetic_data_train.json"
	val: "./train_data/ClothVideo/label/valid_synthetic_data_val.json"
	sample_rate: 1.0
	use_flame: True
	src_head_size: 112
	sample_side_views: 5
	source_image_res: 1024
	src_head_size: 112
	render_image:
	low: 512
	high: 512
	region: null
	num_train_workers: 4
	multiply: 16 # dino features
	num_val_workers: 2
	pin_mem: true
	repeat_num: 1

	train:
	mixed_precision: bf16 # REPLACE THIS BASED ON GPU TYPE
	find_unused_parameters: false
	loss_func:
	pixel_loss: l1 # L1 or MSE
	ball_loss:
	type: heuristic # heuristic ball_loss
	group:
	head: 1.
	lower_body: 100.
	upper_body: 1000.
	hands: 10000.
	offset_loss:
	type: classical # heuristic ball_loss
	group:
	head: 1.
	lower_body: 1.
	upper_body: 100.
	hands: 1000.
	loss:
	pixel_weight: 0.0
	masked_pixel_weight: 1.0
	masked_head_weight: 0.0
	perceptual_weight: 1.0
	# tv_weight: 5e-4
	tv_weight: -1
	mask_weight: 1.0
	face_id_weight: 0.05
	asap_weight: 10.0 # ball loss
	acap_weight: 1000.0 # offset loss
	optim:
	lr: 4e-5
	weight_decay: 0.05
	beta1: 0.9
	beta2: 0.95
	clip_grad_norm: 0.1 # diffusion model
	scheduler:
	type: cosine
	warmup_real_iters: 0
	batch_size: 4 # REPLACE THIS (PER GPU)
	accum_steps: 1 # REPLACE THIS
	epochs: 60 # REPLACE THIS
	debug_global_steps: null

	val:
	batch_size: 2
	global_step_period: 1000
	debug_batches: 10

	saver:
	auto_resume: True
	load_model: None
	checkpoint_root: ./exps/checkpoints
	checkpoint_global_steps: 1000
	checkpoint_keep_level: 60

	logger:
	stream_level: WARNING
	log_level: INFO
	log_root: ./exps/logs
	tracker_root: ./exps/trackers
	enable_profiler: false
	trackers:
	- tensorboard
	image_monitor:
	train_global_steps: 100
	samples_per_log: 4

	compile:
	suppress_errors: true
	print_specializations: true
	disable: true