lglg666 committed on
Commit
0730c18
·
verified ·
1 Parent(s): 59daec7

Upload folder using huggingface_hub

Browse files
Files changed (2) hide show
  1. config.yaml +141 -0
  2. model.pt +3 -0
config.yaml ADDED
@@ -0,0 +1,141 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ================ Train Config ================ #
2
+ lyric_processor:
3
+ max_dur: 150
4
+ min_dur: 30
5
+ prompt_len: 10
6
+ pad_to_max: true
7
+
8
+
9
+ # ================ Audio tokenizer ================ #
10
+ audio_tokenizer_checkpoint: Flow1dVAE1rvq_./ckpt/model_1rvq/model_2_fixed.safetensors
11
+ audio_tokenizer_frame_rate: 25
12
+ audio_tokenizer_code_depth: 1
13
+ sample_rate: 48000
14
+
15
+ audio_tokenizer_checkpoint_sep: Flow1dVAESeparate_./ckpt/model_septoken/model_2.safetensors
16
+ audio_tokenizer_frame_rate_sep: 25
17
+ audio_tokenizer_code_depth_sep: 2
18
+ sample_rate_sep: 48000
19
+
20
+ # ================ VAE ================ #
21
+ vae_config: ./ckpt/vae/stable_audio_1920_vae.json
22
+ vae_model: ./ckpt/vae/autoencoder_music_1320k.ckpt
23
+
24
+ # ================== LM =========================== #
25
+ lm:
26
+ lm_type: Llama # [Llama]
27
+ dim: 1536
28
+ intermediate_size: 8960
29
+ num_heads: 12
30
+ num_layers: 28
31
+ num_layers_sub: 12
32
+ code_depth: 3
33
+ code_size: 16384
34
+ max_position_embeddings: 8196
35
+ max_position_embeddings_sub: 10000
36
+ rope_theta: 100000.0
37
+ rope_theta_sub: 500000.0
38
+ dropout: 0.0
39
+ use_flash_attn_2: true
40
+ activation: gelu
41
+ norm_first: true
42
+ bias_ff: false
43
+ bias_attn: false
44
+ causal: true
45
+ custom: false
46
+ memory_efficient: true
47
+ attention_as_float32: false
48
+ layer_scale: null
49
+ positional_embedding: sin
50
+ xpos: false
51
+ checkpointing: torch
52
+ weight_init: gaussian
53
+ depthwise_init: current
54
+ zero_bias_init: true
55
+ norm: layer_norm
56
+ cross_attention: false
57
+ qk_layer_norm: false
58
+ qk_layer_norm_cross: false
59
+ attention_dropout: null
60
+ kv_repeat: 1
61
+
62
+ codebooks_pattern:
63
+ modeling: delay
64
+ delay:
65
+ delays: [ 0, 250, 250 ]
66
+ flatten_first: 0
67
+ empty_initial: 0
68
+
69
+ # ================ Conditioners ===================== #
70
+ classifier_free_guidance:
71
+ # drop all conditions simultaneously
72
+ training_dropout: 0.15
73
+ inference_coef: 1.5
74
+
75
+ attribute_dropout:
76
+ # drop each condition separately
77
+ args:
78
+ active_on_eval: false
79
+ text:
80
+ description: 0.0
81
+ type_info: 0.5
82
+ audio:
83
+ prompt_audio: 0.0
84
+
85
+
86
+ use_text_training: True
87
+ fuser:
88
+ sum: []
89
+ prepend: [ description, prompt_audio, type_info ] # this order is the same as the input concatenation order
90
+
91
+ conditioners:
92
+ prompt_audio:
93
+ model: qt_embedding
94
+ qt_embedding:
95
+ code_size: 16384
96
+ code_depth: 3
97
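+ max_len: ${eval:${prompt_len}*${audio_tokenizer_frame_rate}+2} # 10*25+2 = 252 with the values above (original note read 25*10+2+1; confirm whether an extra +1 is applied elsewhere)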
98
+ description:
99
+ model: QwTokenizer
100
+ QwTokenizer:
101
+ token_path: third_party/Qwen2-7B
102
+ max_len: 300
103
+ add_token_list: ${load_yaml:conf/vocab.yaml}
104
+ type_info:
105
+ model: QwTextTokenizer
106
+ QwTextTokenizer:
107
+ token_path: third_party/Qwen2-7B
108
+ max_len: 50
109
+
110
+ offload:
111
+ audiolm:
112
+ offload_module: self
113
+ cpu_mem_gb: 0
114
+ pre_copy_step: 1
115
+ clean_cache_after_forward: false
116
+ dtype: torch.float16
117
+ offload_layer_dict:
118
+ transformer: 4
119
+ transformer2: 4
120
+ ignore_layer_list: []
121
+ clean_cache_wrapper:
122
+ module: self
123
+ method_name: _sample_next_token
124
+ diff_mem_gb_thre: 2
125
+ debug: false
126
+
127
+ wav_tokenizer_diffusion:
128
+ offload_module: self.model.model
129
+ pre_copy_step: 1
130
+ clean_cache_after_forward: false
131
+ cpu_mem_gb: -1
132
+ dtype: null
133
+ offload_layer_dict:
134
+ cfm_wrapper: 5
135
+ hubert: 4
136
+ ignore_layer_list: []
137
+ clean_cache_wrapper:
138
+ module: self.model.model.cfm_wrapper.estimator
139
+ method_name: forward
140
+ diff_mem_gb_thre: 1
141
+ debug: false
model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8763fc75e5db768c334a9fbadd08e2004eccb6e15156c76b4c2a3984f8fbb884
3
+ size 11318365872