Chong Zhang committed: init

- music_tokenizer/config.json +42 -0
- music_tokenizer/model.pt +3 -0
- wavtokenizer/config.yaml +164 -0
- wavtokenizer/model.pt +3 -0
    	
        music_tokenizer/config.json
    ADDED
    
@@ -0,0 +1,42 @@
+{
+    "resblock": "1",
+    "num_gpus": 8,
+    "batch_size": 140,
+    "learning_rate": 0.00002,
+    "adam_b1": 0.5,
+    "adam_b2": 0.9,
+    "lr_decay": 0.98,
+    "seed": 1234,
+
+    "upsample_rates": [8,5,4,2],
+    "upsample_kernel_sizes": [16,11,8,4],
+    "upsample_initial_channel": 512,
+    "resblock_kernel_sizes": [3,5,7,9,11,13],
+    "resblock_dilation_sizes": [[1,3,5], [1,3,5], [1,3,5], [1,3,5], [1,3,5], [1,3,5]],
+
+    "segment_size": 48000,
+    "num_mels": 80,
+    "num_freq": 1024,
+    "n_fft": 1024,
+    "hop_size": 240,
+    "win_size": 1024,
+
+    "sampling_rate": 48000,
+
+    "n_code_groups": 2,
+    "n_codes": 1024,
+    "codebook_loss_lambda": 1.0,
+    "commitment_loss_lambda": 0.25,
+
+    "fmin": 0,
+    "fmax": 48000,
+    "fmax_for_loss": null,
+
+    "num_workers": 24,
+
+    "dist_config": {
+        "dist_backend": "nccl",
+        "dist_url": "tcp://localhost:54321",
+        "world_size": 1
+    }
+}
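Not part of the diff itself: the JSON above follows a HiFi-GAN-style generator config extended with codebook fields. A minimal sketch of loading it and deriving a few implied numbers (illustrative only; reading the product of upsample_rates as the per-token stride is an assumption):

    import json

    with open("music_tokenizer/config.json") as f:
        cfg = json.load(f)

    # Product of the upsample rates: 8 * 5 * 4 * 2 = 320 output samples per code frame
    # (assuming the usual HiFi-GAN-style decoder where these rates multiply to the stride).
    stride = 1
    for r in cfg["upsample_rates"]:
        stride *= r

    print("code frames/s :", cfg["sampling_rate"] / stride)           # 48000 / 320 = 150
    print("mel frames/s  :", cfg["sampling_rate"] / cfg["hop_size"])  # 48000 / 240 = 200
    print("codebooks     :", cfg["n_code_groups"], "x", cfg["n_codes"], "entries")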
    	
        music_tokenizer/model.pt
    ADDED
    
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ba71efdc50378baf9776d607eb11566907c3810e6f221c316719c02591135626
+size 537087507
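model.pt is stored via Git LFS, so the three lines above are only the pointer; the actual ~537 MB checkpoint lives in LFS storage. A hedged sketch of fetching and peeking at it with huggingface_hub (the repo id below is a placeholder, and no particular checkpoint layout is assumed), not code from this commit:

    import torch
    from huggingface_hub import hf_hub_download

    # Placeholder repo id -- substitute the repository this commit belongs to.
    ckpt_path = hf_hub_download(repo_id="<org>/<repo>",
                                filename="music_tokenizer/model.pt")

    # Load on CPU and inspect the top-level structure without assuming its layout.
    state = torch.load(ckpt_path, map_location="cpu")
    print(type(state))
    if isinstance(state, dict):
        print(list(state)[:10])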
    	
        wavtokenizer/config.yaml
    ADDED
    
@@ -0,0 +1,164 @@
+# pytorch_lightning==1.8.6
+seed_everything: 3407
+trainer:
+  logger:
+    class_path: pytorch_lightning.loggers.TensorBoardLogger
+    init_args:
+      save_dir: ./result/
+      name: lightning_logs
+      version: null
+      log_graph: false
+      default_hp_metric: true
+      prefix: ''
+      sub_dir: null
+      logdir: null
+      comment: ''
+      purge_step: null
+      max_queue: 10
+      flush_secs: 120
+      filename_suffix: ''
+      write_to_disk: true
+      comet_config:
+        disabled: true
+  enable_checkpointing: true
+  callbacks:
+  - class_path: pytorch_lightning.callbacks.LearningRateMonitor
+    init_args:
+      logging_interval: null
+      log_momentum: false
+  - class_path: pytorch_lightning.callbacks.ModelSummary
+    init_args:
+      max_depth: 2
+  - class_path: pytorch_lightning.callbacks.ModelCheckpoint
+    init_args:
+      dirpath: null
+      filename: wavtokenizer_checkpoint_{epoch}_{step}_{val_loss:.4f}
+      monitor: val_loss
+      verbose: false
+      save_last: true
+      save_top_k: 10
+      save_weights_only: false
+      mode: min
+      auto_insert_metric_name: true
+      every_n_train_steps: 1000
+      train_time_interval: null
+      every_n_epochs: null
+      save_on_train_epoch_end: null
+  - class_path: inspiremusic.wavtokenizer.decoder.helpers.GradNormCallback
+  default_root_dir: null
+  gradient_clip_val: null
+  gradient_clip_algorithm: null
+  num_nodes: 1
+  num_processes: null
+  devices: -1
+  gpus: null
+  auto_select_gpus: false
+  tpu_cores: null
+  ipus: null
+  enable_progress_bar: true
+  overfit_batches: 0.0
+  track_grad_norm: -1
+  check_val_every_n_epoch: 1
+  fast_dev_run: false
+  accumulate_grad_batches: null
+  max_epochs: null
+  min_epochs: null
+  max_steps: 20000000
+  min_steps: null
+  max_time: null
+  limit_train_batches: null
+  limit_val_batches: 100
+  limit_test_batches: null
+  limit_predict_batches: null
+  val_check_interval: null
+  log_every_n_steps: 1000
+  accelerator: gpu
+  strategy: ddp
+  sync_batchnorm: false
+  precision: 32
+  enable_model_summary: true
+  num_sanity_val_steps: 2
+  resume_from_checkpoint: null
+  profiler: null
+  benchmark: null
+  deterministic: null
+  reload_dataloaders_every_n_epochs: 0
+  auto_lr_find: false
+  replace_sampler_ddp: true
+  detect_anomaly: false
+  auto_scale_batch_size: false
+  plugins: null
+  amp_backend: native
+  amp_level: null
+  move_metrics_to_cpu: false
+  multiple_trainloader_mode: max_size_cycle
+  inference_mode: true
+ckpt_path: null
+data:
+  class_path: inspiremusic.wavtokenizer.decoder.dataset.VocosDataModule
+  init_args:
+    train_params:
+      filelist_path: train.scp
+      sampling_rate: 24000
+      num_samples: 72000
+      batch_size: 38
+      num_workers: 8
+    val_params:
+      filelist_path: test.scp
+      sampling_rate: 24000
+      num_samples: 72000
+      batch_size: 10
+      num_workers: 8
+model:
+  class_path: inspiremusic.wavtokenizer.decoder.experiment.WavTokenizer
+  init_args:
+    feature_extractor:
+      class_path: inspiremusic.wavtokenizer.decoder.feature_extractors.EncodecFeatures
+      init_args:
+        encodec_model: encodec_24khz
+        bandwidths:
+        - 6.6
+        - 6.6
+        - 6.6
+        - 6.6
+        train_codebooks: true
+        num_quantizers: 1
+        dowmsamples:
+        - 8
+        - 5
+        - 4
+        - 2
+        vq_bins: 4096
+        vq_kmeans: 200
+    backbone:
+      class_path: inspiremusic.wavtokenizer.decoder.models.VocosBackbone
+      init_args:
+        input_channels: 512
+        dim: 768
+        intermediate_dim: 2304
+        num_layers: 12
+        layer_scale_init_value: null
+        adanorm_num_embeddings: 4
+    head:
+      class_path: inspiremusic.wavtokenizer.decoder.heads.ISTFTHead
+      init_args:
+        dim: 768
+        n_fft: 1280
+        hop_length: 320
+        padding: same
+    resume_config: config.yaml
+    resume_model: last.ckpt
+    sample_rate: 24000
+    initial_learning_rate: 0.0001
+    num_warmup_steps: 0
+    mel_loss_coeff: 45.0
+    mrd_loss_coeff: 1.0
+    pretrain_mel_steps: 0
+    decay_mel_coeff: false
+    evaluate_utmos: false
+    evaluate_pesq: true
+    evaluate_periodicty: true
+    resume: true
+
+
+
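The YAML above is a pytorch_lightning 1.8 LightningCLI config for training the WavTokenizer decoder (EnCodec-style feature extractor, Vocos backbone, ISTFT head). As an illustrative sanity check, not part of this commit, the snippet below parses it and derives the token rate implied by the downsample factors:

    import yaml  # pip install pyyaml

    with open("wavtokenizer/config.yaml") as f:
        cfg = yaml.safe_load(f)

    model_args = cfg["model"]["init_args"]
    fe_args = model_args["feature_extractor"]["init_args"]
    head_args = model_args["head"]["init_args"]

    # "dowmsamples" is the key exactly as spelled in the config: 8 * 5 * 4 * 2 = 320.
    stride = 1
    for d in fe_args["dowmsamples"]:
        stride *= d

    sr = model_args["sample_rate"]
    print("token rate :", sr / stride)              # 24000 / 320 = 75 tokens per second
    print("ISTFT hop  :", head_args["hop_length"])  # 320 samples, matching the stride
    print("vq bins    :", fe_args["vq_bins"])       # 4096-entry codebook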
    	
        wavtokenizer/model.pt
    ADDED
    
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:65dc00edbd293c0b4de81045648688207e5e69f1c32025beaaba0eb273fa851c
+size 1754883448
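As with music_tokenizer/model.pt, this is a Git LFS pointer; the real ~1.75 GB checkpoint sits in LFS storage. One way to verify a fetched file against the pointer (illustrative sketch; Git LFS oids of this type are the SHA-256 of the file contents):

    import hashlib

    def sha256_of(path, chunk_size=1 << 20):
        # Stream the file so large checkpoints never need to fit in memory.
        h = hashlib.sha256()
        with open(path, "rb") as f:
            for chunk in iter(lambda: f.read(chunk_size), b""):
                h.update(chunk)
        return h.hexdigest()

    expected = "65dc00edbd293c0b4de81045648688207e5e69f1c32025beaaba0eb273fa851c"
    actual = sha256_of("wavtokenizer/model.pt")
    print("OK" if actual == expected else "mismatch: " + actual)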