InspireMusic-Base

Browse files

Files changed (6) hide show

asset/QR.jpg +0 -0
asset/dingding.png +0 -0
music_tokenizer/config.json +43 -0
music_tokenizer/model.pt +3 -0
wavtokenizer/config.yaml +161 -0
wavtokenizer/model.pt +3 -0

asset/QR.jpg ADDED Viewed

asset/dingding.png ADDED Viewed

music_tokenizer/config.json ADDED Viewed

	@@ -0,0 +1,43 @@

+{
+    "resblock": "1",
+    "num_gpus": 8,
+    "batch_size": 128,
+    "learning_rate": 0.00002,
+    "adam_b1": 0.5,
+    "adam_b2": 0.9,
+    "lr_decay": 0.98,
+    "seed": 1234,
+    "upsample_rates": [8,5,4,2],
+    "upsample_kernel_sizes": [16,11,8,4],
+    "upsample_initial_channel": 512,
+    "resblock_kernel_sizes": [3,7,11],
+    "resblock_dilation_sizes": [[1,3,5], [1,3,5], [1,3,5]],
+    "segment_size": 24000,
+    "num_mels": 128,
+    "num_freq": 1024,
+    "n_fft": 1024,
+    "hop_size": 240,
+    "win_size": 1024,
+    "sampling_rate": 24000,
+    "n_code_groups": 2,
+    "n_codes": 1024,
+    "codebook_loss_lambda": 1.0,
+    "commitment_loss_lambda": 0.25,
+    "fmin": 0,
+    "fmax": 12000,
+    "fmax_for_loss": null,
+    "num_workers": 32,
+    "dist_config": {
+        "dist_backend": "nccl",
+        "dist_url": "tcp://localhost:54321",
+        "world_size": 1
+    }
+}

music_tokenizer/model.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:08d26564f725b5b4a38d0c9ea3d8d5e7df42173f15eeb4453930c2bfb93ef783
+size 254691363

wavtokenizer/config.yaml ADDED Viewed

	@@ -0,0 +1,161 @@

+# pytorch_lightning==1.8.6
+seed_everything: 3407
+trainer:
+  logger:
+    class_path: pytorch_lightning.loggers.TensorBoardLogger
+    init_args:
+      save_dir: ./result/
+      name: lightning_logs
+      version: null
+      log_graph: false
+      default_hp_metric: true
+      prefix: ''
+      sub_dir: null
+      logdir: null
+      comment: ''
+      purge_step: null
+      max_queue: 10
+      flush_secs: 120
+      filename_suffix: ''
+      write_to_disk: true
+      comet_config:
+        disabled: true
+  enable_checkpointing: true
+  callbacks:
+  - class_path: pytorch_lightning.callbacks.LearningRateMonitor
+    init_args:
+      logging_interval: null
+      log_momentum: false
+  - class_path: pytorch_lightning.callbacks.ModelSummary
+    init_args:
+      max_depth: 2
+  - class_path: pytorch_lightning.callbacks.ModelCheckpoint
+    init_args:
+      dirpath: null
+      filename: wavtokenizer_checkpoint_{epoch}_{step}_{val_loss:.4f}
+      monitor: val_loss
+      verbose: false
+      save_last: true
+      save_top_k: 10
+      save_weights_only: false
+      mode: min
+      auto_insert_metric_name: true
+      every_n_train_steps: 1000
+      train_time_interval: null
+      every_n_epochs: null
+      save_on_train_epoch_end: null
+  - class_path: inspiremusic.wavtokenizer.decoder.helpers.GradNormCallback
+  default_root_dir: null
+  gradient_clip_val: null
+  gradient_clip_algorithm: null
+  num_nodes: 1
+  num_processes: null
+  devices: -1
+  gpus: null
+  auto_select_gpus: false
+  tpu_cores: null
+  ipus: null
+  enable_progress_bar: true
+  overfit_batches: 0.0
+  track_grad_norm: -1
+  check_val_every_n_epoch: 1
+  fast_dev_run: false
+  accumulate_grad_batches: null
+  max_epochs: null
+  min_epochs: null
+  max_steps: 20000000
+  min_steps: null
+  max_time: null
+  limit_train_batches: null
+  limit_val_batches: 100
+  limit_test_batches: null
+  limit_predict_batches: null
+  val_check_interval: null
+  log_every_n_steps: 1000
+  accelerator: gpu
+  strategy: ddp
+  sync_batchnorm: false
+  precision: 32
+  enable_model_summary: true
+  num_sanity_val_steps: 2
+  resume_from_checkpoint: null
+  profiler: null
+  benchmark: null
+  deterministic: null
+  reload_dataloaders_every_n_epochs: 0
+  auto_lr_find: false
+  replace_sampler_ddp: true
+  detect_anomaly: false
+  auto_scale_batch_size: false
+  plugins: null
+  amp_backend: native
+  amp_level: null
+  move_metrics_to_cpu: false
+  multiple_trainloader_mode: max_size_cycle
+  inference_mode: true
+ckpt_path: null
+data:
+  class_path: inspiremusic.wavtokenizer.decoder.dataset.VocosDataModule
+  init_args:
+    train_params:
+      filelist_path: train.scp
+      sampling_rate: 24000
+      num_samples: 72000
+      batch_size: 38
+      num_workers: 8
+    val_params:
+      filelist_path: test.scp
+      sampling_rate: 24000
+      num_samples: 72000
+      batch_size: 10
+      num_workers: 8
+model:
+  class_path: inspiremusic.wavtokenizer.decoder.experiment.WavTokenizer
+  init_args:
+    feature_extractor:
+      class_path: inspiremusic.wavtokenizer.decoder.feature_extractors.EncodecFeatures
+      init_args:
+        encodec_model: encodec_24khz
+        bandwidths:
+        - 6.6
+        - 6.6
+        - 6.6
+        - 6.6
+        train_codebooks: true
+        num_quantizers: 1
+        dowmsamples:
+        - 8
+        - 5
+        - 4
+        - 2
+        vq_bins: 4096
+        vq_kmeans: 200
+    backbone:
+      class_path: inspiremusic.wavtokenizer.decoder.models.VocosBackbone
+      init_args:
+        input_channels: 512
+        dim: 768
+        intermediate_dim: 2304
+        num_layers: 12
+        layer_scale_init_value: null
+        adanorm_num_embeddings: 4
+    head:
+      class_path: inspiremusic.wavtokenizer.decoder.heads.ISTFTHead
+      init_args:
+        dim: 768
+        n_fft: 1280
+        hop_length: 320
+        padding: same
+    resume_config: config.yaml
+    resume_model: last.ckpt
+    sample_rate: 24000
+    initial_learning_rate: 0.0001
+    num_warmup_steps: 0
+    mel_loss_coeff: 45.0
+    mrd_loss_coeff: 1.0
+    pretrain_mel_steps: 0
+    decay_mel_coeff: false
+    evaluate_utmos: false
+    evaluate_pesq: true
+    evaluate_periodicty: true
+    resume: true

wavtokenizer/model.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:65dc00edbd293c0b4de81045648688207e5e69f1c32025beaaba0eb273fa851c
+size 1754883448