leeway.zlw committed on Oct 28, 2024

Commit

226dc7e

1 Parent(s): 508c460

feat: initialize hallo2 weights

Files changed (34) hide show

CodeFormer/codeformer.pth +3 -0
CodeFormer/vqgan_code1024.pth +3 -0
audio_separator/Kim_Vocal_2.onnx +3 -0
audio_separator/download_checks.json +231 -0
audio_separator/mdx_model_data.json +384 -0
audio_separator/vr_model_data.json +137 -0
face_analysis/models/1k3d68.onnx +3 -0
face_analysis/models/2d106det.onnx +3 -0
face_analysis/models/face_landmarker_v2_with_blendshapes.task +3 -0
face_analysis/models/genderage.onnx +3 -0
face_analysis/models/glintr100.onnx +3 -0
face_analysis/models/scrfd_10g_bnkps.onnx +3 -0
facelib/detection_Resnet50_Final.pth +3 -0
facelib/detection_mobilenet0.25_Final.pth +3 -0
facelib/parsing_parsenet.pth +3 -0
facelib/yolov5l-face.pth +3 -0
facelib/yolov5n-face.pth +3 -0
hallo2/net.pth +3 -0
hallo2/net_g.pth +3 -0
motion_module/mm_sd_v15_v2.ckpt +3 -0
realesrgan/RealESRGAN_x2plus.pth +3 -0
sd-vae-ft-mse/README.md +83 -0
sd-vae-ft-mse/config.json +29 -0
sd-vae-ft-mse/diffusion_pytorch_model.safetensors +3 -0
stable-diffusion-v1-5/unet/config.json +36 -0
stable-diffusion-v1-5/unet/diffusion_pytorch_model.safetensors +3 -0
wav2vec/wav2vec2-base-960h/README.md +128 -0
wav2vec/wav2vec2-base-960h/config.json +77 -0
wav2vec/wav2vec2-base-960h/feature_extractor_config.json +8 -0
wav2vec/wav2vec2-base-960h/model.safetensors +3 -0
wav2vec/wav2vec2-base-960h/preprocessor_config.json +8 -0
wav2vec/wav2vec2-base-960h/special_tokens_map.json +1 -0
wav2vec/wav2vec2-base-960h/tokenizer_config.json +1 -0
wav2vec/wav2vec2-base-960h/vocab.json +1 -0

CodeFormer/codeformer.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1009e537e0c2a07d4cabce6355f53cb66767cd4b4297ec7a4a64ca4b8a5684b7
+size 376637898

CodeFormer/vqgan_code1024.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4d1c6741b3cffcbdc2cd1a12b2c3c2442282e042d5de66909cb643d4fa31b20f
+size 255078528

audio_separator/Kim_Vocal_2.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ce74ef3b6a6024ce44211a07be9cf8bc6d87728cc852a68ab34eb8e58cde9c8b
+size 66759214

audio_separator/download_checks.json ADDED Viewed

	@@ -0,0 +1,231 @@

+{
+    "current_version": "UVR_Patch_10_6_23_4_27",
+    "current_version_ocl": "UVR_Patch_10_6_23_4_27",
+    "current_version_mac": "UVR_Patch_10_6_23_4_27",
+    "current_version_linux": "UVR_Patch_10_6_23_4_27",
+    "vr_download_list": {
+                        "VR Arch Single Model v5: 1_HP-UVR": "1_HP-UVR.pth",
+                        "VR Arch Single Model v5: 2_HP-UVR": "2_HP-UVR.pth",
+                        "VR Arch Single Model v5: 3_HP-Vocal-UVR": "3_HP-Vocal-UVR.pth",
+                        "VR Arch Single Model v5: 4_HP-Vocal-UVR": "4_HP-Vocal-UVR.pth",
+                        "VR Arch Single Model v5: 5_HP-Karaoke-UVR": "5_HP-Karaoke-UVR.pth",
+                        "VR Arch Single Model v5: 6_HP-Karaoke-UVR": "6_HP-Karaoke-UVR.pth",
+                        "VR Arch Single Model v5: 7_HP2-UVR": "7_HP2-UVR.pth",
+                        "VR Arch Single Model v5: 8_HP2-UVR": "8_HP2-UVR.pth",
+                        "VR Arch Single Model v5: 9_HP2-UVR": "9_HP2-UVR.pth",
+                        "VR Arch Single Model v5: 10_SP-UVR-2B-32000-1": "10_SP-UVR-2B-32000-1.pth",
+                        "VR Arch Single Model v5: 11_SP-UVR-2B-32000-2": "11_SP-UVR-2B-32000-2.pth",
+                        "VR Arch Single Model v5: 12_SP-UVR-3B-44100": "12_SP-UVR-3B-44100.pth",
+                        "VR Arch Single Model v5: 13_SP-UVR-4B-44100-1": "13_SP-UVR-4B-44100-1.pth",
+                        "VR Arch Single Model v5: 14_SP-UVR-4B-44100-2": "14_SP-UVR-4B-44100-2.pth",
+                        "VR Arch Single Model v5: 15_SP-UVR-MID-44100-1": "15_SP-UVR-MID-44100-1.pth",
+                        "VR Arch Single Model v5: 16_SP-UVR-MID-44100-2": "16_SP-UVR-MID-44100-2.pth",
+                        "VR Arch Single Model v5: 17_HP-Wind_Inst-UVR": "17_HP-Wind_Inst-UVR.pth",
+                        "VR Arch Single Model v5: UVR-De-Echo-Aggressive by FoxJoy": "UVR-De-Echo-Aggressive.pth",
+                        "VR Arch Single Model v5: UVR-De-Echo-Normal by FoxJoy": "UVR-De-Echo-Normal.pth",
+                        "VR Arch Single Model v5: UVR-DeEcho-DeReverb by FoxJoy": "UVR-DeEcho-DeReverb.pth",
+                        "VR Arch Single Model v5: UVR-DeNoise-Lite by FoxJoy": "UVR-DeNoise-Lite.pth",
+                        "VR Arch Single Model v5: UVR-DeNoise by FoxJoy": "UVR-DeNoise.pth",
+                        "VR Arch Single Model v5: UVR-BVE-4B_SN-44100-1": "UVR-BVE-4B_SN-44100-1.pth",
+                        "VR Arch Single Model v4: MGM_HIGHEND_v4": "MGM_HIGHEND_v4.pth",
+                        "VR Arch Single Model v4: MGM_LOWEND_A_v4": "MGM_LOWEND_A_v4.pth",
+                        "VR Arch Single Model v4: MGM_LOWEND_B_v4": "MGM_LOWEND_B_v4.pth",
+                        "VR Arch Single Model v4: MGM_MAIN_v4": "MGM_MAIN_v4.pth"
+                },
+    "mdx_download_list": {
+                        "MDX-Net Model: UVR-MDX-NET Inst HQ 1": "UVR-MDX-NET-Inst_HQ_1.onnx",
+                        "MDX-Net Model: UVR-MDX-NET Inst HQ 2": "UVR-MDX-NET-Inst_HQ_2.onnx",
+                        "MDX-Net Model: UVR-MDX-NET Inst HQ 3": "UVR-MDX-NET-Inst_HQ_3.onnx",
+                        "MDX-Net Model: UVR-MDX-NET Inst HQ 4": "UVR-MDX-NET-Inst_HQ_4.onnx",
+                        "MDX-Net Model: UVR-MDX-NET Main": "UVR_MDXNET_Main.onnx",
+                        "MDX-Net Model: UVR-MDX-NET Inst Main": "UVR-MDX-NET-Inst_Main.onnx",
+                        "MDX-Net Model: UVR-MDX-NET 1": "UVR_MDXNET_1_9703.onnx",
+                        "MDX-Net Model: UVR-MDX-NET 2": "UVR_MDXNET_2_9682.onnx",
+                        "MDX-Net Model: UVR-MDX-NET 3": "UVR_MDXNET_3_9662.onnx",
+                        "MDX-Net Model: UVR-MDX-NET Inst 1": "UVR-MDX-NET-Inst_1.onnx",
+                        "MDX-Net Model: UVR-MDX-NET Inst 2": "UVR-MDX-NET-Inst_2.onnx",
+                        "MDX-Net Model: UVR-MDX-NET Inst 3": "UVR-MDX-NET-Inst_3.onnx",
+                        "MDX-Net Model: UVR-MDX-NET Karaoke": "UVR_MDXNET_KARA.onnx",
+                        "MDX-Net Model: UVR-MDX-NET Karaoke 2": "UVR_MDXNET_KARA_2.onnx",
+                        "MDX-Net Model: UVR_MDXNET_9482": "UVR_MDXNET_9482.onnx",
+                        "MDX-Net Model: UVR-MDX-NET Voc FT": "UVR-MDX-NET-Voc_FT.onnx",
+                        "MDX-Net Model: Kim Vocal 1": "Kim_Vocal_1.onnx",
+                        "MDX-Net Model: Kim Vocal 2": "Kim_Vocal_2.onnx",
+                        "MDX-Net Model: Kim Inst": "Kim_Inst.onnx",
+                        "MDX-Net Model: Reverb HQ By FoxJoy": "Reverb_HQ_By_FoxJoy.onnx",
+                        "MDX-Net Model: UVR-MDX-NET Crowd HQ 1 By Aufr33": "UVR-MDX-NET_Crowd_HQ_1.onnx",
+                        "MDX-Net Model: kuielab_a_vocals": "kuielab_a_vocals.onnx",
+                        "MDX-Net Model: kuielab_a_other": "kuielab_a_other.onnx",
+                        "MDX-Net Model: kuielab_a_bass": "kuielab_a_bass.onnx",
+                        "MDX-Net Model: kuielab_a_drums": "kuielab_a_drums.onnx",
+                        "MDX-Net Model: kuielab_b_vocals": "kuielab_b_vocals.onnx",
+                        "MDX-Net Model: kuielab_b_other": "kuielab_b_other.onnx",
+                        "MDX-Net Model: kuielab_b_bass": "kuielab_b_bass.onnx",
+                        "MDX-Net Model: kuielab_b_drums": "kuielab_b_drums.onnx"
+                        },
+    "demucs_download_list":{
+                "Demucs v4: htdemucs_ft":{
+                                "f7e0c4bc-ba3fe64a.th":"https://dl.fbaipublicfiles.com/demucs/hybrid_transformer/f7e0c4bc-ba3fe64a.th",
+                                "d12395a8-e57c48e6.th":"https://dl.fbaipublicfiles.com/demucs/hybrid_transformer/d12395a8-e57c48e6.th",
+                                "92cfc3b6-ef3bcb9c.th":"https://dl.fbaipublicfiles.com/demucs/hybrid_transformer/92cfc3b6-ef3bcb9c.th",
+                                "04573f0d-f3cf25b2.th":"https://dl.fbaipublicfiles.com/demucs/hybrid_transformer/04573f0d-f3cf25b2.th",
+                                "htdemucs_ft.yaml": "https://github.com/TRvlvr/model_repo/releases/download/all_public_uvr_models/htdemucs_ft.yaml"
+                                },
+                "Demucs v4: htdemucs":{
+                                "955717e8-8726e21a.th": "https://dl.fbaipublicfiles.com/demucs/hybrid_transformer/955717e8-8726e21a.th",
+                                "htdemucs.yaml": "https://github.com/TRvlvr/model_repo/releases/download/all_public_uvr_models/htdemucs.yaml"
+                                },
+                "Demucs v4: hdemucs_mmi":{
+                                "75fc33f5-1941ce65.th": "https://dl.fbaipublicfiles.com/demucs/hybrid_transformer/75fc33f5-1941ce65.th",
+                                "hdemucs_mmi.yaml": "https://github.com/TRvlvr/model_repo/releases/download/all_public_uvr_models/hdemucs_mmi.yaml"
+                                },
+                "Demucs v4: htdemucs_6s":{
+                                "5c90dfd2-34c22ccb.th": "https://dl.fbaipublicfiles.com/demucs/hybrid_transformer/5c90dfd2-34c22ccb.th",
+                                "htdemucs_6s.yaml": "https://github.com/TRvlvr/model_repo/releases/download/all_public_uvr_models/htdemucs_6s.yaml"
+                                },
+                "Demucs v3: mdx":{
+                                "0d19c1c6-0f06f20e.th": "https://dl.fbaipublicfiles.com/demucs/mdx_final/0d19c1c6-0f06f20e.th",
+                                "7ecf8ec1-70f50cc9.th": "https://dl.fbaipublicfiles.com/demucs/mdx_final/7ecf8ec1-70f50cc9.th",
+                                "c511e2ab-fe698775.th": "https://dl.fbaipublicfiles.com/demucs/mdx_final/c511e2ab-fe698775.th",
+                                "7d865c68-3d5dd56b.th": "https://dl.fbaipublicfiles.com/demucs/mdx_final/7d865c68-3d5dd56b.th",
+                                "mdx.yaml": "https://raw.githubusercontent.com/facebookresearch/demucs/main/demucs/remote/mdx.yaml"
+                                },
+                "Demucs v3: mdx_q":{
+                                "6b9c2ca1-3fd82607.th": "https://dl.fbaipublicfiles.com/demucs/mdx_final/6b9c2ca1-3fd82607.th",
+                                "b72baf4e-8778635e.th": "https://dl.fbaipublicfiles.com/demucs/mdx_final/b72baf4e-8778635e.th",
+                                "42e558d4-196e0e1b.th": "https://dl.fbaipublicfiles.com/demucs/mdx_final/42e558d4-196e0e1b.th",
+                                "305bc58f-18378783.th": "https://dl.fbaipublicfiles.com/demucs/mdx_final/305bc58f-18378783.th",
+                                "mdx_q.yaml": "https://raw.githubusercontent.com/facebookresearch/demucs/main/demucs/remote/mdx_q.yaml"
+                                },
+                "Demucs v3: mdx_extra":{
+                                "e51eebcc-c1b80bdd.th": "https://dl.fbaipublicfiles.com/demucs/mdx_final/e51eebcc-c1b80bdd.th",
+                                "a1d90b5c-ae9d2452.th": "https://dl.fbaipublicfiles.com/demucs/mdx_final/a1d90b5c-ae9d2452.th",
+                                "5d2d6c55-db83574e.th": "https://dl.fbaipublicfiles.com/demucs/mdx_final/5d2d6c55-db83574e.th",
+                                "cfa93e08-61801ae1.th": "https://dl.fbaipublicfiles.com/demucs/mdx_final/cfa93e08-61801ae1.th",
+                                "mdx_extra.yaml": "https://raw.githubusercontent.com/facebookresearch/demucs/main/demucs/remote/mdx_extra.yaml"
+                                },
+                "Demucs v3: mdx_extra_q": {
+                                "83fc094f-4a16d450.th": "https://dl.fbaipublicfiles.com/demucs/mdx_final/83fc094f-4a16d450.th",
+                                "464b36d7-e5a9386e.th": "https://dl.fbaipublicfiles.com/demucs/mdx_final/464b36d7-e5a9386e.th",
+                                "14fc6a69-a89dd0ee.th": "https://dl.fbaipublicfiles.com/demucs/mdx_final/14fc6a69-a89dd0ee.th",
+                                "7fd6ef75-a905dd85.th": "https://dl.fbaipublicfiles.com/demucs/mdx_final/7fd6ef75-a905dd85.th",
+                                "mdx_extra_q.yaml": "https://raw.githubusercontent.com/facebookresearch/demucs/main/demucs/remote/mdx_extra_q.yaml"
+                                },
+                "Demucs v3: UVR Model":{
+                                "ebf34a2db.th": "https://github.com/TRvlvr/model_repo/releases/download/all_public_uvr_models/ebf34a2db.th",
+                                "UVR_Demucs_Model_1.yaml": "https://github.com/TRvlvr/model_repo/releases/download/all_public_uvr_models/UVR_Demucs_Model_1.yaml"
+                                },
+                "Demucs v3: repro_mdx_a":{
+                                "9a6b4851-03af0aa6.th": "https://dl.fbaipublicfiles.com/demucs/mdx_final/9a6b4851-03af0aa6.th",
+                                "1ef250f1-592467ce.th": "https://dl.fbaipublicfiles.com/demucs/mdx_final/1ef250f1-592467ce.th",
+                                "fa0cb7f9-100d8bf4.th": "https://dl.fbaipublicfiles.com/demucs/mdx_final/fa0cb7f9-100d8bf4.th",
+                                "902315c2-b39ce9c9.th": "https://dl.fbaipublicfiles.com/demucs/mdx_final/902315c2-b39ce9c9.th",
+                                "repro_mdx_a.yaml": "https://github.com/TRvlvr/model_repo/releases/download/all_public_uvr_models/repro_mdx_a.yaml"
+                                },
+                "Demucs v3: repro_mdx_a_time_only":{
+                                "9a6b4851-03af0aa6.th":"https://dl.fbaipublicfiles.com/demucs/mdx_final/9a6b4851-03af0aa6.th",
+                                "1ef250f1-592467ce.th":"https://dl.fbaipublicfiles.com/demucs/mdx_final/1ef250f1-592467ce.th",
+                                "repro_mdx_a_time_only.yaml": "https://github.com/TRvlvr/model_repo/releases/download/all_public_uvr_models/repro_mdx_a_time_only.yaml"
+                                },
+                "Demucs v3: repro_mdx_a_hybrid_only":{
+                                "fa0cb7f9-100d8bf4.th":"https://dl.fbaipublicfiles.com/demucs/mdx_final/fa0cb7f9-100d8bf4.th",
+                                "902315c2-b39ce9c9.th":"https://dl.fbaipublicfiles.com/demucs/mdx_final/902315c2-b39ce9c9.th",
+                                "repro_mdx_a_hybrid_only.yaml": "https://github.com/TRvlvr/model_repo/releases/download/all_public_uvr_models/repro_mdx_a_hybrid_only.yaml"
+                                },
+                "Demucs v2: demucs": {
+                                "demucs-e07c671f.th": "https://dl.fbaipublicfiles.com/demucs/v3.0/demucs-e07c671f.th"
+                                },
+                "Demucs v2: demucs_extra": {
+                                "demucs_extra-3646af93.th":"https://dl.fbaipublicfiles.com/demucs/v3.0/demucs_extra-3646af93.th"
+                                },
+                "Demucs v2: demucs48_hq": {
+                                "demucs48_hq-28a1282c.th":"https://dl.fbaipublicfiles.com/demucs/v3.0/demucs48_hq-28a1282c.th"
+                                },
+                "Demucs v2: tasnet": {
+                                "tasnet-beb46fac.th":"https://dl.fbaipublicfiles.com/demucs/v3.0/tasnet-beb46fac.th"
+                                },
+                "Demucs v2: tasnet_extra": {
+                                "tasnet_extra-df3777b2.th":"https://dl.fbaipublicfiles.com/demucs/v3.0/tasnet_extra-df3777b2.th"
+                                },
+                "Demucs v2: demucs_unittest": {
+                                "demucs_unittest-09ebc15f.th":"https://dl.fbaipublicfiles.com/demucs/v3.0/demucs_unittest-09ebc15f.th"
+                                },
+                "Demucs v1: demucs": {
+                                "demucs.th":"https://dl.fbaipublicfiles.com/demucs/v2.0/demucs.th"
+                                },
+                "Demucs v1: demucs_extra": {
+                                "demucs_extra.th":"https://dl.fbaipublicfiles.com/demucs/v2.0/demucs_extra.th"
+                                },
+                "Demucs v1: light": {
+                                "light.th":"https://dl.fbaipublicfiles.com/demucs/v2.0/light.th"
+                                },
+                "Demucs v1: light_extra": {
+                                "light_extra.th":"https://dl.fbaipublicfiles.com/demucs/v2.0/light_extra.th"
+                                },
+                "Demucs v1: tasnet": {
+                                "tasnet.th":"https://dl.fbaipublicfiles.com/demucs/v2.0/tasnet.th"
+                                },
+                "Demucs v1: tasnet_extra": {
+                                "tasnet_extra.th":"https://dl.fbaipublicfiles.com/demucs/v2.0/tasnet_extra.th"
+                                }
+                },
+    "mdx_download_vip_list": {
+                "MDX-Net Model VIP: UVR-MDX-NET_Main_340": "UVR-MDX-NET_Main_340.onnx",
+                "MDX-Net Model VIP: UVR-MDX-NET_Main_390": "UVR-MDX-NET_Main_390.onnx",
+                "MDX-Net Model VIP: UVR-MDX-NET_Main_406": "UVR-MDX-NET_Main_406.onnx",
+                "MDX-Net Model VIP: UVR-MDX-NET_Main_427": "UVR-MDX-NET_Main_427.onnx",
+                "MDX-Net Model VIP: UVR-MDX-NET_Main_438": "UVR-MDX-NET_Main_438.onnx",
+                "MDX-Net Model VIP: UVR-MDX-NET_Inst_82_beta": "UVR-MDX-NET_Inst_82_beta.onnx",
+                "MDX-Net Model VIP: UVR-MDX-NET_Inst_90_beta": "UVR-MDX-NET_Inst_90_beta.onnx",
+                "MDX-Net Model VIP: UVR-MDX-NET_Inst_187_beta": "UVR-MDX-NET_Inst_187_beta.onnx",
+                "MDX-Net Model VIP: UVR-MDX-NET-Inst_full_292": "UVR-MDX-NET-Inst_full_292.onnx"
+                },
+    "mdx23_download_list": {
+                "MDX23C Model: MDX23C_D1581": {"MDX23C_D1581.ckpt":"model_2_stem_061321.yaml"}
+                },
+    "mdx23c_download_list": {
+                "MDX23C Model: MDX23C-InstVoc HQ": {"MDX23C-8KFFT-InstVoc_HQ.ckpt":"model_2_stem_full_band_8k.yaml"}
+                },
+    "roformer_download_list": {
+                "Roformer Model: BS-Roformer-Viperx-1297": {"model_bs_roformer_ep_317_sdr_12.9755.ckpt":"model_bs_roformer_ep_317_sdr_12.9755.yaml"},
+                "Roformer Model: BS-Roformer-Viperx-1296": {"model_bs_roformer_ep_368_sdr_12.9628.ckpt":"model_bs_roformer_ep_368_sdr_12.9628.yaml"},
+                "Roformer Model: BS-Roformer-Viperx-1053": {"model_bs_roformer_ep_937_sdr_10.5309.ckpt":"model_bs_roformer_ep_937_sdr_10.5309.yaml"},
+                "Roformer Model: Mel-Roformer-Viperx-1143": {"model_mel_band_roformer_ep_3005_sdr_11.4360.ckpt":"model_mel_band_roformer_ep_3005_sdr_11.4360.yaml"}
+                },
+    "mdx23c_download_vip_list": {
+            "MDX23C Model VIP: MDX23C_D1581": {"MDX23C_D1581.ckpt":"model_2_stem_061321.yaml"},
+            "MDX23C Model VIP: MDX23C-InstVoc HQ 2": {"MDX23C-8KFFT-InstVoc_HQ_2.ckpt":"model_2_stem_full_band_8k.yaml"}
+            },
+    "vr_download_vip_list": [],
+    "demucs_download_vip_list": []
+}

audio_separator/mdx_model_data.json ADDED Viewed

	@@ -0,0 +1,384 @@

+{
+    "0ddfc0eb5792638ad5dc27850236c246": {
+        "compensate": 1.035,
+        "mdx_dim_f_set": 2048,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 6144,
+        "primary_stem": "Vocals"
+    },
+    "26d308f91f3423a67dc69a6d12a8793d": {
+        "compensate": 1.035,
+        "mdx_dim_f_set": 2048,
+        "mdx_dim_t_set": 9,
+        "mdx_n_fft_scale_set": 8192,
+        "primary_stem": "Other"
+    },
+    "2cdd429caac38f0194b133884160f2c6": {
+        "compensate": 1.045,
+        "mdx_dim_f_set": 3072,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 7680,
+        "primary_stem": "Instrumental"
+    },
+    "2f5501189a2f6db6349916fabe8c90de": {
+        "compensate": 1.035,
+        "mdx_dim_f_set": 2048,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 6144,
+        "primary_stem": "Vocals",
+        "is_karaoke": true
+    },
+    "398580b6d5d973af3120df54cee6759d": {
+        "compensate": 1.75,
+        "mdx_dim_f_set": 3072,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 7680,
+        "primary_stem": "Vocals"
+    },
+    "488b3e6f8bd3717d9d7c428476be2d75": {
+        "compensate": 1.035,
+        "mdx_dim_f_set": 3072,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 7680,
+        "primary_stem": "Instrumental"
+    },
+    "4910e7827f335048bdac11fa967772f9": {
+        "compensate": 1.035,
+        "mdx_dim_f_set": 2048,
+        "mdx_dim_t_set": 7,
+        "mdx_n_fft_scale_set": 4096,
+        "primary_stem": "Drums"
+    },
+    "53c4baf4d12c3e6c3831bb8f5b532b93": {
+        "compensate": 1.043,
+        "mdx_dim_f_set": 3072,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 7680,
+        "primary_stem": "Vocals"
+    },
+    "5d343409ef0df48c7d78cce9f0106781": {
+        "compensate": 1.075,
+        "mdx_dim_f_set": 3072,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 7680,
+        "primary_stem": "Vocals"
+    },
+    "5f6483271e1efb9bfb59e4a3e6d4d098": {
+        "compensate": 1.035,
+        "mdx_dim_f_set": 2048,
+        "mdx_dim_t_set": 9,
+        "mdx_n_fft_scale_set": 6144,
+        "primary_stem": "Vocals"
+    },
+    "65ab5919372a128e4167f5e01a8fda85": {
+        "compensate": 1.035,
+        "mdx_dim_f_set": 2048,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 8192,
+        "primary_stem": "Other"
+    },
+    "6703e39f36f18aa7855ee1047765621d": {
+        "compensate": 1.035,
+        "mdx_dim_f_set": 2048,
+        "mdx_dim_t_set": 9,
+        "mdx_n_fft_scale_set": 16384,
+        "primary_stem": "Bass"
+    },
+    "6b31de20e84392859a3d09d43f089515": {
+        "compensate": 1.035,
+        "mdx_dim_f_set": 2048,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 6144,
+        "primary_stem": "Vocals"
+    },
+    "867595e9de46f6ab699008295df62798": {
+        "compensate": 1.03,
+        "mdx_dim_f_set": 3072,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 7680,
+        "primary_stem": "Vocals"
+    },
+    "a3cd63058945e777505c01d2507daf37": {
+        "compensate": 1.03,
+        "mdx_dim_f_set": 2048,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 6144,
+        "primary_stem": "Vocals"
+    },
+    "b33d9b3950b6cbf5fe90a32608924700": {
+        "compensate": 1.03,
+        "mdx_dim_f_set": 3072,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 7680,
+        "primary_stem": "Vocals"
+    },
+    "c3b29bdce8c4fa17ec609e16220330ab": {
+        "compensate": 1.035,
+        "mdx_dim_f_set": 2048,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 16384,
+        "primary_stem": "Bass"
+    },
+    "ceed671467c1f64ebdfac8a2490d0d52": {
+        "compensate": 1.035,
+        "mdx_dim_f_set": 3072,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 7680,
+        "primary_stem": "Instrumental"
+    },
+    "d2a1376f310e4f7fa37fb9b5774eb701": {
+        "compensate": 1.035,
+        "mdx_dim_f_set": 3072,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 7680,
+        "primary_stem": "Instrumental"
+    },
+    "d7bff498db9324db933d913388cba6be": {
+        "compensate": 1.035,
+        "mdx_dim_f_set": 2048,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 6144,
+        "primary_stem": "Vocals"
+    },
+    "d94058f8c7f1fae4164868ae8ae66b20": {
+        "compensate": 1.035,
+        "mdx_dim_f_set": 2048,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 6144,
+        "primary_stem": "Vocals"
+    },
+    "dc41ede5961d50f277eb846db17f5319": {
+        "compensate": 1.035,
+        "mdx_dim_f_set": 2048,
+        "mdx_dim_t_set": 9,
+        "mdx_n_fft_scale_set": 4096,
+        "primary_stem": "Drums"
+    },
+    "e5572e58abf111f80d8241d2e44e7fa4": {
+        "compensate": 1.028,
+        "mdx_dim_f_set": 3072,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 7680,
+        "primary_stem": "Instrumental"
+    },
+    "e7324c873b1f615c35c1967f912db92a": {
+        "compensate": 1.03,
+        "mdx_dim_f_set": 3072,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 7680,
+        "primary_stem": "Vocals"
+    },
+    "1c56ec0224f1d559c42fd6fd2a67b154": {
+        "compensate": 1.025,
+        "mdx_dim_f_set": 2048,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 5120,
+        "primary_stem": "Instrumental"
+    },
+    "f2df6d6863d8f435436d8b561594ff49": {
+        "compensate": 1.035,
+        "mdx_dim_f_set": 3072,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 7680,
+        "primary_stem": "Instrumental"
+    },
+    "b06327a00d5e5fbc7d96e1781bbdb596": {
+        "compensate": 1.035,
+        "mdx_dim_f_set": 3072,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 6144,
+        "primary_stem": "Instrumental"
+    },
+    "94ff780b977d3ca07c7a343dab2e25dd": {
+        "compensate": 1.039,
+        "mdx_dim_f_set": 3072,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 6144,
+        "primary_stem": "Instrumental"
+    },
+    "73492b58195c3b52d34590d5474452f6": {
+        "compensate": 1.043,
+        "mdx_dim_f_set": 3072,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 7680,
+        "primary_stem": "Vocals"
+    },
+    "970b3f9492014d18fefeedfe4773cb42": {
+        "compensate": 1.009,
+        "mdx_dim_f_set": 3072,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 7680,
+        "primary_stem": "Vocals"
+    },
+    "1d64a6d2c30f709b8c9b4ce1366d96ee": {
+        "compensate": 1.065,
+        "mdx_dim_f_set": 2048,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 5120,
+        "primary_stem": "Instrumental",
+        "is_karaoke": true
+    },
+    "203f2a3955221b64df85a41af87cf8f0": {
+        "compensate": 1.035,
+        "mdx_dim_f_set": 3072,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 6144,
+        "primary_stem": "Instrumental"
+    },
+    "291c2049608edb52648b96e27eb80e95": {
+        "compensate": 1.035,
+        "mdx_dim_f_set": 3072,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 6144,
+        "primary_stem": "Instrumental"
+    },
+    "ead8d05dab12ec571d67549b3aab03fc": {
+        "compensate": 1.035,
+        "mdx_dim_f_set": 3072,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 6144,
+        "primary_stem": "Instrumental"
+    },
+    "cc63408db3d80b4d85b0287d1d7c9632": {
+        "compensate": 1.033,
+        "mdx_dim_f_set": 3072,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 6144,
+        "primary_stem": "Instrumental"
+    },
+    "cd5b2989ad863f116c855db1dfe24e39": {
+        "compensate": 1.035,
+        "mdx_dim_f_set": 3072,
+        "mdx_dim_t_set": 9,
+        "mdx_n_fft_scale_set": 6144,
+        "primary_stem": "Reverb"
+    },
+    "55657dd70583b0fedfba5f67df11d711": {
+        "compensate": 1.022,
+        "mdx_dim_f_set": 3072,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 6144,
+        "primary_stem": "Instrumental"
+    },
+    "b6bccda408a436db8500083ef3491e8b": {
+        "compensate": 1.02,
+        "mdx_dim_f_set": 3072,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 7680,
+        "primary_stem": "Instrumental"
+    },
+    "8a88db95c7fb5dbe6a095ff2ffb428b1": {
+        "compensate": 1.026,
+        "mdx_dim_f_set": 2048,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 5120,
+        "primary_stem": "Instrumental"
+    },
+    "b78da4afc6512f98e4756f5977f5c6b9": {
+        "compensate": 1.021,
+        "mdx_dim_f_set": 3072,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 7680,
+        "primary_stem": "Instrumental"
+    },
+    "77d07b2667ddf05b9e3175941b4454a0": {
+        "compensate": 1.021,
+        "mdx_dim_f_set": 3072,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 7680,
+        "primary_stem": "Vocals"
+    },
+    "0f2a6bc5b49d87d64728ee40e23bceb1": {
+        "compensate": 1.019,
+        "mdx_dim_f_set": 2560,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 5120,
+        "primary_stem": "Instrumental"
+    },
+    "b02be2d198d4968a121030cf8950b492": {
+        "compensate": 1.020,
+        "mdx_dim_f_set": 2560,
+        "mdx_dim_t_set": 8,
+        "mdx_n_fft_scale_set": 5120,
+        "primary_stem": "No Crowd"
+    },
+    "2154254ee89b2945b97a7efed6e88820": {
+        "config_yaml": "model_2_stem_061321.yaml"
+    },
+    "063aadd735d58150722926dcbf5852a9": {
+        "config_yaml": "model_2_stem_061321.yaml"
+    },
+    "c09f714d978b41d718facfe3427e6001": {
+        "config_yaml": "model_2_stem_061321.yaml"
+    },
+    "fe96801369f6a148df2720f5ced88c19": {
+        "config_yaml": "model3.yaml"
+    },
+    "02e8b226f85fb566e5db894b9931c640": {
+        "config_yaml": "model2.yaml"
+    },
+    "e3de6d861635ab9c1d766149edd680d6": {
+        "config_yaml": "model1.yaml"
+    },
+    "3f2936c554ab73ce2e396d54636bd373": {
+        "config_yaml": "modelB.yaml"
+    },
+    "890d0f6f82d7574bca741a9e8bcb8168": {
+        "config_yaml": "modelB.yaml"
+    },
+    "63a3cb8c37c474681049be4ad1ba8815": {
+        "config_yaml": "modelB.yaml"
+    },
+    "a7fc5d719743c7fd6b61bd2b4d48b9f0": {
+        "config_yaml": "modelA.yaml"
+    },
+    "3567f3dee6e77bf366fcb1c7b8bc3745": {
+        "config_yaml": "modelA.yaml"
+    },
+    "a28f4d717bd0d34cd2ff7a3b0a3d065e": {
+        "config_yaml": "modelA.yaml"
+    },
+    "c9971a18da20911822593dc81caa8be9": {
+        "config_yaml": "sndfx.yaml"
+    },
+    "57d94d5ed705460d21c75a5ac829a605": {
+        "config_yaml": "sndfx.yaml"
+    },
+    "e7a25f8764f25a52c1b96c4946e66ba2": {
+        "config_yaml": "sndfx.yaml"
+    },
+    "104081d24e37217086ce5fde09147ee1": {
+        "config_yaml": "model_2_stem_061321.yaml"
+    },
+    "1e6165b601539f38d0a9330f3facffeb": {
+        "config_yaml": "model_2_stem_061321.yaml"
+    },
+    "fe0108464ce0d8271be5ab810891bd7c": {
+        "config_yaml": "model_2_stem_full_band.yaml"
+    },
+    "e9b82ec90ee56c507a3a982f1555714c": {
+        "config_yaml": "model_2_stem_full_band_2.yaml"
+    },
+    "99b6ceaae542265a3b6d657bf9fde79f": {
+        "config_yaml": "model_2_stem_full_band_8k.yaml"
+    },
+    "116f6f9dabb907b53d847ed9f7a9475f": {
+        "config_yaml": "model_2_stem_full_band_8k.yaml"
+    },
+    "53f707017bfcbb56f5e1bfac420d6732": {
+        "config_yaml": "model_bs_roformer_ep_317_sdr_12.9755.yaml",
+        "is_roformer": true
+    },
+    "63e41acc264bf681a73aa9f7e5f606cc": {
+        "config_yaml": "model_mel_band_roformer_ep_3005_sdr_11.4360.yaml",
+        "is_roformer": true
+    },
+    "e733736763234047587931fc35322fd9": {
+        "config_yaml": "model_bs_roformer_ep_937_sdr_10.5309.yaml",
+        "is_roformer": true
+    },
+    "d789065adfd747d6f585b27b495bcdae": {
+        "config_yaml": "model_bs_roformer_ep_368_sdr_12.9628.yaml",
+        "is_roformer": true
+    }
+}

audio_separator/vr_model_data.json ADDED Viewed

	@@ -0,0 +1,137 @@

+{
+    "0d0e6d143046b0eecc41a22e60224582": {
+        "vr_model_param": "3band_44100_mid",
+        "primary_stem": "Instrumental"
+    },
+    "18b52f873021a0af556fb4ecd552bb8e": {
+        "vr_model_param": "2band_32000",
+        "primary_stem": "Instrumental"
+    },
+    "1fc66027c82b499c7d8f55f79e64cadc": {
+        "vr_model_param": "2band_32000",
+        "primary_stem": "Instrumental"
+    },
+    "2aa34fbc01f8e6d2bf509726481e7142": {
+        "vr_model_param": "4band_44100",
+        "primary_stem": "No Piano"
+    },
+    "3e18f639b11abea7361db1a4a91c2559": {
+        "vr_model_param": "4band_44100",
+        "primary_stem": "Instrumental"
+    },
+    "570b5f50054609a17741369a35007ddd": {
+        "vr_model_param": "4band_v3",
+        "primary_stem": "Instrumental"
+    },
+    "5a6e24c1b530f2dab045a522ef89b751": {
+        "vr_model_param": "1band_sr44100_hl512",
+        "primary_stem": "Instrumental"
+    },
+    "6b5916069a49be3fe29d4397ecfd73fa": {
+        "vr_model_param": "3band_44100_msb2",
+        "primary_stem": "Instrumental",
+        "is_karaoke": true
+    },
+    "74b3bc5fa2b69f29baf7839b858bc679": {
+        "vr_model_param": "4band_44100",
+        "primary_stem": "Instrumental"
+    },
+    "827213b316df36b52a1f3d04fec89369": {
+        "vr_model_param": "4band_44100",
+        "primary_stem": "Instrumental"
+    },
+    "911d4048eee7223eca4ee0efb7d29256": {
+        "vr_model_param": "4band_44100",
+        "primary_stem": "Vocals"
+    },
+    "941f3f7f0b0341f12087aacdfef644b1": {
+        "vr_model_param": "4band_v2",
+        "primary_stem": "Instrumental"
+    },
+    "a02827cf69d75781a35c0e8a327f3195": {
+        "vr_model_param": "1band_sr33075_hl384",
+        "primary_stem": "Instrumental"
+    },
+    "b165fbff113c959dba5303b74c6484bc": {
+        "vr_model_param": "3band_44100",
+        "primary_stem": "Instrumental"
+    },
+    "b5f988cd3e891dca7253bf5f0f3427c7": {
+        "vr_model_param": "4band_44100",
+        "primary_stem": "Instrumental"
+    },
+    "b99c35723bc35cb11ed14a4780006a80": {
+        "vr_model_param": "1band_sr44100_hl1024",
+        "primary_stem": "Instrumental"
+    },
+    "ba02fd25b71d620eebbdb49e18e4c336": {
+        "vr_model_param": "3band_44100_mid",
+        "primary_stem": "Instrumental"
+    },
+    "c4476ef424d8cba65f38d8d04e8514e2": {
+        "vr_model_param": "3band_44100_msb2",
+        "primary_stem": "Instrumental"
+    },
+    "da2d37b8be2972e550a409bae08335aa": {
+        "vr_model_param": "4band_44100",
+        "primary_stem": "Vocals"
+    },
+    "db57205d3133e39df8e050b435a78c80": {
+        "vr_model_param": "4band_44100",
+        "primary_stem": "Instrumental"
+    },
+    "ea83b08e32ec2303456fe50659035f69": {
+        "vr_model_param": "4band_v3",
+        "primary_stem": "Instrumental"
+    },
+    "f6ea8473ff86017b5ebd586ccacf156b": {
+        "vr_model_param": "4band_v2_sn",
+        "primary_stem": "Instrumental",
+        "is_karaoke": true
+    },
+    "fd297a61eafc9d829033f8b987c39a3d": {
+        "vr_model_param": "1band_sr32000_hl512",
+        "primary_stem": "Instrumental"
+    },
+    "0ec76fd9e65f81d8b4fbd13af4826ed8": {
+        "vr_model_param": "4band_v3",
+        "primary_stem": "No Woodwinds"
+    },
+    "0fb9249ffe4ffc38d7b16243f394c0ff": {
+        "vr_model_param": "4band_v3",
+        "primary_stem": "No Reverb"
+    },
+    "6857b2972e1754913aad0c9a1678c753": {
+        "vr_model_param": "4band_v3",
+        "primary_stem": "No Echo",
+        "nout": 48,
+        "nout_lstm": 128
+    },
+    "f200a145434efc7dcf0cd093f517ed52": {
+        "vr_model_param": "4band_v3",
+        "primary_stem": "No Echo",
+        "nout": 48,
+        "nout_lstm": 128
+    },
+    "44c55d8b5d2e3edea98c2b2bf93071c7": {
+        "vr_model_param": "4band_v3",
+        "primary_stem": "Noise",
+        "nout": 48,
+        "nout_lstm": 128
+    },
+    "51ea8c43a6928ed3c10ef5cb2707d57b": {
+        "vr_model_param": "1band_sr44100_hl1024",
+        "primary_stem": "Noise",
+        "nout": 16,
+        "nout_lstm": 128
+    },
+    "944950a9c5963a5eb70b445d67b7068a": {
+        "vr_model_param": "4band_v3_sn",
+        "primary_stem": "Vocals",
+        "nout": 64,
+        "nout_lstm": 128,
+        "is_karaoke": false,
+        "is_bv_model": true,
+        "is_bv_model_rebalanced": 0.9
+    }
+}

face_analysis/models/1k3d68.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:df5c06b8a0c12e422b2ed8947b8869faa4105387f199c477af038aa01f9a45cc
+size 143607619

face_analysis/models/2d106det.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f001b856447c413801ef5c42091ed0cd516fcd21f2d6b79635b1e733a7109dbf
+size 5030888

face_analysis/models/face_landmarker_v2_with_blendshapes.task ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:64184e229b263107bc2b804c6625db1341ff2bb731874b0bcc2fe6544e0bc9ff
+size 3758596

face_analysis/models/genderage.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4fde69b1c810857b88c64a335084f1c3fe8f01246c9a191b48c7bb756d6652fb
+size 1322532

face_analysis/models/glintr100.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4ab1d6435d639628a6f3e5008dd4f929edf4c4124b1a7169e1048f9fef534cdf
+size 260665334

face_analysis/models/scrfd_10g_bnkps.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5838f7fe053675b1c7a08b633df49e7af5495cee0493c7dcf6697200b85b5b91
+size 16923827

facelib/detection_Resnet50_Final.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6d1de9c2944f2ccddca5f5e010ea5ae64a39845a86311af6fdf30841b0a5a16d
+size 109497761

facelib/detection_mobilenet0.25_Final.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2979b33ffafda5d74b6948cd7a5b9a7a62f62b949cef24e95fd15d2883a65220
+size 1789735

facelib/parsing_parsenet.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3d558d8d0e42c20224f13cf5a29c79eba2d59913419f945545d8cf7b72920de2
+size 85331193

facelib/yolov5l-face.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1ba9d2125fda4d823df5152b9fc2903c59aa76c0d3771e02bcf13a56a282cf96
+size 186973013

facelib/yolov5n-face.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d2bbfbe9f36cf1ec345dc69658d7209e5448a676d946f1bf7818ac50d4489357
+size 7145625

hallo2/net.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:dcd387cb755079462a91218ac316274f9addc61728925eeee15ae893c89f55c5
+size 4850767602

hallo2/net_g.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:14af97eedd53b2a5632def52692b645fbb9306d178afa6c8bece021a60ec7ad1
+size 904732980

motion_module/mm_sd_v15_v2.ckpt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:69ed0f5fef82b110aca51bcab73b21104242bc65d6ab4b8b2a2a94d31cad1bf0
+size 1817888431

realesrgan/RealESRGAN_x2plus.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:49fafd45f8fd7aa8d31ab2a22d14d91b536c34494a5cfe31eb5d89c2fa266abb
+size 67061725

sd-vae-ft-mse/README.md ADDED Viewed

	@@ -0,0 +1,83 @@

+---
+license: mit
+tags:
+- stable-diffusion
+- stable-diffusion-diffusers
+inference: false
+---
+# Improved Autoencoders
+## Utilizing
+These weights are intended to be used with the [🧨 diffusers library](https://github.com/huggingface/diffusers). If you are looking for the model to use with the original [CompVis Stable Diffusion codebase](https://github.com/CompVis/stable-diffusion), [come here](https://huggingface.co/stabilityai/sd-vae-ft-mse-original).
+#### How to use with 🧨 diffusers
+You can integrate this fine-tuned VAE decoder into your existing `diffusers` workflows by passing a `vae` argument to the `StableDiffusionPipeline`:
+```py
+from diffusers.models import AutoencoderKL
+from diffusers import StableDiffusionPipeline
+model = "CompVis/stable-diffusion-v1-4"
+vae = AutoencoderKL.from_pretrained("stabilityai/sd-vae-ft-mse")
+pipe = StableDiffusionPipeline.from_pretrained(model, vae=vae)
+```
+## Decoder Finetuning
+We publish two kl-f8 autoencoder versions, finetuned from the original [kl-f8 autoencoder](https://github.com/CompVis/latent-diffusion#pretrained-autoencoding-models) on a 1:1 ratio of [LAION-Aesthetics](https://laion.ai/blog/laion-aesthetics/) and LAION-Humans, an unreleased subset containing only SFW images of humans. The intent was to fine-tune on the Stable Diffusion training set (the autoencoder was originally trained on OpenImages) but also enrich the dataset with images of humans to improve the reconstruction of faces.
+The first, _ft-EMA_, was resumed from the original checkpoint, trained for 313198 steps and uses EMA weights. It uses the same loss configuration as the original checkpoint (L1 + LPIPS).
+The second, _ft-MSE_, was resumed from _ft-EMA_ and uses EMA weights and was trained for another 280k steps using a different loss, with more emphasis
+on MSE reconstruction (MSE + 0.1 * LPIPS). It produces somewhat "smoother" outputs. The batch size for both versions was 192 (16 A100s, batch size 12 per GPU).
+To keep compatibility with existing models, only the decoder part was finetuned; the checkpoints can be used as a drop-in replacement for the existing autoencoder.
+_Original kl-f8 VAE vs f8-ft-EMA vs f8-ft-MSE_
+## Evaluation
+### COCO 2017 (256x256, val, 5000 images)
+| Model    | train steps | rFID | PSNR         | SSIM          | PSIM          | Link                                                                              | Comments
+|----------|---------|------|--------------|---------------|---------------|-----------------------------------------------------------------------------------|-------------------------------------------------------------------------------------------------|
+|          |         |      |              |               |               |                                                                                   |                                                                                                 |
+| original | 246803        | 4.99 | 23.4 +/- 3.8 | 0.69 +/- 0.14 | 1.01 +/- 0.28 | https://ommer-lab.com/files/latent-diffusion/kl-f8.zip                            | as used in SD                                                                                   |
+| ft-EMA   | 560001        | 4.42 | 23.8 +/- 3.9 | 0.69 +/- 0.13 | 0.96 +/- 0.27 | https://huggingface.co/stabilityai/sd-vae-ft-ema-original/resolve/main/vae-ft-ema-560000-ema-pruned.ckpt | slightly better overall, with EMA                                                               |
+| ft-MSE   | 840001        | 4.70 | 24.5 +/- 3.7 | 0.71 +/- 0.13 | 0.92 +/- 0.27 | https://huggingface.co/stabilityai/sd-vae-ft-mse-original/resolve/main/vae-ft-mse-840000-ema-pruned.ckpt | resumed with EMA from ft-EMA, emphasis on MSE (rec. loss = MSE + 0.1 * LPIPS), smoother outputs |
+### LAION-Aesthetics 5+ (256x256, subset, 10000 images)
+| Model    | train steps | rFID | PSNR         | SSIM          | PSIM          | Link                                                                              | Comments
+|----------|-----------|------|--------------|---------------|---------------|-----------------------------------------------------------------------------------|-------------------------------------------------------------------------------------------------|
+|          |           |      |              |               |               |                                                                                   |                                                                                                 |
+| original | 246803         | 2.61 | 26.0 +/- 4.4 | 0.81 +/- 0.12 | 0.75 +/- 0.36 | https://ommer-lab.com/files/latent-diffusion/kl-f8.zip                            | as used in SD                                                                                   |
+| ft-EMA   | 560001          | 1.77 | 26.7 +/- 4.8 | 0.82 +/- 0.12 | 0.67 +/- 0.34 | https://huggingface.co/stabilityai/sd-vae-ft-ema-original/resolve/main/vae-ft-ema-560000-ema-pruned.ckpt | slightly better overall, with EMA                                                               |
+| ft-MSE   | 840001          | 1.88 | 27.3 +/- 4.7 | 0.83 +/- 0.11 | 0.65 +/- 0.34 | https://huggingface.co/stabilityai/sd-vae-ft-mse-original/resolve/main/vae-ft-mse-840000-ema-pruned.ckpt | resumed with EMA from ft-EMA, emphasis on MSE (rec. loss = MSE + 0.1 * LPIPS), smoother outputs |
+### Visual
+_Visualization of reconstructions on 256x256 images from the COCO2017 validation dataset._
+<p align="center">
+  <br>
+  <b>
+256x256: ft-EMA (left), ft-MSE (middle), original (right)</b>
+</p>
+<p align="center">
+<img src=https://huggingface.co/stabilityai/stable-diffusion-decoder-finetune/resolve/main/eval/ae-decoder-tuning-reconstructions/merged/00025_merged.png />
+</p>
+<p align="center">
+<img src=https://huggingface.co/stabilityai/stable-diffusion-decoder-finetune/resolve/main/eval/ae-decoder-tuning-reconstructions/merged/00011_merged.png />
+</p>
+<p align="center">
+<img src=https://huggingface.co/stabilityai/stable-diffusion-decoder-finetune/resolve/main/eval/ae-decoder-tuning-reconstructions/merged/00037_merged.png />
+</p>
+<p align="center">
+<img src=https://huggingface.co/stabilityai/stable-diffusion-decoder-finetune/resolve/main/eval/ae-decoder-tuning-reconstructions/merged/00043_merged.png />
+</p>
+<p align="center">
+<img src=https://huggingface.co/stabilityai/stable-diffusion-decoder-finetune/resolve/main/eval/ae-decoder-tuning-reconstructions/merged/00053_merged.png />
+</p>
+<p align="center">
+<img src=https://huggingface.co/stabilityai/stable-diffusion-decoder-finetune/resolve/main/eval/ae-decoder-tuning-reconstructions/merged/00029_merged.png />
+</p>

sd-vae-ft-mse/config.json ADDED Viewed

	@@ -0,0 +1,29 @@

+{
+  "_class_name": "AutoencoderKL",
+  "_diffusers_version": "0.4.2",
+  "act_fn": "silu",
+  "block_out_channels": [
+    128,
+    256,
+    512,
+    512
+  ],
+  "down_block_types": [
+    "DownEncoderBlock2D",
+    "DownEncoderBlock2D",
+    "DownEncoderBlock2D",
+    "DownEncoderBlock2D"
+  ],
+  "in_channels": 3,
+  "latent_channels": 4,
+  "layers_per_block": 2,
+  "norm_num_groups": 32,
+  "out_channels": 3,
+  "sample_size": 256,
+  "up_block_types": [
+    "UpDecoderBlock2D",
+    "UpDecoderBlock2D",
+    "UpDecoderBlock2D",
+    "UpDecoderBlock2D"
+  ]
+}

sd-vae-ft-mse/diffusion_pytorch_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a1d993488569e928462932c8c38a0760b874d166399b14414135bd9c42df5815
+size 334643276

stable-diffusion-v1-5/unet/config.json ADDED Viewed

	@@ -0,0 +1,36 @@

+{
+  "_class_name": "UNet2DConditionModel",
+  "_diffusers_version": "0.6.0",
+  "act_fn": "silu",
+  "attention_head_dim": 8,
+  "block_out_channels": [
+    320,
+    640,
+    1280,
+    1280
+  ],
+  "center_input_sample": false,
+  "cross_attention_dim": 768,
+  "down_block_types": [
+    "CrossAttnDownBlock2D",
+    "CrossAttnDownBlock2D",
+    "CrossAttnDownBlock2D",
+    "DownBlock2D"
+  ],
+  "downsample_padding": 1,
+  "flip_sin_to_cos": true,
+  "freq_shift": 0,
+  "in_channels": 4,
+  "layers_per_block": 2,
+  "mid_block_scale_factor": 1,
+  "norm_eps": 1e-05,
+  "norm_num_groups": 32,
+  "out_channels": 4,
+  "sample_size": 64,
+  "up_block_types": [
+    "UpBlock2D",
+    "CrossAttnUpBlock2D",
+    "CrossAttnUpBlock2D",
+    "CrossAttnUpBlock2D"
+  ]
+}

stable-diffusion-v1-5/unet/diffusion_pytorch_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:19da7aaa4b880e59d56843f1fcb4dd9b599c28a1d9d9af7c1143057c8ffae9f1
+size 3438167540

wav2vec/wav2vec2-base-960h/README.md ADDED Viewed

	@@ -0,0 +1,128 @@

+---
+language: en
+datasets:
+- librispeech_asr
+tags:
+- audio
+- automatic-speech-recognition
+- hf-asr-leaderboard
+license: apache-2.0
+widget:
+- example_title: Librispeech sample 1
+  src: https://cdn-media.huggingface.co/speech_samples/sample1.flac
+- example_title: Librispeech sample 2
+  src: https://cdn-media.huggingface.co/speech_samples/sample2.flac
+model-index:
+- name: wav2vec2-base-960h
+  results:
+  - task:
+      name: Automatic Speech Recognition
+      type: automatic-speech-recognition
+    dataset:
+      name: LibriSpeech (clean)
+      type: librispeech_asr
+      config: clean
+      split: test
+      args:
+        language: en
+    metrics:
+    - name: Test WER
+      type: wer
+      value: 3.4
+  - task:
+      name: Automatic Speech Recognition
+      type: automatic-speech-recognition
+    dataset:
+      name: LibriSpeech (other)
+      type: librispeech_asr
+      config: other
+      split: test
+      args:
+        language: en
+    metrics:
+    - name: Test WER
+      type: wer
+      value: 8.6
+---
+# Wav2Vec2-Base-960h
+[Facebook's Wav2Vec2](https://ai.facebook.com/blog/wav2vec-20-learning-the-structure-of-speech-from-raw-audio/)
+The base model pretrained and fine-tuned on 960 hours of Librispeech on 16 kHz sampled speech audio. When using the model
+make sure that your speech input is also sampled at 16 kHz.
+[Paper](https://arxiv.org/abs/2006.11477)
+Authors: Alexei Baevski, Henry Zhou, Abdelrahman Mohamed, Michael Auli
+**Abstract**
+We show for the first time that learning powerful representations from speech audio alone followed by fine-tuning on transcribed speech can outperform the best semi-supervised methods while being conceptually simpler. wav2vec 2.0 masks the speech input in the latent space and solves a contrastive task defined over a quantization of the latent representations which are jointly learned. Experiments using all labeled data of Librispeech achieve 1.8/3.3 WER on the clean/other test sets. When lowering the amount of labeled data to one hour, wav2vec 2.0 outperforms the previous state of the art on the 100 hour subset while using 100 times less labeled data. Using just ten minutes of labeled data and pre-training on 53k hours of unlabeled data still achieves 4.8/8.2 WER. This demonstrates the feasibility of speech recognition with limited amounts of labeled data.
+The original model can be found under https://github.com/pytorch/fairseq/tree/master/examples/wav2vec#wav2vec-20.
+# Usage
+To transcribe audio files the model can be used as a standalone acoustic model as follows:
+```python
+ from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC
+ from datasets import load_dataset
+ import torch
+ # load model and tokenizer
+ processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-base-960h")
+ model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-base-960h")
+ # load dummy dataset and read soundfiles
+ ds = load_dataset("patrickvonplaten/librispeech_asr_dummy", "clean", split="validation")
+ # tokenize
+ input_values = processor(ds[0]["audio"]["array"], return_tensors="pt", padding="longest").input_values  # Batch size 1
+ # retrieve logits
+ logits = model(input_values).logits
+ # take argmax and decode
+ predicted_ids = torch.argmax(logits, dim=-1)
+ transcription = processor.batch_decode(predicted_ids)
+ ```
+ ## Evaluation
+ This code snippet shows how to evaluate **facebook/wav2vec2-base-960h** on LibriSpeech's "clean" and "other" test data.
+```python
+from datasets import load_dataset
+from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
+import torch
+from jiwer import wer
+librispeech_eval = load_dataset("librispeech_asr", "clean", split="test")
+model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-base-960h").to("cuda")
+processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-base-960h")
+def map_to_pred(batch):
+    input_values = processor(batch["audio"]["array"], return_tensors="pt", padding="longest").input_values
+    with torch.no_grad():
+        logits = model(input_values.to("cuda")).logits
+    predicted_ids = torch.argmax(logits, dim=-1)
+    transcription = processor.batch_decode(predicted_ids)
+    batch["transcription"] = transcription
+    return batch
+result = librispeech_eval.map(map_to_pred, batched=True, batch_size=1, remove_columns=["audio"])
+print("WER:", wer(result["text"], result["transcription"]))
+```
+*Result (WER)*:
+| "clean" | "other" |
+|---|---|
+| 3.4 | 8.6 |

wav2vec/wav2vec2-base-960h/config.json ADDED Viewed

	@@ -0,0 +1,77 @@

+{
+  "_name_or_path": "facebook/wav2vec2-base-960h",
+  "activation_dropout": 0.1,
+  "apply_spec_augment": true,
+  "architectures": [
+    "Wav2Vec2ForCTC"
+  ],
+  "attention_dropout": 0.1,
+  "bos_token_id": 1,
+  "codevector_dim": 256,
+  "contrastive_logits_temperature": 0.1,
+  "conv_bias": false,
+  "conv_dim": [
+    512,
+    512,
+    512,
+    512,
+    512,
+    512,
+    512
+  ],
+  "conv_kernel": [
+    10,
+    3,
+    3,
+    3,
+    3,
+    2,
+    2
+  ],
+  "conv_stride": [
+    5,
+    2,
+    2,
+    2,
+    2,
+    2,
+    2
+  ],
+  "ctc_loss_reduction": "sum",
+  "ctc_zero_infinity": false,
+  "diversity_loss_weight": 0.1,
+  "do_stable_layer_norm": false,
+  "eos_token_id": 2,
+  "feat_extract_activation": "gelu",
+  "feat_extract_dropout": 0.0,
+  "feat_extract_norm": "group",
+  "feat_proj_dropout": 0.1,
+  "feat_quantizer_dropout": 0.0,
+  "final_dropout": 0.1,
+  "gradient_checkpointing": false,
+  "hidden_act": "gelu",
+  "hidden_dropout": 0.1,
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "layer_norm_eps": 1e-05,
+  "layerdrop": 0.1,
+  "mask_feature_length": 10,
+  "mask_feature_prob": 0.0,
+  "mask_time_length": 10,
+  "mask_time_prob": 0.05,
+  "model_type": "wav2vec2",
+  "num_attention_heads": 12,
+  "num_codevector_groups": 2,
+  "num_codevectors_per_group": 320,
+  "num_conv_pos_embedding_groups": 16,
+  "num_conv_pos_embeddings": 128,
+  "num_feat_extract_layers": 7,
+  "num_hidden_layers": 12,
+  "num_negatives": 100,
+  "pad_token_id": 0,
+  "proj_codevector_dim": 256,
+  "transformers_version": "4.7.0.dev0",
+  "vocab_size": 32
+}

wav2vec/wav2vec2-base-960h/feature_extractor_config.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+  "do_normalize": true,
+  "feature_dim": 1,
+  "padding_side": "right",
+  "padding_value": 0.0,
+  "return_attention_mask": false,
+  "sampling_rate": 16000
+}

wav2vec/wav2vec2-base-960h/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8aa76ab2243c81747a1f832954586bc566090c83a0ac167df6f31f0fa917d74a
+size 377607901

wav2vec/wav2vec2-base-960h/preprocessor_config.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+  "do_normalize": true,
+  "feature_size": 1,
+  "padding_side": "right",
+  "padding_value": 0.0,
+  "return_attention_mask": false,
+  "sampling_rate": 16000
+}

wav2vec/wav2vec2-base-960h/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "<unk>", "pad_token": "<pad>"}

wav2vec/wav2vec2-base-960h/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"unk_token": "<unk>", "bos_token": "<s>", "eos_token": "</s>", "pad_token": "<pad>", "do_lower_case": false, "return_attention_mask": false, "do_normalize": true}

wav2vec/wav2vec2-base-960h/vocab.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"<pad>": 0, "<s>": 1, "</s>": 2, "<unk>": 3, "|": 4, "E": 5, "T": 6, "A": 7, "O": 8, "N": 9, "I": 10, "H": 11, "S": 12, "R": 13, "D": 14, "L": 15, "U": 16, "M": 17, "W": 18, "C": 19, "F": 20, "G": 21, "Y": 22, "P": 23, "B": 24, "V": 25, "K": 26, "'": 27, "X": 28, "J": 29, "Q": 30, "Z": 31}