Spaces:

JotunnBurton
/

wuwa-bert-vits2

Sleeping

App Files Files Community

JotunnBurton commited on Apr 16

Commit

bc9f98e

verified ·

1 Parent(s): ae30800

Update clap_wrapper.py

Browse files

Files changed (1) hide show

clap_wrapper.py +42 -18

clap_wrapper.py CHANGED Viewed

@@ -1,34 +1,53 @@
 import sys
 import torch
-from huggingface_hub import hf_hub_download
 from transformers import ClapModel, ClapProcessor
 from config import config
-models = dict()
-# กำหนดชื่อโมเดลและโฟลเดอร์ที่ต้องการเก็บ
-model_name = "laion/clap-htsat-fused"
 LOCAL_PATH = "./emotional/clap-htsat-fused"
-# ดาวน์โหลดโมเดลจาก Hugging Face
-hf_hub_download(repo_id=model_name,filename="pytorch_model.bin", cache_dir=LOCAL_PATH, force_download=True)
-# Now load the processor and model from the local directory
-processor = ClapProcessor.from_pretrained(LOCAL_PATH)
 def get_clap_audio_feature(audio_data, device=config.bert_gen_config.device):
-    if sys.platform == "darwin" and torch.backends.mps.is_available() and device == "cpu":
         device = "mps"
     if not device:
         device = "cuda"
-    if device not in models:
         if config.webui_config.fp16_run:
             models[device] = ClapModel.from_pretrained(
-                LOCAL_PATH, torch_dtype=torch.float16
             ).to(device)
         else:
             models[device] = ClapModel.from_pretrained(
-                LOCAL_PATH
             ).to(device)
     with torch.no_grad():
         inputs = processor(
@@ -37,19 +56,24 @@ def get_clap_audio_feature(audio_data, device=config.bert_gen_config.device):
         emb = models[device].get_audio_features(**inputs).float()
     return emb.T
 def get_clap_text_feature(text, device=config.bert_gen_config.device):
-    if sys.platform == "darwin" and torch.backends.mps.is_available() and device == "cpu":
         device = "mps"
     if not device:
         device = "cuda"
-    if device not in models:
         if config.webui_config.fp16_run:
             models[device] = ClapModel.from_pretrained(
-                LOCAL_PATH, torch_dtype=torch.float16
             ).to(device)
         else:
             models[device] = ClapModel.from_pretrained(
-                LOCAL_PATH
             ).to(device)
     with torch.no_grad():
         inputs = processor(text=text, return_tensors="pt").to(device)

 import sys
+import os
 import torch
 from transformers import ClapModel, ClapProcessor
+from huggingface_hub import hf_hub_download
 from config import config
+# กำหนดชื่อและ path ของโมเดล
+HF_REPO_ID = "laion/clap-htsat-fused"
 LOCAL_PATH = "./emotional/clap-htsat-fused"
+# ตรวจสอบว่ามีไฟล์โมเดลใน LOCAL_PATH แล้วหรือยัง ถ้าไม่มีก็ดาวน์โหลด
+def ensure_model_downloaded():
+    os.makedirs(LOCAL_PATH, exist_ok=True)
+    required_files = ["pytorch_model.bin", "config.json", "preprocessor_config.json"]
+    for file in required_files:
+        local_file_path = os.path.join(LOCAL_PATH, file)
+        if not os.path.isfile(local_file_path):
+            print(f"Downloading {file} from {HF_REPO_ID}...")
+            hf_hub_download(
+                repo_id=HF_REPO_ID,
+                filename=file,
+                cache_dir=LOCAL_PATH,
+                force_download=False
+            )
+ensure_model_downloaded()
+# โหลด processor
+models = dict()
+processor = ClapProcessor.from_pretrained(LOCAL_PATH, local_files_only=True)
 def get_clap_audio_feature(audio_data, device=config.bert_gen_config.device):
+    if (
+        sys.platform == "darwin"
+        and torch.backends.mps.is_available()
+        and device == "cpu"
+    ):
         device = "mps"
     if not device:
         device = "cuda"
+    if device not in models.keys():
         if config.webui_config.fp16_run:
             models[device] = ClapModel.from_pretrained(
+                LOCAL_PATH, torch_dtype=torch.float16, local_files_only=True
             ).to(device)
         else:
             models[device] = ClapModel.from_pretrained(
+                LOCAL_PATH, local_files_only=True
             ).to(device)
     with torch.no_grad():
         inputs = processor(
         emb = models[device].get_audio_features(**inputs).float()
     return emb.T
 def get_clap_text_feature(text, device=config.bert_gen_config.device):
+    if (
+        sys.platform == "darwin"
+        and torch.backends.mps.is_available()
+        and device == "cpu"
+    ):
         device = "mps"
     if not device:
         device = "cuda"
+    if device not in models.keys():
         if config.webui_config.fp16_run:
             models[device] = ClapModel.from_pretrained(
+                LOCAL_PATH, torch_dtype=torch.float16, local_files_only=True
             ).to(device)
         else:
             models[device] = ClapModel.from_pretrained(
+                LOCAL_PATH, local_files_only=True
             ).to(device)
     with torch.no_grad():
         inputs = processor(text=text, return_tensors="pt").to(device)