Spaces · Running on Zero
Commit 8488f1e
Parent: 487ed33

fix: flash attention

Files changed:
- model.py (+14 -1)
- requirements.txt (+0 -1)
model.py CHANGED

```diff
@@ -15,10 +15,23 @@ logging.basicConfig(
 )
 logger = logging.getLogger(__name__)
 
+try:
+    import subprocess
+
+    subprocess.run(
+        "pip install flash-attn --no-build-isolation",
+        env={"FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
+        shell=True,
+    )
+    logger.info("Flash Attention installed successfully.")
+    USE_FA = True
+except:
+    USE_FA = False
+    logger.warning("Flash Attention not available. Using standard attention instead.")
+
 # Model constants
 MODEL_ID = "JacobLinCool/whisper-large-v3-turbo-common_voice_19_0-zh-TW"
 PHI_MODEL_ID = "JacobLinCool/Phi-4-multimodal-instruct-commonvoice-zh-tw"
-USE_FA = torch.cuda.is_available()  # Use Flash Attention if CUDA is available
 
 # Model instances (initialized lazily)
 pipe: Optional[Pipeline] = None
```
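For context, a minimal sketch of how the new USE_FA flag would typically be consumed when the Whisper pipeline is built. The loading code is not part of this diff, so `load_pipe` and the exact arguments are illustrative assumptions; `attn_implementation="flash_attention_2"` is the standard transformers switch for flash-attn, with `"sdpa"` as the usual fallback:

```python
# Sketch only: how USE_FA is presumably consumed downstream (not shown in this diff).
import torch
from transformers import pipeline

def load_pipe():
    # "flash_attention_2" requires the flash-attn package installed above;
    # "sdpa" is PyTorch's built-in scaled-dot-product attention fallback.
    attn = "flash_attention_2" if USE_FA else "sdpa"
    return pipeline(
        "automatic-speech-recognition",
        model=MODEL_ID,
        torch_dtype=torch.float16,
        model_kwargs={"attn_implementation": attn},
    )
```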
requirements.txt CHANGED

```diff
@@ -4,4 +4,3 @@ transformers
 accelerate
 spaces
 librosa
-flash-attn
```
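Dropping flash-attn from requirements.txt and installing it at runtime instead is presumably the point of the fix: as a build-time requirement, pip attempts to compile flash-attn's CUDA kernels in an environment where no GPU is available, which can fail the Space build on Zero hardware. The runtime install in model.py sets FLASH_ATTENTION_SKIP_CUDA_BUILD=TRUE, which tells the flash-attn setup to skip local CUDA compilation, and the try/except falls back to standard attention if the install still fails.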