Fix llama-cpp-python blocking on HF Spaces due to excessive threads
Browse files
- Limit effective vCPUs to 2 on HF Spaces free tier (cpu_count() detects 16, but only 2 are available)
- Update vCPU detection logic in utils.py with a SPACE_ID environment variable check
- Enhance logging in summarization.py to show detected vs effective vCPUs
- Add HF Spaces limitation indicator in startup messages
- src/summarization.py +5 -1
- src/utils.py +7 -1
src/summarization.py
CHANGED
|
@@ -11,6 +11,8 @@ but llama_cpp directly for LLM calls (better performance).
|
|
| 11 |
import time
|
| 12 |
from functools import lru_cache
|
| 13 |
from typing import Iterator
|
|
|
|
|
|
|
| 14 |
|
| 15 |
from llama_cpp import Llama
|
| 16 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
|
@@ -19,7 +21,9 @@ from langchain.prompts import PromptTemplate
|
|
| 19 |
from .utils import available_gguf_llms, num_vcpus, s2tw_converter
|
| 20 |
|
| 21 |
# Detection of available logical cores
|
| 22 |
-
|
|
|
|
|
|
|
| 23 |
|
| 24 |
|
| 25 |
@lru_cache(maxsize=1)
|
|
|
|
| 11 |
import time
|
| 12 |
from functools import lru_cache
|
| 13 |
from typing import Iterator
|
| 14 |
+
import os
|
| 15 |
+
import multiprocessing
|
| 16 |
|
| 17 |
from llama_cpp import Llama
|
| 18 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
|
|
|
| 21 |
from .utils import available_gguf_llms, num_vcpus, s2tw_converter
|
| 22 |
|
| 23 |
# Detection of available logical cores
|
| 24 |
+
detected_cpus = multiprocessing.cpu_count()
|
| 25 |
+
is_hf_spaces = os.environ.get('SPACE_ID') is not None
|
| 26 |
+
print(f"Detected vCPUs: {detected_cpus}, Effective vCPUs: {num_vcpus}" + (" (HF Spaces limited)" if is_hf_spaces else ""))
|
| 27 |
|
| 28 |
|
| 29 |
@lru_cache(maxsize=1)
|
src/utils.py
CHANGED
|
@@ -7,7 +7,13 @@ from typing import Optional
|
|
| 7 |
import multiprocessing
|
| 8 |
|
| 9 |
# Detect logical cores (vCPUs available to the container)
|
| 10 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
|
| 12 |
model_names = {
|
| 13 |
"tiny English":"tiny",
|
|
|
|
| 7 |
import multiprocessing
|
| 8 |
|
| 9 |
# Detect logical cores (vCPUs available to the container)
|
| 10 |
+
# On HF Spaces free tier, cpu_count() reports 16 but only 2 are actually available
|
| 11 |
+
detected_cpus = multiprocessing.cpu_count()
|
| 12 |
+
if os.environ.get('SPACE_ID'):
|
| 13 |
+
# HF Spaces free tier limitation
|
| 14 |
+
num_vcpus = min(detected_cpus, 2)
|
| 15 |
+
else:
|
| 16 |
+
num_vcpus = detected_cpus
|
| 17 |
|
| 18 |
model_names = {
|
| 19 |
"tiny English":"tiny",
|