Luigi committed on
Commit
0a56987
·
1 Parent(s): 81633d5

Fix llama-cpp-python blocking on HF Spaces due to excessive threads

Browse files

- Limit effective vCPUs to 2 on HF Spaces free tier (cpu_count() reports 16, but only 2 are actually available)
- Update vCPU detection logic in utils.py with SPACE_ID environment check
- Enhance logging in summarization.py to show detected vs effective vCPUs
- Add HF Spaces limitation indicator in startup messages

Files changed (2) hide show
  1. src/summarization.py +5 -1
  2. src/utils.py +7 -1
src/summarization.py CHANGED
@@ -11,6 +11,8 @@ but llama_cpp directly for LLM calls (better performance).
11
  import time
12
  from functools import lru_cache
13
  from typing import Iterator
 
 
14
 
15
  from llama_cpp import Llama
16
  from langchain.text_splitter import RecursiveCharacterTextSplitter
@@ -19,7 +21,9 @@ from langchain.prompts import PromptTemplate
19
  from .utils import available_gguf_llms, num_vcpus, s2tw_converter
20
 
21
  # Detection of available logical cores
22
- print(f"Detected vCPUs: {num_vcpus}")
 
 
23
 
24
 
25
  @lru_cache(maxsize=1)
 
11
  import time
12
  from functools import lru_cache
13
  from typing import Iterator
14
+ import os
15
+ import multiprocessing
16
 
17
  from llama_cpp import Llama
18
  from langchain.text_splitter import RecursiveCharacterTextSplitter
 
21
  from .utils import available_gguf_llms, num_vcpus, s2tw_converter
22
 
23
  # Detection of available logical cores
24
+ detected_cpus = multiprocessing.cpu_count()
25
+ is_hf_spaces = os.environ.get('SPACE_ID') is not None
26
+ print(f"Detected vCPUs: {detected_cpus}, Effective vCPUs: {num_vcpus}" + (" (HF Spaces limited)" if is_hf_spaces else ""))
27
 
28
 
29
  @lru_cache(maxsize=1)
src/utils.py CHANGED
@@ -7,7 +7,13 @@ from typing import Optional
7
  import multiprocessing
8
 
9
  # Detect logical cores (vCPUs available to the container)
10
- num_vcpus = multiprocessing.cpu_count()
 
 
 
 
 
 
11
 
12
  model_names = {
13
  "tiny English":"tiny",
 
7
  import multiprocessing
8
 
9
  # Detect logical cores (vCPUs available to the container)
10
+ # On HF Spaces free tier, cpu_count() reports 16 but only 2 are actually available
11
+ detected_cpus = multiprocessing.cpu_count()
12
+ if os.environ.get('SPACE_ID'):
13
+ # HF Spaces free tier limitation
14
+ num_vcpus = min(detected_cpus, 2)
15
+ else:
16
+ num_vcpus = detected_cpus
17
 
18
  model_names = {
19
  "tiny English":"tiny",