Dan Flower committed
Commit 8667228 · 1 Parent(s): d42f795

deploy: sync model/utils into TemplateA and update Dockerfile (canonical COPY + cache-bust)

model/download_model.py DELETED
@@ -1,28 +0,0 @@
-import os
-from huggingface_hub import hf_hub_download
-
-# Use the token directly, skip login()
-token = os.environ.get("HF_TOKEN")
-
-if not token:
-    raise RuntimeError("HF_TOKEN environment variable is missing")
-
-print("Downloading model with token:", token[:8] + "…")
-
-model_path = hf_hub_download(
-    repo_id="TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF",
-    filename="tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf",
-    repo_type="model",
-    local_dir="/tmp/models",
-    token=token,
-)
-
-print("✅ Model downloaded to:", model_path)
-
-if os.path.exists(model_path):
-    print("🎉 File exists at", model_path)
-else:
-    print("❌ File not found after download!")
-
-
-# https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF/blob/main/tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf
model/model_runner.py ADDED
@@ -0,0 +1,103 @@
+# model_runner.py
+import os
+import sys
+from typing import List, Optional
+from llama_cpp import Llama
+
+print(f"[BOOT] model_runner from {__file__}", file=sys.stderr)
+
+# ---- Phase 2: flags (no behavior change) ------------------------------------
+# Reads LAB_* env toggles; all defaults preserve current behavior.
+try:
+    from utils import flags  # if your package path is different, adjust import
+except Exception:
+    # Fallback inline flags if utils.flags isn't available in this lab
+    def _as_bool(val: Optional[str], default: bool) -> bool:
+        if val is None:
+            return default
+        return val.strip().lower() in {"1", "true", "yes", "on", "y", "t"}
+    class _F:
+        SANITIZE_ENABLED = _as_bool(os.getenv("LAB_SANITIZE_ENABLED"), False)  # you don't sanitize today
+        STOPSEQ_ENABLED = _as_bool(os.getenv("LAB_STOPSEQ_ENABLED"), False)    # extra stops only; defaults off
+        CRITIC_ENABLED = _as_bool(os.getenv("LAB_CRITIC_ENABLED"), False)
+        JSON_MODE = _as_bool(os.getenv("LAB_JSON_MODE"), False)
+        EVIDENCE_GATE = _as_bool(os.getenv("LAB_EVIDENCE_GATE"), False)
+        @staticmethod
+        def snapshot():
+            return {
+                "LAB_SANITIZE_ENABLED": _F.SANITIZE_ENABLED,
+                "LAB_STOPSEQ_ENABLED": _F.STOPSEQ_ENABLED,
+                "LAB_CRITIC_ENABLED": _F.CRITIC_ENABLED,
+                "LAB_JSON_MODE": _F.JSON_MODE,
+                "LAB_EVIDENCE_GATE": _F.EVIDENCE_GATE,
+            }
+    flags = _F()
+
+print("[flags] snapshot:", getattr(flags, "snapshot", lambda: {})(), file=sys.stderr)
+
+# Optional sanitizer hook (kept no-op unless enabled later)
+def _sanitize(text: str) -> str:
+    # Phase 2: default False -> no behavior change
+    if getattr(flags, "SANITIZE_ENABLED", False):
+        # TODO: wire your real sanitizer in Phase 3+
+        return text.strip()
+    return text
+
+# Stop sequences: keep today's defaults ALWAYS.
+# If LAB_STOPSEQ_ENABLED=true, add *extra* stops from STOP_SEQUENCES env (comma-separated).
+DEFAULT_STOPS: List[str] = ["\nUser:", "\nAssistant:"]
+
+def _extra_stops_from_env() -> List[str]:
+    if not getattr(flags, "STOPSEQ_ENABLED", False):
+        return []
+    raw = os.getenv("STOP_SEQUENCES", "")
+    toks = [t.strip() for t in raw.split(",") if t.strip()]
+    return toks
+
+# ---- Model cache / load ------------------------------------------------------
+_model = None  # module-level cache
+
+def load_model():
+    global _model
+    if _model is not None:
+        return _model
+
+    model_path = os.getenv("MODEL_PATH")
+    if not model_path or not os.path.exists(model_path):
+        raise ValueError(f"Model path does not exist or is not set: {model_path}")
+
+    print(f"[INFO] Loading model from {model_path}")
+
+    _model = Llama(
+        model_path=model_path,
+        n_ctx=1024,      # short context to reduce memory use
+        n_threads=4,     # number of CPU threads
+        n_gpu_layers=0   # CPU only (Hugging Face free tier)
+    )
+    return _model
+
+# ---- Inference ---------------------------------------------------------------
+def generate(prompt: str, max_tokens: int = 256) -> str:
+    model = load_model()
+
+    # Preserve existing default stops; optionally extend with extra ones if flag is on
+    stops = DEFAULT_STOPS + _extra_stops_from_env()
+
+    output = model(
+        prompt,
+        max_tokens=max_tokens,
+        stop=stops,  # unchanged defaults; may include extra stops if enabled
+        echo=False,
+        temperature=0.7,
+        top_p=0.95,
+    )
+    raw_text = output["choices"][0]["text"]
+
+    # Preserve current manual truncation by the same default stops (kept intentionally)
+    # Extra stops are also applied here if enabled for consistency.
+    for stop_token in stops:
+        if stop_token and stop_token in raw_text:
+            raw_text = raw_text.split(stop_token)[0]
+
+    final = _sanitize(raw_text)
+    return final.strip()
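
For quick verification, a minimal usage sketch of the new module (not part of the commit): the model path and stop values below are assumptions, and the env vars are set before import because the flags are read once at import time.

import os

# Hypothetical location of a local GGUF file; adjust to wherever the model lives.
os.environ.setdefault("MODEL_PATH", "/tmp/models/tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf")

# Optional: add extra stop sequences (the built-in defaults stay active either way).
os.environ["LAB_STOPSEQ_ENABLED"] = "true"
os.environ["STOP_SEQUENCES"] = "###,</s>"

from model_runner import generate  # import after env is set

print(generate("In one sentence, what is a GGUF file?", max_tokens=64))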
utils/.gitignore DELETED
@@ -1,3 +0,0 @@
-# .gitignore
-logs/
-labsold/
utils/config.py DELETED
@@ -1,20 +0,0 @@
-### utils/config.py
-# utils/config.py
-
-LAB_SANITIZE_ENABLED=true
-
-LAB_STOPSEQ_ENABLED=false
-
-LAB_CRITIC_ENABLED=false
-
-LAB_JSON_MODE=false
-
-LAB_EVIDENCE_GATE=false
-
-SYSTEM_PROMPT = """You are a helpful AI assistant.
-
-Answer the user clearly and concisely.
-
-Each response should consist of only one reply. Do not simulate multiple turns. Never generate 'User:' or 'Assistant:' unless instructed.
-
-Only respond to the current question. Do not simulate full conversations. Do not invent user inputs. Stay in character as a single-turn assistant."""
utils/flags.py CHANGED
@@ -1,21 +1,19 @@
-# utils/flags.py
 import os
 from typing import Optional, Dict

-# Optional: load .env if python-dotenv is available (local dev convenience)
+# Optional: load .env for local dev; harmless on HF Spaces
 try:
     from dotenv import load_dotenv  # type: ignore
     load_dotenv()
 except Exception:
-    pass  # fine on HF Spaces; variables come from the environment
+    pass

 def _as_bool(val: Optional[str], default: bool) -> bool:
     if val is None:
         return default
     return val.strip().lower() in {"1", "true", "yes", "on", "y", "t"}

-# Public flags (read once at import time)
-# Set SANITIZE_ENABLED default to False to avoid any behavior change unless enabled via env
+# Defaults preserve current behaviour (all off unless env enabled)
 SANITIZE_ENABLED = _as_bool(os.getenv("LAB_SANITIZE_ENABLED"), False)
 STOPSEQ_ENABLED = _as_bool(os.getenv("LAB_STOPSEQ_ENABLED"), False)
 CRITIC_ENABLED = _as_bool(os.getenv("LAB_CRITIC_ENABLED"), False)
@@ -23,7 +21,6 @@ JSON_MODE = _as_bool(os.getenv("LAB_JSON_MODE"), False)
 EVIDENCE_GATE = _as_bool(os.getenv("LAB_EVIDENCE_GATE"), False)

 def snapshot() -> Dict[str, bool]:
-    """Convenience for logging/diagnostics."""
     return {
         "LAB_SANITIZE_ENABLED": SANITIZE_ENABLED,
         "LAB_STOPSEQ_ENABLED": STOPSEQ_ENABLED,
utils/logger.py DELETED
@@ -1,17 +0,0 @@
-import os
-from datetime import datetime
-
-def log_interaction(lab_name, user_input, model_output, result):
-    log_dir = "/tmp/logs"  # ✅ Use writable temp location
-    os.makedirs(log_dir, exist_ok=True)
-
-    log_path = os.path.join(log_dir, "interaction_log.txt")
-
-    with open(log_path, "a") as f:
-        f.write(
-            f"{datetime.utcnow().isoformat()} | "
-            f"Lab: {lab_name} | "
-            f"Input: {user_input} | "
-            f"Output: {model_output} | "
-            f"Result: {result}\n"
-        )
utils/sanitize.py DELETED
@@ -1,26 +0,0 @@
-# utils/sanitize.py
-import os
-import re
-from typing import Iterable
-
-DEFAULT_MARKERS = ("user:", "assistant:", "system:", "human:")
-
-def _markers_from_env() -> Iterable[str]:
-    raw = os.getenv("LAB_SANITIZE_MARKERS")
-    if not raw:
-        return DEFAULT_MARKERS
-    # comma/semicolon/space separated
-    parts = re.split(r"[,\s;]+", raw.strip())
-    return tuple([p for p in parts if p])
-
-def sanitize_output(response: str) -> str:
-    """
-    Remove hallucinated dialogue markers (e.g., 'user:', 'assistant:') and all text that follows.
-    Markers are case-insensitive. Configurable via LAB_SANITIZE_MARKERS.
-    """
-    if not response:
-        return response
-    markers = _markers_from_env()
-    # Build a single regex from the configured markers, escaped for safety
-    pattern = r"(" + r"|".join(re.escape(m) for m in markers) + r")"
-    return re.split(pattern, response, flags=re.IGNORECASE)[0].strip()
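
For reference, the truncate-at-first-marker technique the deleted sanitizer used, as a standalone sketch (sample text is made up):

import re

markers = ("user:", "assistant:", "system:", "human:")
# re.split with a capture group returns [before, match, after, ...];
# element [0] is everything before the first marker.
pattern = "(" + "|".join(re.escape(m) for m in markers) + ")"

text = "Paris is the capital of France.\nUser: and of Germany?"
print(re.split(pattern, text, flags=re.IGNORECASE)[0].strip())
# -> Paris is the capital of France.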