Spaces:

eternal-novice
/

TemplateA

Running

App Files Files Community

Dan Flower committed on Sep 14

Commit

8667228

1 Parent(s): d42f795

deploy: sync model/utils into TemplateA and update Dockerfile (canonical COPY + cache-bust)

Browse files

Files changed (7) hide show

model/download_model.py +0 -28
model/model_runner.py +103 -0
utils/.gitignore +0 -3
utils/config.py +0 -20
utils/flags.py +3 -6
utils/logger.py +0 -17
utils/sanitize.py +0 -26

model/download_model.py DELETED Viewed

@@ -1,28 +0,0 @@
-import os
-from huggingface_hub import hf_hub_download
-# Use the token directly, skip login()
-token = os.environ.get("HF_TOKEN")
-if not token:
-    raise RuntimeError("HF_TOKEN environment variable is missing")
-print("Downloading model with token:", token[:8] + "…")
-model_path = hf_hub_download(
-    repo_id="TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF",
-    filename="tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf",
-    repo_type="model",
-    local_dir="/tmp/models",
-    token=token,
-)
-print("✅ Model downloaded to:", model_path)
-if os.path.exists(model_path):
-    print("🎉 File exists at", model_path)
-else:
-    print("❌ File not found after download!")
-#https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF/blob/main/tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf

model/model_runner.py ADDED Viewed

	@@ -0,0 +1,103 @@

+# model_runner.py
+import os
+import sys
+from typing import List, Optional
+from llama_cpp import Llama
+print(f"[BOOT] model_runner from {__file__}", file=sys.stderr)
+# ---- Phase 2: flags (no behavior change) ------------------------------------
+# Reads LAB_* env toggles; all defaults preserve current behavior.
+try:
+    from utils import flags  # if your package path is different, adjust import
+except Exception:
+    # Fallback inline flags if utils.flags isn't available in this lab
+    def _as_bool(val: Optional[str], default: bool) -> bool:
+        if val is None:
+            return default
+        return val.strip().lower() in {"1", "true", "yes", "on", "y", "t"}
+    class _F:
+        SANITIZE_ENABLED = _as_bool(os.getenv("LAB_SANITIZE_ENABLED"), False)   # you don't sanitize today
+        STOPSEQ_ENABLED  = _as_bool(os.getenv("LAB_STOPSEQ_ENABLED"),  False)   # extra stops only; defaults off
+        CRITIC_ENABLED   = _as_bool(os.getenv("LAB_CRITIC_ENABLED"),   False)
+        JSON_MODE        = _as_bool(os.getenv("LAB_JSON_MODE"),        False)
+        EVIDENCE_GATE    = _as_bool(os.getenv("LAB_EVIDENCE_GATE"),    False)
+        @staticmethod
+        def snapshot():
+            return {
+                "LAB_SANITIZE_ENABLED": _F.SANITIZE_ENABLED,
+                "LAB_STOPSEQ_ENABLED":  _F.STOPSEQ_ENABLED,
+                "LAB_CRITIC_ENABLED":   _F.CRITIC_ENABLED,
+                "LAB_JSON_MODE":        _F.JSON_MODE,
+                "LAB_EVIDENCE_GATE":    _F.EVIDENCE_GATE,
+            }
+    flags = _F()
+print("[flags] snapshot:", getattr(flags, "snapshot", lambda: {} )(), file=sys.stderr)
+# Optional sanitizer hook (kept no-op unless enabled later)
+def _sanitize(text: str) -> str:
+    # Phase 2: default False -> no behavior change
+    if getattr(flags, "SANITIZE_ENABLED", False):
+        # TODO: wire your real sanitizer in Phase 3+
+        return text.strip()
+    return text
+# Stop sequences: keep today's defaults ALWAYS.
+# If LAB_STOPSEQ_ENABLED=true, add *extra* stops from STOP_SEQUENCES env (comma-separated).
+DEFAULT_STOPS: List[str] = ["\nUser:", "\nAssistant:"]
+def _extra_stops_from_env() -> List[str]:
+    if not getattr(flags, "STOPSEQ_ENABLED", False):
+        return []
+    raw = os.getenv("STOP_SEQUENCES", "")
+    toks = [t.strip() for t in raw.split(",") if t.strip()]
+    return toks
+# ---- Model cache / load ------------------------------------------------------
+_model = None  # module-level cache
+def load_model():
+    global _model
+    if _model is not None:
+        return _model
+    model_path = os.getenv("MODEL_PATH")
+    if not model_path or not os.path.exists(model_path):
+        raise ValueError(f"Model path does not exist or is not set: {model_path}")
+    print(f"[INFO] Loading model from {model_path}")
+    _model = Llama(
+        model_path=model_path,
+        n_ctx=1024,        # short context to reduce memory use
+        n_threads=4,       # number of CPU threads
+        n_gpu_layers=0     # CPU only (Hugging Face free tier)
+    )
+    return _model
+# ---- Inference ---------------------------------------------------------------
+def generate(prompt: str, max_tokens: int = 256) -> str:
+    model = load_model()
+    # Preserve existing default stops; optionally extend with extra ones if flag is on
+    stops = DEFAULT_STOPS + _extra_stops_from_env()
+    output = model(
+        prompt,
+        max_tokens=max_tokens,
+        stop=stops,            # unchanged defaults; may include extra stops if enabled
+        echo=False,
+        temperature=0.7,
+        top_p=0.95,
+    )
+    raw_text = output["choices"][0]["text"]
+    # Preserve current manual truncation by the same default stops (kept intentionally)
+    # Extra stops are also applied here if enabled for consistency.
+    for stop_token in stops:
+        if stop_token and stop_token in raw_text:
+            raw_text = raw_text.split(stop_token)[0]
+    final = _sanitize(raw_text)
+    return final.strip()

utils/.gitignore DELETED Viewed

@@ -1,3 +0,0 @@
-# .gitignore
-logs/
-labsold/

utils/config.py DELETED Viewed

@@ -1,20 +0,0 @@
-### utils/config.py
-# utils/config.py
-LAB_SANITIZE_ENABLED=true
-LAB_STOPSEQ_ENABLED=false
-LAB_CRITIC_ENABLED=false
-LAB_JSON_MODE=false
-LAB_EVIDENCE_GATE=false
-SYSTEM_PROMPT = """You are a helpful AI assistant.
-Answer the user clearly and concisely.
-Each response should consist of only one reply. Do not simulate multiple turns. Never generate 'User:' or 'Assistant:' unless instructed.
-Only respond to the current question. Do not simulate full conversations. Do not invent user inputs. Stay in character as a single-turn assistant."""

utils/flags.py CHANGED Viewed

@@ -1,21 +1,19 @@
-# utils/flags.py
 import os
 from typing import Optional, Dict
-# Optional: load .env if python-dotenv is available (local dev convenience)
 try:
     from dotenv import load_dotenv  # type: ignore
     load_dotenv()
 except Exception:
-    pass  # fine on HF Spaces; variables come from the environment
 def _as_bool(val: Optional[str], default: bool) -> bool:
     if val is None:
         return default
     return val.strip().lower() in {"1", "true", "yes", "on", "y", "t"}
-# Public flags (read once at import time)
-# Set SANITIZE_ENABLED default to False to avoid any behavior change unless enabled via env
 SANITIZE_ENABLED = _as_bool(os.getenv("LAB_SANITIZE_ENABLED"), False)
 STOPSEQ_ENABLED  = _as_bool(os.getenv("LAB_STOPSEQ_ENABLED"),  False)
 CRITIC_ENABLED   = _as_bool(os.getenv("LAB_CRITIC_ENABLED"),   False)
@@ -23,7 +21,6 @@ JSON_MODE        = _as_bool(os.getenv("LAB_JSON_MODE"),        False)
 EVIDENCE_GATE    = _as_bool(os.getenv("LAB_EVIDENCE_GATE"),    False)
 def snapshot() -> Dict[str, bool]:
-    """Convenience for logging/diagnostics."""
     return {
         "LAB_SANITIZE_ENABLED": SANITIZE_ENABLED,
         "LAB_STOPSEQ_ENABLED":  STOPSEQ_ENABLED,

 import os
 from typing import Optional, Dict
+# Optional: load .env for local dev; harmless on HF Spaces
 try:
     from dotenv import load_dotenv  # type: ignore
     load_dotenv()
 except Exception:
+    pass
 def _as_bool(val: Optional[str], default: bool) -> bool:
     if val is None:
         return default
     return val.strip().lower() in {"1", "true", "yes", "on", "y", "t"}
+# Defaults preserve current behaviour (all off unless env enabled)
 SANITIZE_ENABLED = _as_bool(os.getenv("LAB_SANITIZE_ENABLED"), False)
 STOPSEQ_ENABLED  = _as_bool(os.getenv("LAB_STOPSEQ_ENABLED"),  False)
 CRITIC_ENABLED   = _as_bool(os.getenv("LAB_CRITIC_ENABLED"),   False)
 EVIDENCE_GATE    = _as_bool(os.getenv("LAB_EVIDENCE_GATE"),    False)
 def snapshot() -> Dict[str, bool]:
     return {
         "LAB_SANITIZE_ENABLED": SANITIZE_ENABLED,
         "LAB_STOPSEQ_ENABLED":  STOPSEQ_ENABLED,

utils/logger.py DELETED Viewed

@@ -1,17 +0,0 @@
-import os
-from datetime import datetime
-def log_interaction(lab_name, user_input, model_output, result):
-    log_dir = "/tmp/logs"  # ✅ Use writable temp location
-    os.makedirs(log_dir, exist_ok=True)
-    log_path = os.path.join(log_dir, "interaction_log.txt")
-    with open(log_path, "a") as f:
-        f.write(
-            f"{datetime.utcnow().isoformat()} | "
-            f"Lab: {lab_name} | "
-            f"Input: {user_input} | "
-            f"Output: {model_output} | "
-            f"Result: {result}\n"
-        )

utils/sanitize.py DELETED Viewed

@@ -1,26 +0,0 @@
-# utils/sanitize.py
-import os
-import re
-from typing import Iterable
-DEFAULT_MARKERS = ("user:", "assistant:", "system:", "human:")
-def _markers_from_env() -> Iterable[str]:
-    raw = os.getenv("LAB_SANITIZE_MARKERS")
-    if not raw:
-        return DEFAULT_MARKERS
-    # comma/semicolon/space separated
-    parts = re.split(r"[,\s;]+", raw.strip())
-    return tuple([p for p in parts if p])
-def sanitize_output(response: str) -> str:
-    """
-    Remove hallucinated dialogue markers (e.g., 'user:', 'assistant:') and all text that follows.
-    Markers are case-insensitive. Configurable via LAB_SANITIZE_MARKERS.
-    """
-    if not response:
-        return response
-    markers = _markers_from_env()
-    # Build a single regex from the configured markers, escaped for safety
-    pattern = r"(" + r"|".join(re.escape(m) for m in markers) + r")"
-    return re.split(pattern, response, flags=re.IGNORECASE)[0].strip()