Spaces:

eternal-novice
/

TemplateA

Running

App Files Files Community

Dan Flower committed on Sep 7

Commit

53d10a6

1 Parent(s): d7e8c05

fix(model): Python 3.9 typing; move model_runner to model/; centralize flags in utils/

Browse files

Files changed (1) hide show

model/model_runner.py +0 -101

model/model_runner.py DELETED Viewed

@@ -1,101 +0,0 @@
-# model_runner.py
-import os
-import sys
-from typing import List
-from llama_cpp import Llama
-# ---- Phase 2: flags (no behavior change) ------------------------------------
-# Reads LAB_* env toggles; all defaults preserve current behavior.
-try:
-    from TemplateA.utils import flags  # if your package path is different, adjust import
-except Exception:
-    # Fallback inline flags if TemplateA.utils.flags isn't available in this lab
-    def _as_bool(val: str | None, default: bool) -> bool:
-        if val is None:
-            return default
-        return val.strip().lower() in {"1", "true", "yes", "on", "y", "t"}
-    class _F:
-        SANITIZE_ENABLED = _as_bool(os.getenv("LAB_SANITIZE_ENABLED"), False)   # you don't sanitize today
-        STOPSEQ_ENABLED  = _as_bool(os.getenv("LAB_STOPSEQ_ENABLED"),  False)   # extra stops only; defaults off
-        CRITIC_ENABLED   = _as_bool(os.getenv("LAB_CRITIC_ENABLED"),   False)
-        JSON_MODE        = _as_bool(os.getenv("LAB_JSON_MODE"),        False)
-        EVIDENCE_GATE    = _as_bool(os.getenv("LAB_EVIDENCE_GATE"),    False)
-        @staticmethod
-        def snapshot():
-            return {
-                "LAB_SANITIZE_ENABLED": _F.SANITIZE_ENABLED,
-                "LAB_STOPSEQ_ENABLED":  _F.STOPSEQ_ENABLED,
-                "LAB_CRITIC_ENABLED":   _F.CRITIC_ENABLED,
-                "LAB_JSON_MODE":        _F.JSON_MODE,
-                "LAB_EVIDENCE_GATE":    _F.EVIDENCE_GATE,
-            }
-    flags = _F()
-print("[flags] snapshot:", getattr(flags, "snapshot", lambda: {} )(), file=sys.stderr)
-# Optional sanitizer hook (kept no-op unless enabled later)
-def _sanitize(text: str) -> str:
-    # Phase 2: default False -> no behavior change
-    if getattr(flags, "SANITIZE_ENABLED", False):
-        # TODO: wire your real sanitizer in Phase 3+
-        return text.strip()
-    return text
-# Stop sequences: keep today's defaults ALWAYS.
-# If LAB_STOPSEQ_ENABLED=true, add *extra* stops from STOP_SEQUENCES env (comma-separated).
-DEFAULT_STOPS: List[str] = ["\nUser:", "\nAssistant:"]
-def _extra_stops_from_env() -> List[str]:
-    if not getattr(flags, "STOPSEQ_ENABLED", False):
-        return []
-    raw = os.getenv("STOP_SEQUENCES", "")
-    toks = [t.strip() for t in raw.split(",") if t.strip()]
-    return toks
-# ---- Model cache / load ------------------------------------------------------
-_model = None  # module-level cache
-def load_model():
-    global _model
-    if _model is not None:
-        return _model
-    model_path = os.getenv("MODEL_PATH")
-    if not model_path or not os.path.exists(model_path):
-        raise ValueError(f"Model path does not exist or is not set: {model_path}")
-    print(f"[INFO] Loading model from {model_path}")
-    _model = Llama(
-        model_path=model_path,
-        n_ctx=1024,        # short context to reduce memory use
-        n_threads=4,       # number of CPU threads
-        n_gpu_layers=0     # CPU only (Hugging Face free tier)
-    )
-    return _model
-# ---- Inference ---------------------------------------------------------------
-def generate(prompt: str, max_tokens: int = 256) -> str:
-    model = load_model()
-    # Preserve existing default stops; optionally extend with extra ones if flag is on
-    stops = DEFAULT_STOPS + _extra_stops_from_env()
-    output = model(
-        prompt,
-        max_tokens=max_tokens,
-        stop=stops,            # unchanged defaults; may include extra stops if enabled
-        echo=False,
-        temperature=0.7,
-        top_p=0.95,
-    )
-    raw_text = output["choices"][0]["text"]
-    # Preserve current manual truncation by the same default stops (kept intentionally)
-    # Extra stops are also applied here if enabled for consistency.
-    for stop_token in stops:
-        if stop_token and stop_token in raw_text:
-            raw_text = raw_text.split(stop_token)[0]
-    final = _sanitize(raw_text)
-    return final.strip()