Update app.py
app.py CHANGED
@@ -18,11 +18,9 @@ TOP_P = 0.95
 HF_TOKEN = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACEHUB_API_TOKEN") or os.getenv("HUGGINGFACE_TOKEN")
 if HF_TOKEN:
     try:
-        # No prints; stays silent if token works or fails
         login(token=HF_TOKEN)
     except Exception:
-        #
-        pass
+        pass  # stay silent
 
 # Globals so we only load once
 _tokenizer = None
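
Note: huggingface_hub's login() stores the token process-wide, so later
from_pretrained() calls can fetch gated weights without an explicit token
argument. A minimal sketch of the equivalent per-call form, assuming a
MODEL_ID of facebook/MobileLLM-Pro (the model id itself never appears in
this diff):

import os

from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_ID = "facebook/MobileLLM-Pro"  # assumption: inferred from the Space title
token = os.getenv("HF_TOKEN")

# Passing token= per call avoids the global login() side effect
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, token=token)
model = AutoModelForCausalLM.from_pretrained(MODEL_ID, token=token)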
@@ -75,7 +73,13 @@ def generate_stream(message: str, history: List[Tuple[str, str]]):
     input_ids = inputs["input_ids"] if isinstance(inputs, dict) else inputs
     input_ids = input_ids.to(_device)
 
-    streamer = TextIteratorStreamer(_tokenizer, skip_special_tokens=True)
+    # IMPORTANT: don't stream the prompt (prevents system/user text from appearing)
+    streamer = TextIteratorStreamer(
+        _tokenizer,
+        skip_special_tokens=True,
+        skip_prompt=True,  # <-- key fix
+    )
+
     gen_kwargs = dict(
         input_ids=input_ids,
         max_new_tokens=MAX_NEW_TOKENS,
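
Note: with skip_prompt=True the streamer puts only newly generated tokens on
its queue, so the system/user prompt can never leak into the chat window. A
minimal sketch of how such a streamer is typically wired to generate(); the
helper name stream_completion is hypothetical, and only _tokenizer,
MAX_NEW_TOKENS, and the yield loop come from the diff:

from threading import Thread

from transformers import TextIteratorStreamer

def stream_completion(model, tokenizer, input_ids, max_new_tokens=256):
    # Yields the accumulated reply, mirroring generate_stream's loop.
    streamer = TextIteratorStreamer(
        tokenizer,
        skip_special_tokens=True,
        skip_prompt=True,  # prompt tokens never enter the text queue
    )
    gen_kwargs = dict(
        input_ids=input_ids,
        max_new_tokens=max_new_tokens,
        streamer=streamer,
    )
    # generate() blocks until done, so it runs on a worker thread while this
    # generator drains the streamer and yields partial text to the UI
    Thread(target=model.generate, kwargs=gen_kwargs, daemon=True).start()
    output = ""
    for new_text in streamer:  # iteration ends when generation completes
        output += new_text
        yield output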
@@ -95,12 +99,22 @@ def generate_stream(message: str, history: List[Tuple[str, str]]):
         output += new_text
         yield output
 
-demo = gr.ChatInterface(
-    fn=generate_stream,
-    title="MobileLLM-Pro — Chat",
-    description="Streaming chat with facebook/MobileLLM-Pro (instruct)",
-    chatbot=gr.Chatbot(height=420, label="MobileLLM-Pro"),
-)
+with gr.Blocks(title="MobileLLM-Pro — Chat") as demo:
+    gr.Markdown(
+        """
+        # MobileLLM-Pro — Chat
+        Streaming chat with facebook/MobileLLM-Pro (instruct)
+
+        <div style="text-align:center;">
+          Built with <a href="https://huggingface.co/spaces/akhaliq/anycoder" target="_blank">anycoder</a>
+        </div>
+        """)
+    gr.ChatInterface(
+        fn=generate_stream,
+        chatbot=gr.Chatbot(height=420, label="MobileLLM-Pro"),
+        title=None,  # header handled by Markdown above
+        description=None,
+    )
 
 if __name__ == "__main__":
     demo.launch(server_name="0.0.0.0", server_port=int(os.getenv("PORT", 7860)))
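
Note: gr.ChatInterface treats a generator fn as a streaming handler: each
yield replaces the in-progress bot message, which is why generate_stream
yields the accumulated text rather than deltas. A toy stand-in with the same
contract, runnable without the model (echo_stream is hypothetical, not the
app's logic):

import time

import gradio as gr

def echo_stream(message, history):
    # Same call shape ChatInterface uses: (new message, prior turns)
    output = ""
    for ch in f"echo: {message}":
        output += ch
        time.sleep(0.02)  # simulate per-token latency
        yield output      # overwrites the visible reply each time

with gr.Blocks() as toy:
    gr.ChatInterface(fn=echo_stream, chatbot=gr.Chatbot(height=200))

if __name__ == "__main__":
    toy.launch()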