akhaliq HF Staff committed on
Commit
eb8ec5c
·
verified ·
1 Parent(s): f35bf64

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -10
app.py CHANGED
@@ -18,11 +18,9 @@ TOP_P = 0.95
18
  HF_TOKEN = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACEHUB_API_TOKEN") or os.getenv("HUGGINGFACE_TOKEN")
19
  if HF_TOKEN:
20
  try:
21
- # No prints; stays silent if token works or fails
22
  login(token=HF_TOKEN)
23
  except Exception:
24
- # Stay silent to avoid exposing anything to the UI/logs
25
- pass
26
 
27
  # Globals so we only load once
28
  _tokenizer = None
@@ -75,7 +73,13 @@ def generate_stream(message: str, history: List[Tuple[str, str]]):
75
  input_ids = inputs["input_ids"] if isinstance(inputs, dict) else inputs
76
  input_ids = input_ids.to(_device)
77
 
78
- streamer = TextIteratorStreamer(_tokenizer, skip_special_tokens=True)
 
 
 
 
 
 
79
  gen_kwargs = dict(
80
  input_ids=input_ids,
81
  max_new_tokens=MAX_NEW_TOKENS,
@@ -95,12 +99,22 @@ def generate_stream(message: str, history: List[Tuple[str, str]]):
95
  output += new_text
96
  yield output
97
 
98
- demo = gr.ChatInterface(
99
- fn=generate_stream,
100
- chatbot=gr.Chatbot(height=420, label="MobileLLM-Pro"),
101
- title="MobileLLM-Pro — Chat",
102
- description="Streaming chat with facebook/MobileLLM-Pro (instruct)",
103
- )
 
 
 
 
 
 
 
 
 
 
104
 
105
  if __name__ == "__main__":
106
  demo.launch(server_name="0.0.0.0", server_port=int(os.getenv("PORT", 7860)))
 
18
  HF_TOKEN = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACEHUB_API_TOKEN") or os.getenv("HUGGINGFACE_TOKEN")
19
  if HF_TOKEN:
20
  try:
 
21
  login(token=HF_TOKEN)
22
  except Exception:
23
+ pass # stay silent
 
24
 
25
  # Globals so we only load once
26
  _tokenizer = None
 
73
  input_ids = inputs["input_ids"] if isinstance(inputs, dict) else inputs
74
  input_ids = input_ids.to(_device)
75
 
76
+ # IMPORTANT: don't stream the prompt (prevents system/user text from appearing)
77
+ streamer = TextIteratorStreamer(
78
+ _tokenizer,
79
+ skip_special_tokens=True,
80
+ skip_prompt=True, # <-- key fix
81
+ )
82
+
83
  gen_kwargs = dict(
84
  input_ids=input_ids,
85
  max_new_tokens=MAX_NEW_TOKENS,
 
99
  output += new_text
100
  yield output
101
 
102
+ with gr.Blocks(title="MobileLLM-Pro — Chat") as demo:
103
+ gr.Markdown(
104
+ """
105
+ # MobileLLM-Pro — Chat
106
+ Streaming chat with facebook/MobileLLM-Pro (instruct)
107
+
108
+ <div style="text-align:center;">
109
+ Built with <a href="https://huggingface.co/spaces/akhaliq/anycoder" target="_blank">anycoder</a>
110
+ </div>
111
+ """)
112
+ gr.ChatInterface(
113
+ fn=generate_stream,
114
+ chatbot=gr.Chatbot(height=420, label="MobileLLM-Pro"),
115
+ title=None, # header handled by Markdown above
116
+ description=None,
117
+ )
118
 
119
  if __name__ == "__main__":
120
  demo.launch(server_name="0.0.0.0", server_port=int(os.getenv("PORT", 7860)))