John6666 committed
Commit fdd975b · verified · 1 parent: e13b172

Upload 3 files

Files changed (3)
  1. README.md +9 -6
  2. app.py +93 -0
  3. requirements.txt +8 -0
README.md CHANGED
@@ -1,12 +1,15 @@
  ---
- title: Test Chatbot 2
- emoji: 😻
- colorFrom: gray
- colorTo: blue
+ title: Test Chatbot 1
+ emoji: 💬
+ colorFrom: yellow
+ colorTo: purple
  sdk: gradio
- sdk_version: 5.43.1
+ sdk_version: 5.42.0
  app_file: app.py
  pinned: false
+ hf_oauth: true
+ hf_oauth_scopes:
+ - inference-api
  ---

- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ An example chatbot using [Gradio](https://gradio.app), [`huggingface_hub`](https://huggingface.co/docs/huggingface_hub/v0.22.2/en/index), and the [Hugging Face Inference API](https://huggingface.co/docs/api-inference/index).
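
The new `hf_oauth: true` / `hf_oauth_scopes` front matter enables Sign in with Hugging Face for this Space, with the `inference-api` scope. A minimal sketch of how that scope could be consumed, assuming Gradio's Spaces OAuth support (a `gr.LoginButton` plus a `gr.OAuthToken` parameter that Gradio injects per session) and `huggingface_hub`'s `InferenceClient`; the handler name and model id below are hypothetical, not part of this commit:

# Hypothetical handler: calls the Inference API with the signed-in
# visitor's own token, granted via the inference-api OAuth scope.
import gradio as gr
from huggingface_hub import InferenceClient

def respond_as_user(message, oauth_token: gr.OAuthToken | None):
    # Gradio injects oauth_token from the session; it is None when signed out.
    if oauth_token is None:
        return "Please sign in with Hugging Face first."
    client = InferenceClient(token=oauth_token.token)
    out = client.chat_completion(
        messages=[{"role": "user", "content": message}],
        model="HuggingFaceH4/zephyr-7b-beta",  # hypothetical model choice
        max_tokens=256,
    )
    return out.choices[0].message.content

with gr.Blocks() as oauth_demo:
    gr.LoginButton()
    msg = gr.Textbox(label="Message")
    reply = gr.Textbox(label="Reply")
    msg.submit(respond_as_user, inputs=msg, outputs=reply)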
app.py ADDED
@@ -0,0 +1,93 @@
+ import spaces
+ import gradio as gr
+ from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer, TorchAoConfig
+ from threading import Thread
+ import torch
+ from torchao.quantization import Int8DynamicActivationInt8WeightConfig, Int8WeightOnlyConfig
+
+ quant_config = Int8DynamicActivationInt8WeightConfig()
+ #quant_config = Int8WeightOnlyConfig()
+ quantization_config = TorchAoConfig(quant_type=quant_config)
+ #checkpoint = "HuggingFaceTB/SmolLM2-135M-Instruct"
+ checkpoint = "unsloth/gemma-3-4b-it"
+ device = "cuda" if torch.cuda.is_available() else "cpu"
+ tokenizer = AutoTokenizer.from_pretrained(checkpoint)
+ #model = AutoModelForCausalLM.from_pretrained(checkpoint, torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32).to(device)
+ model = AutoModelForCausalLM.from_pretrained(checkpoint, torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,
+                                              device_map=device, quantization_config=quantization_config)
+
+ @spaces.GPU(duration=30)
+ def respond_stream(message, history, system_message, max_tokens, temperature, top_p):
+     messages = [{"role": "system", "content": system_message}] + history + [{"role": "user", "content": message}]
+
+     input_ids = tokenizer.apply_chat_template(
+         messages,
+         tokenize=True,
+         add_generation_prompt=True,
+         return_tensors="pt",
+     ).to(model.device)
+
+     streamer = TextIteratorStreamer(
+         tokenizer, skip_prompt=True, skip_special_tokens=True
+     )
+     gen_kwargs = dict(
+         inputs=input_ids,
+         streamer=streamer,
+         max_new_tokens=max_tokens,
+         do_sample=True,
+         temperature=temperature,
+         top_p=top_p,
+         eos_token_id=tokenizer.eos_token_id,
+         cache_implementation="static",
+     )
+     thread = Thread(target=model.generate, kwargs=gen_kwargs)
+     thread.start()
+
+     partial = ""
+     for piece in streamer:
+         partial += piece
+         yield partial
+
+ @spaces.GPU(duration=30)
+ def respond(message, history, system_message, max_tokens, temperature, top_p):
+     messages = [{"role": "system", "content": system_message}] + history + [{"role": "user", "content": message}]
+
+     input_ids = tokenizer.apply_chat_template(
+         messages,
+         tokenize=True,
+         add_generation_prompt=True,
+         return_tensors="pt",
+     ).to(model.device)
+
+     outputs = model.generate(
+         input_ids=input_ids,
+         max_new_tokens=max_tokens,
+         do_sample=True,
+         temperature=temperature,
+         top_p=top_p,
+         eos_token_id=tokenizer.eos_token_id,
+         cache_implementation="static",
+     )
+
+     gen_ids = outputs[0][input_ids.shape[-1]:]
+     return tokenizer.decode(gen_ids, skip_special_tokens=True)
+
+ """
+ For information on how to customize the ChatInterface, peruse the Gradio docs: https://www.gradio.app/docs/chatinterface
+ """
+ chatbot = gr.ChatInterface(
+     respond,
+     type="messages",
+     additional_inputs=[
+         gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
+         gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
+         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
+         gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
+     ],
+ )
+
+ with gr.Blocks() as demo:
+     chatbot.render()
+
+ if __name__ == "__main__":
+     demo.queue().launch()
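
Two observations on this file. First, `Int8DynamicActivationInt8WeightConfig` quantizes both weights and activations to int8 (W8A8); the commented-out `Int8WeightOnlyConfig` is the weight-only alternative. Second, the `ChatInterface` is wired to the blocking `respond` handler, leaving the streaming variant `respond_stream` unused. A small sketch (my reading of the code, not part of the commit) of swapping it in so tokens appear in the chat window as the `TextIteratorStreamer` yields them:

# respond_stream is a generator that yields the growing reply string,
# which gr.ChatInterface renders incrementally as a streamed response.
chatbot = gr.ChatInterface(
    respond_stream,
    type="messages",
    additional_inputs=[
        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
    ],
)

With `type="messages"`, `history` arrives as a list of `{"role", "content"}` dicts, so prepending the system message as both handlers do matches the input format `apply_chat_template` expects.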
requirements.txt ADDED
@@ -0,0 +1,8 @@
+ huggingface_hub[hf_xet]
+ torch
+ torchao
+ transformers
+ accelerate
+ peft
+ sentencepiece
+ pydantic==2.10.6
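
One note on the dependency list: the `spaces` package imported at the top of app.py is absent here, presumably because ZeroGPU Spaces preinstall it. For running the file outside Spaces, a small shim (an assumption about local use, not part of the commit) keeps the `@spaces.GPU` decorator from breaking the import:

# Fallback stub so app.py also runs where the `spaces` package
# (provider of @spaces.GPU on ZeroGPU hardware) is unavailable.
try:
    import spaces
except ImportError:
    class _SpacesStub:
        @staticmethod
        def GPU(*args, **kwargs):
            # Handle both @spaces.GPU and @spaces.GPU(duration=...).
            if args and callable(args[0]):
                return args[0]
            def decorator(fn):
                return fn
            return decorator
    spaces = _SpacesStub()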