# Hugging Face Space: MedS-Llama3 single-turn medical chat demo (CPU-only).
| import gradio as gr | |
| import torch | |
| from transformers import AutoModelForCausalLM, AutoTokenizer | |
class MedS_Llama3:
    """CPU-only wrapper around the MMedS-Llama-3 causal LM for single-turn chat.

    Loads the model in float32 on the CPU and exposes :meth:`chat`, which
    builds an ``instruction + query`` prompt and returns only the newly
    generated text.
    """

    # Llama-3's end-of-turn token id (<|eot_id|>); used as both pad and EOS.
    EOT_TOKEN_ID = 128009

    def __init__(self, model_path: str):
        """Load model and tokenizer from *model_path* onto the CPU.

        Args:
            model_path: Hugging Face hub id or local directory of the model.
        """
        # Load the model onto the CPU in standard float32 precision.
        self.model = AutoModelForCausalLM.from_pretrained(
            model_path,
            device_map='cpu',
            torch_dtype=torch.float32,
        )
        # Llama-3 has no dedicated pad token; reuse the end-of-turn token.
        self.model.config.pad_token_id = self.model.config.eos_token_id = self.EOT_TOKEN_ID
        self.tokenizer = AutoTokenizer.from_pretrained(
            model_path,
            model_max_length=2048,
            padding_side="right",
        )
        self.tokenizer.pad_token = self.tokenizer.eos_token
        self.model.eval()
        print('Model and tokenizer loaded on CPU!')

    def chat(self, query: str, instruction: str, max_output_tokens: int) -> str:
        """Generate a single reply for *query* under the given *instruction*.

        Args:
            query: The user's message.
            instruction: System-style instruction prepended to the prompt.
            max_output_tokens: Cap on the number of newly generated tokens.

        Returns:
            The generated reply with the prompt stripped and whitespace trimmed.
        """
        input_sentence = f"{instruction}\n\n{query}"
        input_tokens = self.tokenizer(
            input_sentence,
            return_tensors="pt",
            padding=True,
            truncation=True,
        )
        # Fix: run generation under no_grad so no autograd graph is built
        # during inference (the original tracked gradients needlessly).
        with torch.no_grad():
            output = self.model.generate(
                **input_tokens,
                max_new_tokens=max_output_tokens,
                eos_token_id=self.EOT_TOKEN_ID,
            )
        # Decode only the tokens generated after the prompt.
        generated_text = self.tokenizer.decode(
            output[0][input_tokens['input_ids'].shape[1]:],
            skip_special_tokens=True,
        )
        return generated_text.strip()
# Instantiate the model once at import time (verify this path is correct).
MODEL_PATH = "Henrychur/MMedS-Llama-3-8B"
chat_model = MedS_Llama3(MODEL_PATH)
# Response callback used by the Gradio interface.
def respond(message, system_message, max_output_tokens):
    """Stateless Gradio handler: answer only the current message.

    No conversation history is kept — each call uses just the incoming
    message and the system instruction.
    """
    yield chat_model.chat(
        query=message,
        instruction=system_message,
        max_output_tokens=max_output_tokens,
    )
# Assemble the Gradio chat UI: two text inputs plus a token-budget slider.
demo = gr.Interface(
    fn=respond,
    inputs=[
        gr.Textbox(
            value="What is the treatment for diabetes?",
            label="Your Input",
        ),
        gr.Textbox(
            value="If you are a doctor, please perform clinical consulting with the patient.",
            label="System message",
        ),
        gr.Slider(
            minimum=1,
            maximum=1024,
            value=512,
            step=1,
            label="Max Output Tokens",
        ),
    ],
    outputs="text",
)

if __name__ == "__main__":
    demo.launch()