import os

import gradio as gr
import torch
from huggingface_hub import login
from transformers import AutoModelForCausalLM, AutoTokenizer

# Read the Hugging Face token from the Space secret named "TEST"
hf_token = os.getenv("TEST")

device = torch.device("cpu")  # Force CPU usage
print(device)

if hf_token:
    login(hf_token)
else:
    print("Error: no Hugging Face token found. Add 'TEST' to the Space secrets.")
# Load the DeepSeek LLM 7B chat model
MODEL_NAME = "deepseek-ai/deepseek-llm-7b-chat"  # Change this if needed

# Load tokenizer and model (a 7B model needs several GB of RAM on a CPU-only Space)
# Note: float16 on CPU is slow and some ops may not support it; float32 is the safer CPU default
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, token=hf_token)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME, torch_dtype=torch.float16, device_map="cpu", token=hf_token
)
# Define the chat function used by the Gradio interface
def chat_with_Deepseek(prompt, history):
    """Generate a response using DeepSeek."""
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    output = model.generate(**inputs, max_new_tokens=200)
    # Decode only the newly generated tokens so the prompt is not echoed back to the user
    response = tokenizer.decode(output[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True)
    return response
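
# Optional: the function above ignores `history`, so every turn is answered without context.
# A minimal sketch (added for illustration, not part of the original app) that feeds the Gradio
# history through the tokenizer's chat template, assuming the default (user, assistant)
# tuple-style history that gr.ChatInterface passes to its fn:
def chat_with_history(prompt, history):
    messages = []
    for user_msg, bot_msg in history:
        messages.append({"role": "user", "content": user_msg})
        messages.append({"role": "assistant", "content": bot_msg})
    messages.append({"role": "user", "content": prompt})
    # apply_chat_template builds the model-specific prompt and returns input ids directly
    input_ids = tokenizer.apply_chat_template(
        messages, add_generation_prompt=True, return_tensors="pt"
    ).to(device)
    output = model.generate(input_ids, max_new_tokens=200)
    return tokenizer.decode(output[0][input_ids.shape[1]:], skip_special_tokens=True)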
# Create the Gradio chat UI
interface = gr.ChatInterface(fn=chat_with_Deepseek, title="DeepSeek LLM 7B Chat")

# Launch in Hugging Face Spaces
if __name__ == "__main__":
    interface.launch()
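
# A Space running this app also needs a requirements.txt covering the imported packages,
# along these lines (unpinned versions shown; the original Space's file is not reproduced here):
#   gradio
#   torch
#   transformers
#   huggingface_hub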