import glob
import os

import gradio as gr
from ctransformers import AutoModelForCausalLM
from huggingface_hub import snapshot_download

# Use home directory for cache (always writable in Spaces)
MODEL_CACHE_DIR = os.environ.get(
    "MODEL_DIR", os.path.join(os.path.expanduser("~"), "model_cache")
)
os.makedirs(MODEL_CACHE_DIR, exist_ok=True)

MODEL_REPO = "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF"
MODEL_FILE = "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf"


# Ensure model is downloaded
def ensure_model() -> str:
    """Return a local path to MODEL_FILE, downloading it on first run."""
    local_paths = glob.glob(
        os.path.join(MODEL_CACHE_DIR, "**", MODEL_FILE), recursive=True
    )
    if local_paths:
        return local_paths[0]
    print("Downloading model...")
    # allow_patterns limits the download to the one GGUF file we need
    # instead of pulling every quantization in the repo.
    repo_path = snapshot_download(
        repo_id=MODEL_REPO,
        cache_dir=MODEL_CACHE_DIR,
        allow_patterns=[MODEL_FILE],
    )
    matches = glob.glob(os.path.join(repo_path, "**", MODEL_FILE), recursive=True)
    if matches:
        return matches[0]
    raise FileNotFoundError(f"{MODEL_FILE} not found in {MODEL_REPO}")


model_path = ensure_model()

# Load model. ctransformers accepts a direct path to a GGUF file, so pass the
# resolved local path instead of the repo id plus a full path as model_file
# (which would make ctransformers try to resolve it inside the repo again).
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    model_type="llama",
    max_new_tokens=256,
    temperature=0.7,
)

# Custom canned responses (matched case-insensitively, whitespace-stripped)
custom_data = {
    "hi": "Hello! Good morning, I am chatbot Deepika.",
    "about you": "I am Deepika AI Chatbot, how may I help you?",
    "i love you": "I love you too",
    "age": "45",
}


def chatbot(msg: str) -> str:
    key = msg.strip().lower()
    if key in custom_data:
        return custom_data[key]
    # TinyLlama-1.1B-Chat-v1.0 was trained on the Zephyr chat format, so use
    # that template rather than the Alpaca-style "### Instruction" prompt.
    prompt = f"<|user|>\n{msg}</s>\n<|assistant|>\n"
    return model(prompt, stop=["</s>"])


if __name__ == "__main__":
    port = int(os.environ.get("PORT", 7860))
    gr.Interface(
        fn=chatbot,
        inputs="text",
        outputs="text",
        title="Custom Chatbot - TinyLlama",
    ).launch(server_name="0.0.0.0", server_port=port)
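
# --- Usage sketch (assumption: running locally with the model already cached,
# not via the Gradio UI). chatbot() can be exercised directly in a REPL:
#
#   >>> chatbot("hi")
#   'Hello! Good morning, I am chatbot Deepika.'
#   >>> chatbot("Tell me about llamas.")  # falls through to TinyLlama
#   '<model-generated text>'
#
# Canned answers short-circuit before any model call, so they return instantly;
# everything else pays the cost of one generation pass (up to max_new_tokens).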