Spaces:

Ankerkraut
/

chatbot-test

Sleeping

Ankerkraut committed on Mar 26

Commit

be49a41

1 Parent(s): 7ffa2a6

remove fully

Files changed (1) hide show

app.py CHANGED Viewed

@@ -2,7 +2,7 @@ import spaces
 import gradio as gr
 from huggingface_hub import InferenceClient
 from qdrant_client import QdrantClient, models
-from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, BitsAndBytesConfig
 from sentence_transformers import SentenceTransformer
 from huggingface_hub import login
 import torch
@@ -110,18 +110,11 @@ client.add(collection_name="products",
 client.add(collection_name="recipes",
         documents=recipe_strings)
 model_name = "LeoLM/leo-hessianai-13b-chat"
-bnb_config = BitsAndBytesConfig(
-    load_in_4bit=True,  # Use 4-bit quantization
-    bnb_4bit_compute_dtype=torch.float16,  # Reduce memory usage
-    bnb_4bit_use_double_quant=True,
-    llm_int8_enable_fp32_cpu_offload=True
-)
 @spaces.GPU
 def load_model():
     ankerbot_model = AutoModelForCausalLM.from_pretrained(
         model_name,
-        quantization_config=bnb_config,
         device_map="cpu",
         torch_dtype=torch.float16,
         use_cache=True,

 import gradio as gr
 from huggingface_hub import InferenceClient
 from qdrant_client import QdrantClient, models
+from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 from sentence_transformers import SentenceTransformer
 from huggingface_hub import login
 import torch
 client.add(collection_name="recipes",
         documents=recipe_strings)
 model_name = "LeoLM/leo-hessianai-13b-chat"
 @spaces.GPU
 def load_model():
     ankerbot_model = AutoModelForCausalLM.from_pretrained(
         model_name,
         device_map="cpu",
         torch_dtype=torch.float16,
         use_cache=True,