Spaces: Sleeping
Commit
·
be49a41
1
Parent(s):
7ffa2a6
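Remove the BitsAndBytes quantization config fully (drops the `bnb_config` block and the `quantization_config=bnb_config` argument to `from_pretrained`)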
Browse files
app.py
CHANGED
|
@@ -2,7 +2,7 @@ import spaces
|
|
| 2 |
import gradio as gr
|
| 3 |
from huggingface_hub import InferenceClient
|
| 4 |
from qdrant_client import QdrantClient, models
|
| 5 |
-
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
|
| 6 |
from sentence_transformers import SentenceTransformer
|
| 7 |
from huggingface_hub import login
|
| 8 |
import torch
|
|
@@ -110,18 +110,11 @@ client.add(collection_name="products",
|
|
| 110 |
client.add(collection_name="recipes",
|
| 111 |
documents=recipe_strings)
|
| 112 |
model_name = "LeoLM/leo-hessianai-13b-chat"
|
| 113 |
-
bnb_config = BitsAndBytesConfig(
|
| 114 |
-
load_in_4bit=True, # Use 4-bit quantization
|
| 115 |
-
bnb_4bit_compute_dtype=torch.float16, # Reduce memory usage
|
| 116 |
-
bnb_4bit_use_double_quant=True,
|
| 117 |
-
llm_int8_enable_fp32_cpu_offload=True
|
| 118 |
-
)
|
| 119 |
|
| 120 |
@spaces.GPU
|
| 121 |
def load_model():
|
| 122 |
ankerbot_model = AutoModelForCausalLM.from_pretrained(
|
| 123 |
model_name,
|
| 124 |
-
quantization_config=bnb_config,
|
| 125 |
device_map="cpu",
|
| 126 |
torch_dtype=torch.float16,
|
| 127 |
use_cache=True,
|
|
|
|
| 2 |
import gradio as gr
|
| 3 |
from huggingface_hub import InferenceClient
|
| 4 |
from qdrant_client import QdrantClient, models
|
| 5 |
+
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
|
| 6 |
from sentence_transformers import SentenceTransformer
|
| 7 |
from huggingface_hub import login
|
| 8 |
import torch
|
|
|
|
| 110 |
client.add(collection_name="recipes",
|
| 111 |
documents=recipe_strings)
|
| 112 |
model_name = "LeoLM/leo-hessianai-13b-chat"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 113 |
|
| 114 |
@spaces.GPU
|
| 115 |
def load_model():
|
| 116 |
ankerbot_model = AutoModelForCausalLM.from_pretrained(
|
| 117 |
model_name,
|
|
|
|
| 118 |
device_map="cpu",
|
| 119 |
torch_dtype=torch.float16,
|
| 120 |
use_cache=True,
|