Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,14 +1,14 @@
|
|
| 1 |
import os
|
| 2 |
import streamlit as st
|
| 3 |
-
from huggingface_hub import login
|
| 4 |
from datasets import load_dataset
|
| 5 |
from llama_cpp import Llama
|
| 6 |
-
from huggingface_hub import hf_hub_download
|
| 7 |
import chromadb
|
|
|
|
| 8 |
from sentence_transformers import SentenceTransformer
|
| 9 |
|
| 10 |
# Load Hugging Face token from environment variable
|
| 11 |
-
hf_token = os.getenv("HF_TOKEN")
|
| 12 |
if hf_token:
|
| 13 |
login(token=hf_token)
|
| 14 |
else:
|
|
@@ -22,6 +22,7 @@ llm = Llama(
|
|
| 22 |
model_path=hf_hub_download(
|
| 23 |
repo_id="TheBloke/CapybaraHermes-2.5-Mistral-7B-GGUF",
|
| 24 |
filename="capybarahermes-2.5-mistral-7b.Q2_K.gguf",
|
|
|
|
| 25 |
),
|
| 26 |
n_ctx=2048,
|
| 27 |
)
|
|
@@ -30,9 +31,13 @@ llm = Llama(
|
|
| 30 |
class VectorStore:
|
| 31 |
def __init__(self, collection_name):
|
| 32 |
self.embedding_model = SentenceTransformer('sentence-transformers/multi-qa-MiniLM-L6-cos-v1')
|
| 33 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 34 |
if collection_name in [c.name for c in self.chroma_client.list_collections()]:
|
| 35 |
-
self.chroma_client.delete_collection(collection_name)
|
| 36 |
self.collection = self.chroma_client.create_collection(name=collection_name)
|
| 37 |
|
| 38 |
def populate_vectors(self, dataset):
|
|
@@ -60,7 +65,7 @@ class VectorStore:
|
|
| 60 |
def search_context(self, query, n_results=1):
|
| 61 |
query_embedding = self.embedding_model.encode([query]).tolist()
|
| 62 |
results = self.collection.query(query_embeddings=query_embedding, n_results=n_results)
|
| 63 |
-
return results['documents']
|
| 64 |
|
| 65 |
# Initialize and populate vector store
|
| 66 |
vector_store = VectorStore("embedding_vector")
|
|
|
|
| 1 |
import os
|
| 2 |
import streamlit as st
|
| 3 |
+
from huggingface_hub import login, hf_hub_download
|
| 4 |
from datasets import load_dataset
|
| 5 |
from llama_cpp import Llama
|
|
|
|
| 6 |
import chromadb
|
| 7 |
+
from chromadb.config import Settings # Added import for Settings
|
| 8 |
from sentence_transformers import SentenceTransformer
|
| 9 |
|
| 10 |
# Load Hugging Face token from environment variable
|
| 11 |
+
hf_token = os.getenv("HF_TOKEN")
|
| 12 |
if hf_token:
|
| 13 |
login(token=hf_token)
|
| 14 |
else:
|
|
|
|
| 22 |
model_path=hf_hub_download(
|
| 23 |
repo_id="TheBloke/CapybaraHermes-2.5-Mistral-7B-GGUF",
|
| 24 |
filename="capybarahermes-2.5-mistral-7b.Q2_K.gguf",
|
| 25 |
+
token=hf_token # Ensure the token is passed for authentication
|
| 26 |
),
|
| 27 |
n_ctx=2048,
|
| 28 |
)
|
|
|
|
| 31 |
class VectorStore:
|
| 32 |
def __init__(self, collection_name):
|
| 33 |
self.embedding_model = SentenceTransformer('sentence-transformers/multi-qa-MiniLM-L6-cos-v1')
|
| 34 |
+
# Use Settings to configure persistence
|
| 35 |
+
self.chroma_client = chromadb.Client(Settings(
|
| 36 |
+
chroma_db_impl="duckdb+parquet",
|
| 37 |
+
persist_directory="./chroma_db" # Directory where Chroma stores its data on disk
|
| 38 |
+
))
|
| 39 |
if collection_name in [c.name for c in self.chroma_client.list_collections()]:
|
| 40 |
+
self.chroma_client.delete_collection(name=collection_name)
|
| 41 |
self.collection = self.chroma_client.create_collection(name=collection_name)
|
| 42 |
|
| 43 |
def populate_vectors(self, dataset):
|
|
|
|
| 65 |
def search_context(self, query, n_results=1):
|
| 66 |
query_embedding = self.embedding_model.encode([query]).tolist()
|
| 67 |
results = self.collection.query(query_embeddings=query_embedding, n_results=n_results)
|
| 68 |
+
return results['documents'][0] # 'documents' holds one list per query; return the list for the single query
|
| 69 |
|
| 70 |
# Initialize and populate vector store
|
| 71 |
vector_store = VectorStore("embedding_vector")
|