no message
- config.py +1 -1
- model/retriever.py +3 -0
config.py
CHANGED
@@ -5,5 +5,5 @@ CONFIG = {
     "CHUNK_SIZE": 200,
     "OPENAI_ENGINE": "gpt-4o-mini",
     "MAX_TOKENS": 500,
-    "TOP_DOCS":
+    "TOP_DOCS": 5
 }
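For context, a minimal sketch of how the new TOP_DOCS value could be consumed when ranking retrieved chunks; the query, corpus, and use of util.semantic_search below are illustrative assumptions, not code from this repository:

# Hypothetical usage sketch: keep only the TOP_DOCS best-matching chunks.
from sentence_transformers import SentenceTransformer, util

from config import CONFIG

model = SentenceTransformer('all-MiniLM-L6-v2')
corpus = ["first chunk of text", "second chunk of text", "third chunk of text"]
corpus_embeddings = model.encode(corpus, convert_to_tensor=True)

query_embedding = model.encode("example query", convert_to_tensor=True)
# util.semantic_search returns one ranked hit list per query; each hit is a
# {'corpus_id': ..., 'score': ...} dict, truncated to at most top_k entries.
hits = util.semantic_search(query_embedding, corpus_embeddings,
                            top_k=CONFIG["TOP_DOCS"])[0]
top_chunks = [corpus[hit["corpus_id"]] for hit in hits]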
model/retriever.py
CHANGED
@@ -23,6 +23,9 @@ class Retriever:
 
     def compute_embeddings(self):
         self.model = SentenceTransformer('all-MiniLM-L6-v2')
+        # tokenizer = self.model._first_module().tokenizer
+        # if tokenizer.pad_token is None:
+        #     tokenizer.pad_token = tokenizer.eos_token
         self.chunk_embeddings = self.model.encode(self.corpus, convert_to_tensor=True)
 
     def chunk_text(self, text, chunk_size=CONFIG['CHUNK_SIZE']):
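The three added lines stay commented out in this commit. As a hedged sketch of what they would do if enabled (assuming only the standard sentence-transformers and Hugging Face tokenizer attributes, nothing specific to this repository):

from sentence_transformers import SentenceTransformer

model = SentenceTransformer('all-MiniLM-L6-v2')

# _first_module() returns the underlying Transformer module. Its Hugging Face
# tokenizer may ship without a pad token for some (e.g. GPT-style) checkpoints;
# batched encoding needs one, and reusing the EOS token as padding is a common
# workaround.
tokenizer = model._first_module().tokenizer
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# 'all-MiniLM-L6-v2' already defines '[PAD]', so the guard is a no-op here.
embeddings = model.encode(["some text", "another chunk"], convert_to_tensor=True)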