no message
- config.py +1 -1
- model/retriever.py +3 -0
config.py
CHANGED
@@ -5,5 +5,5 @@ CONFIG = {
     "CHUNK_SIZE": 200,
     "OPENAI_ENGINE": "gpt-4o-mini",
     "MAX_TOKENS": 500,
-    "TOP_DOCS":
+    "TOP_DOCS": 5
 }
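For context, a minimal sketch of how the new TOP_DOCS value could be consumed when ranking retrieved chunks; the query, corpus, and use of util.semantic_search below are illustrative assumptions, not code from this repository:

# Hypothetical usage sketch: keep only the TOP_DOCS best-matching chunks.
from sentence_transformers import SentenceTransformer, util

from config import CONFIG

model = SentenceTransformer('all-MiniLM-L6-v2')
corpus = ["first chunk of text", "second chunk of text", "third chunk of text"]
corpus_embeddings = model.encode(corpus, convert_to_tensor=True)

query_embedding = model.encode("example query", convert_to_tensor=True)
# util.semantic_search returns one ranked hit list per query; each hit is a
# {'corpus_id': ..., 'score': ...} dict, truncated to at most top_k entries.
hits = util.semantic_search(query_embedding, corpus_embeddings,
                            top_k=CONFIG["TOP_DOCS"])[0]
top_chunks = [corpus[hit["corpus_id"]] for hit in hits]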
model/retriever.py
CHANGED
@@ -23,6 +23,9 @@ class Retriever:
 
     def compute_embeddings(self):
         self.model = SentenceTransformer('all-MiniLM-L6-v2')
+        # tokenizer = self.model._first_module().tokenizer
+        # if tokenizer.pad_token is None:
+        #     tokenizer.pad_token = tokenizer.eos_token
         self.chunk_embeddings = self.model.encode(self.corpus, convert_to_tensor=True)
 
     def chunk_text(self, text, chunk_size=CONFIG['CHUNK_SIZE']):
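The three added lines stay commented out in this commit. As a hedged sketch of what they would do if enabled (assuming only the standard sentence-transformers and Hugging Face tokenizer attributes, nothing specific to this repository):

from sentence_transformers import SentenceTransformer

model = SentenceTransformer('all-MiniLM-L6-v2')

# _first_module() returns the underlying Transformer module. Its Hugging Face
# tokenizer may ship without a pad token for some (e.g. GPT-style) checkpoints;
# batched encoding needs one, and reusing the EOS token as padding is a common
# workaround.
tokenizer = model._first_module().tokenizer
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# 'all-MiniLM-L6-v2' already defines '[PAD]', so the guard is a no-op here.
embeddings = model.encode(["some text", "another chunk"], convert_to_tensor=True)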