# Spaces status (page-capture residue, not part of the script): Runtime error
| import sqlite3, json | |
| from contextlib import closing | |
# Directory the FAISS index is saved to -- change this for each new build.
output_dir = "faiss_qa_2023-08-20"
# Name of the sentence-transformers model used to embed the queries.
model_name = 'multi-qa-MiniLM-L6-cos-v1'
# Characters removed from a query outright.
punctuation = '!"#\'(),:;?[]^`}{'
# Characters replaced by a single space so the words they join stay separate.
# NOTE: '[' and ']' are listed in both sets; str.maketrans applies the deletion
# set last, so brackets end up deleted rather than replaced by a space.
punctuation2 = '-/&._~+*=@<>[]\\'
remove_punctuation = str.maketrans(punctuation2, ' ' * len(punctuation2), punctuation)


def load_questions(sqlite_filename):
    """Load the question records of every usable article from an SQLite file.

    Selects rows of the ``articles`` table whose ``articleType`` is
    ``'article'`` and whose ``doNotUse`` flag is NULL or 0. Each row's
    ``questions`` column is parsed as a JSON list of dicts, each of which
    must contain a ``question`` key.

    Every question dict is extended in place with:

    * ``query`` -- the row's section, title and the question text joined by
      single spaces, passed through ``remove_punctuation`` and lower-cased;
    * ``articleId`` -- copied from the row.

    For rows whose section is 'служебная информация' the section and title
    are dropped, so the query consists of the question text only.

    Args:
        sqlite_filename: Path of the SQLite database to read.

    Returns:
        A flat list with the question dicts of all selected rows.
    """
    with closing(sqlite3.connect(sqlite_filename)) as db:
        db.row_factory = sqlite3.Row
        with closing(db.cursor()) as cursor:
            # The parentheses matter: AND binds tighter than OR, so without
            # them every row with doNotUse = 0 would be returned regardless
            # of its articleType.
            results = cursor.execute(
                "SELECT id, articleId, title, category, section, questions "
                "FROM articles "
                "WHERE articleType = ? AND (doNotUse IS NULL OR doNotUse = 0)",
                ('article',)
            ).fetchall()

    # All rows are already fetched, so the connection is no longer needed.
    all_questions = []
    for res in results:
        section = res['section'].lower()
        title = res['title'].lower()
        if section == 'служебная информация':
            section = ''
            title = ''
        # Section and title words are shared by every question of the row.
        prefix_words = section.split() + title.split()
        questions = json.loads(res['questions'])
        for q in questions:
            words = prefix_words + q['question'].split()
            q['query'] = " ".join(words).translate(remove_punctuation).lower()
            q['articleId'] = res['articleId']
        all_questions += questions
    return all_questions
# Script body: read the questions, wrap them as LangChain documents, embed
# them and persist the resulting FAISS index to `output_dir`.
print("Loading questions from db...")
questions = load_questions("omnidesk-ai-chatgpt-questions.sqlite")

# Imported only after the database has been read, as in the original script.
from langchain.vectorstores import FAISS
from langchain.docstore.document import Document
from langchain.embeddings import SentenceTransformerEmbeddings


def _as_document(entry):
    """Wrap one question dict: the query is the indexed text, the rest is metadata."""
    details = {'answer': entry['answer'], 'articleId': entry['articleId']}
    return Document(page_content=entry['query'], metadata=details)


docs = list(map(_as_document, questions))

print(f"Loading embeddings model {model_name}...")
embeddings = SentenceTransformerEmbeddings(model_name=model_name)

print("embedding documents...")
db = FAISS.from_documents(docs, embeddings)
db.save_local(output_dir)
print('Saved!')