Spaces: Running on T4

Update app.py

app.py CHANGED
@@ -138,13 +138,74 @@ async def chat(query,history,sources,reports,subtype,year):
         search_type="similarity_score_threshold", search_kwargs={"score_threshold": 0.6, "k": 3, "filter":filter})
 
     context_retrieved = retriever.invoke(question)
+    for doc in context_retrieved:
+        print(doc.metadata)
 
     def format_docs(docs):
-        return "
+        return "|".join(doc.page_content for doc in docs)
 
     context_retrieved_formatted = format_docs(context_retrieved)
     context_retrieved_lst.append(context_retrieved_formatted)
-
+
+    ##-------------------Prompt---------------------------------------------------------------
+    SYSTEM_PROMPT = """
+    You are AuditQ&A, an AI assistant created by auditors and data scientists. You are given a question and extracted passages from consolidated/departmental/thematic-focus audit reports. Provide a clear and structured answer based on the passages/context provided and the guidelines.
+    Guidelines:
+    - If the passages have useful facts or numbers, use them in your answer.
+    - Documents are separated by "|".
+    - When you use information from a passage, mention where it came from by adding [Doc i] at the end of the sentence, where i is the number of the document.
+    - Do not use the sentence 'Doc i says ...' to say where information came from.
+    - If the same thing is said in more than one document, you can mention all of them like this: [Doc i, Doc j, Doc k].
+    - Do not just summarize each passage one by one. Group your summaries to highlight the key parts in the explanation.
+    - If it makes sense, use bullet points and lists to make your answers easier to understand.
+    - You do not need to use every passage. Only use the ones that help answer the question.
+    - If the documents do not have the information needed to answer the question, just say you do not have enough information.
+    """
+
+    USER_PROMPT = """Passages:
+    {context}
+    -----------------------
+    Question: {question} - Explained to an audit expert
+    Answer in English with the passage citations:
+    """.format(context=context_retrieved_lst, question=query)
+
+    messages = [
+        SystemMessage(content=SYSTEM_PROMPT),
+        HumanMessage(
+            content=USER_PROMPT
+        ),]
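SystemMessage and HumanMessage are langchain-core message types; the diff assumes they are imported earlier in app.py, presumably along these lines:

    from langchain_core.messages import HumanMessage, SystemMessage

    messages = [
        SystemMessage(content="system instructions ..."),
        HumanMessage(content="user prompt ..."),
    ]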
+
+    ###-----------------getting inference endpoints------------------------------
+    llm_qa = HuggingFaceEndpoint(
+        endpoint_url="https://nhe9phsr2zhs0e36.eu-west-1.aws.endpoints.huggingface.cloud",
+        max_new_tokens=512,
+        top_k=10,
+        top_p=0.95,
+        typical_p=0.95,
+        temperature=0.01,
+        repetition_penalty=1.03,)
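With temperature=0.01 decoding is close to greedy, so top_k/top_p/typical_p rarely change the chosen token, and repetition_penalty=1.03 only mildly discourages loops. The endpoint_url points at a dedicated Inference Endpoint, so calls need a Hugging Face token with access to it. A bare-string call sketch, assuming the langchain_huggingface package (the diff does not show its imports):

    from langchain_huggingface import HuggingFaceEndpoint

    llm_qa = HuggingFaceEndpoint(
        endpoint_url="https://nhe9phsr2zhs0e36.eu-west-1.aws.endpoints.huggingface.cloud",
        max_new_tokens=512,
        temperature=0.01,  # near-greedy; the other sampling knobs barely bite
    )
    print(llm_qa.invoke("Summarise the key audit findings on procurement."))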
+
+    # create rag chain
+    chat_model = ChatHuggingFace(llm=llm_qa)
+    chain = chat_model | StrOutputParser()
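ChatHuggingFace wraps the raw text endpoint and applies the model's chat template to the message list; the LCEL "|" then pipes its AIMessage output into StrOutputParser, which keeps only the .content string. The piping behaviour in isolation, with a stub standing in for the real chat model:

    from langchain_core.messages import AIMessage
    from langchain_core.output_parsers import StrOutputParser
    from langchain_core.runnables import RunnableLambda

    # any Runnable that returns an AIMessage plays the role of chat_model here
    stub_chat_model = RunnableLambda(lambda msgs: AIMessage(content="stub answer [Doc 1]"))
    chain = stub_chat_model | StrOutputParser()

    print(chain.invoke([]))  # -> stub answer [Doc 1]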
+
+    ###-------------------------- get answers ---------------------------------------
+    answer_lst = []
+    for question, context in zip(question_lst, context_retrieved_lst):
+        answer = chain.invoke(messages)
+        answer_lst.append(answer)
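Worth flagging: the loop body never uses question or context, so every iteration sends the identical messages list to the model. If the intent was one answer per retrieved context (an assumption), the user prompt would need rebuilding inside the loop, roughly:

    # hypothetical fix; USER_PROMPT_TEMPLATE is the unformatted template
    # string, not the pre-formatted USER_PROMPT built above
    answer_lst = []
    for question, context in zip(question_lst, context_retrieved_lst):
        per_item_prompt = USER_PROMPT_TEMPLATE.format(context=context, question=question)
        answer_lst.append(chain.invoke([
            SystemMessage(content=SYSTEM_PROMPT),
            HumanMessage(content=per_item_prompt),
        ]))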
+    docs_html = []
+    for i, d in enumerate(context_retrieved, 1):
+        docs_html.append(make_html_source(d, i))
+    docs_html = "".join(docs_html)
+
+    previous_answer = history[-1][1]
+    previous_answer = previous_answer if previous_answer is not None else ""
+    answer_yet = previous_answer + answer_lst[0]
+    answer_yet = parse_output_llm_with_sources(answer_yet)
+    history[-1] = (query, answer_yet)
+
+    history = [tuple(x) for x in history]
 
     yield history,docs_html
     #process_pdf()
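Because chat is an async generator handler, each yield pushes an updated chatbot state plus the sources panel to Gradio, and history uses the (user, assistant) tuple-per-turn shape the Chatbot component expects, hence the tuple(x) normalisation above. The shape of that contract, with hypothetical values:

    # one (user, assistant) pair per turn, as gr.Chatbot consumes it
    history = [("What did the FY22 audit find?", None)]
    history[-1] = (history[-1][0], "The audit flagged a 12% revenue shortfall [Doc 1].")
    history = [tuple(x) for x in history]  # same normalisation as the diff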