Spaces: Running on T4

Update app.py

app.py CHANGED
@@ -138,13 +138,74 @@ async def chat(query,history,sources,reports,subtype,year):
         search_type="similarity_score_threshold", search_kwargs={"score_threshold": 0.6, "k": 3, "filter":filter})
 
     context_retrieved = retriever.invoke(question)
+    for doc in context_retrieved:
+        print(doc.metadata)
 
     def format_docs(docs):
-        return "
+        return "|".join(doc.page_content for doc in docs)
 
     context_retrieved_formatted = format_docs(context_retrieved)
     context_retrieved_lst.append(context_retrieved_formatted)
-
+
+    ##-------------------Prompt---------------------------------------------------------------
+    SYSTEM_PROMPT = """
+    You are AuditQ&A, an AI assistant created by auditors and data scientists. You are given a question and extracted passages from consolidated/departmental/thematic-focus audit reports. Provide a clear and structured answer based on the passages/context provided and the guidelines.
+    Guidelines:
+    - If the passages have useful facts or numbers, use them in your answer.
+    - Documents are separated by "|".
+    - When you use information from a passage, mention where it came from by adding [Doc i] at the end of the sentence, where i is the number of the document.
+    - Do not use the sentence 'Doc i says ...' to say where information came from.
+    - If the same thing is said in more than one document, you can mention all of them like this: [Doc i, Doc j, Doc k].
+    - Do not just summarize each passage one by one. Group your summaries to highlight the key parts in the explanation.
+    - If it makes sense, use bullet points and lists to make your answers easier to understand.
+    - You do not need to use every passage. Only use the ones that help answer the question.
+    - If the documents do not have the information needed to answer the question, just say you do not have enough information.
+    """
+
+    USER_PROMPT = """Passages:
+    {context}
+    -----------------------
+    Question: {question} - Explained to an audit expert
+    Answer in English with the passage citations:
+    """.format(context=context_retrieved_lst, question=query)
+
+    messages = [
+        SystemMessage(content=SYSTEM_PROMPT),
+        HumanMessage(
+            content=USER_PROMPT
+        ),]
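SystemMessage and HumanMessage are langchain-core message types; the diff assumes they are imported earlier in app.py, presumably along these lines:

    from langchain_core.messages import HumanMessage, SystemMessage

    messages = [
        SystemMessage(content="system instructions ..."),
        HumanMessage(content="user prompt ..."),
    ]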
+
+    ###-----------------getting inference endpoints------------------------------
+    llm_qa = HuggingFaceEndpoint(
+        endpoint_url="https://nhe9phsr2zhs0e36.eu-west-1.aws.endpoints.huggingface.cloud",
+        max_new_tokens=512,
+        top_k=10,
+        top_p=0.95,
+        typical_p=0.95,
+        temperature=0.01,
+        repetition_penalty=1.03,)
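With temperature=0.01 decoding is close to greedy, so top_k/top_p/typical_p rarely change the chosen token, and repetition_penalty=1.03 only mildly discourages loops. The endpoint_url points at a dedicated Inference Endpoint, so calls need a Hugging Face token with access to it. A bare-string call sketch, assuming the langchain_huggingface package (the diff does not show its imports):

    from langchain_huggingface import HuggingFaceEndpoint

    llm_qa = HuggingFaceEndpoint(
        endpoint_url="https://nhe9phsr2zhs0e36.eu-west-1.aws.endpoints.huggingface.cloud",
        max_new_tokens=512,
        temperature=0.01,  # near-greedy; the other sampling knobs barely bite
    )
    print(llm_qa.invoke("Summarise the key audit findings on procurement."))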
+
+    # create rag chain
+    chat_model = ChatHuggingFace(llm=llm_qa)
+    chain = chat_model | StrOutputParser()
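ChatHuggingFace wraps the raw text endpoint and applies the model's chat template to the message list; the LCEL "|" then pipes its AIMessage output into StrOutputParser, which keeps only the .content string. The piping behaviour in isolation, with a stub standing in for the real chat model:

    from langchain_core.messages import AIMessage
    from langchain_core.output_parsers import StrOutputParser
    from langchain_core.runnables import RunnableLambda

    # any Runnable that returns an AIMessage plays the role of chat_model here
    stub_chat_model = RunnableLambda(lambda msgs: AIMessage(content="stub answer [Doc 1]"))
    chain = stub_chat_model | StrOutputParser()

    print(chain.invoke([]))  # -> stub answer [Doc 1]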
+
+    ###-------------------------- get answers ---------------------------------------
+    answer_lst = []
+    for question, context in zip(question_lst, context_retrieved_lst):
+        answer = chain.invoke(messages)
+        answer_lst.append(answer)
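Worth flagging: the loop body never uses question or context, so every iteration sends the identical messages list to the model. If the intent was one answer per retrieved context (an assumption), the user prompt would need rebuilding inside the loop, roughly:

    # hypothetical fix; USER_PROMPT_TEMPLATE is the unformatted template
    # string, not the pre-formatted USER_PROMPT built above
    answer_lst = []
    for question, context in zip(question_lst, context_retrieved_lst):
        per_item_prompt = USER_PROMPT_TEMPLATE.format(context=context, question=question)
        answer_lst.append(chain.invoke([
            SystemMessage(content=SYSTEM_PROMPT),
            HumanMessage(content=per_item_prompt),
        ]))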
+    docs_html = []
+    for i, d in enumerate(context_retrieved, 1):
+        docs_html.append(make_html_source(d, i))
+    docs_html = "".join(docs_html)
+
+    previous_answer = history[-1][1]
+    previous_answer = previous_answer if previous_answer is not None else ""
+    answer_yet = previous_answer + answer_lst[0]
+    answer_yet = parse_output_llm_with_sources(answer_yet)
+    history[-1] = (query, answer_yet)
+
+    history = [tuple(x) for x in history]
 
     yield history,docs_html
     #process_pdf()
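Because chat is an async generator handler, each yield pushes an updated chatbot state plus the sources panel to Gradio, and history uses the (user, assistant) tuple-per-turn shape the Chatbot component expects, hence the tuple(x) normalisation above. The shape of that contract, with hypothetical values:

    # one (user, assistant) pair per turn, as gr.Chatbot consumes it
    history = [("What did the FY22 audit find?", None)]
    history[-1] = (history[-1][0], "The audit flagged a 12% revenue shortfall [Doc 1].")
    history = [tuple(x) for x in history]  # same normalisation as the diff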