PleIAs-Editor

Runtime error

App Files Files Community

Pclanglais committed on Jul 1, 2024

Commit

0dfb412

verified ·

1 Parent(s): 8252e5b

Update app.py

Browse files

Files changed (1) hide show

app.py +89 -175

app.py CHANGED Viewed

@@ -1,5 +1,6 @@
 import re
-from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig, AutoModel, StoppingCriteria, StoppingCriteriaList, TextIteratorStreamer
 from vllm import LLM, SamplingParams
 import torch
 import gradio as gr
@@ -7,169 +8,82 @@ import json
 import os
 import shutil
 import requests
-import numpy as np
 import pandas as pd
-from threading import Thread
-from FlagEmbedding import BGEM3FlagModel
-from sklearn.metrics.pairwise import cosine_similarity
-from transformers import AutoModelForSequenceClassification
 device = "cuda" if torch.cuda.is_available() else "cpu"
-#Importing the embedding model
-embedding_model = BGEM3FlagModel('BAAI/bge-m3',
-                       use_fp16=True) # Setting use_fp16 to True speeds up computation with a slight performance degradation
-embeddings = np.load("embeddings_albert_tchap.npy")
-embeddings_data = pd.read_json("embeddings_albert_tchap.json")
-embeddings_text = embeddings_data["text_with_context"].tolist()
-#Importing the classifier/router (deberta)
-classifier_model = AutoModelForSequenceClassification.from_pretrained("AgentPublic/chatrag-deberta")
-classifier_tokenizer = AutoTokenizer.from_pretrained("AgentPublic/chatrag-deberta")
-#Importing the actual generative LLM (llama-based)
-model_name = "Pclanglais/Tchap"
-tokenizer = AutoTokenizer.from_pretrained(model_name)
-model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16)
-model = model.to('cuda:0')
-system_prompt = "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\nTu es Albert, l'agent conversationnel des services publics qui peut décrire des documents de référence ou aider à des tâches de rédaction<|eot_id|>"
-source_text = "Les sources utilisées par Albert-Tchap vont apparaître ici'"
-#Function to guess whether we use the RAG or not.
-def classification_chatrag(query):
-  print(query)
-  encoding = classifier_tokenizer(query, return_tensors="pt")
-  encoding = {k: v.to(classifier_model.device) for k,v in encoding.items()}
-  outputs = classifier_model(**encoding)
-  logits = outputs.logits
-  logits.shape
-  # apply sigmoid + threshold
-  sigmoid = torch.nn.Sigmoid()
-  probs = sigmoid(logits.squeeze().cpu())
-  predictions = np.zeros(probs.shape)
-  # Extract the float value from the tensor
-  float_value = round(probs.item()*100)
-  print(float_value)
-  if float_value > 50:
-    status = True
-    print("We activate RAG")
-  else:
-    status = False
-    print("We remove RAG")
-  return status
-#Vector search over the database
-def vector_search(sentence_query):
-    query_embedding = embedding_model.encode(sentence_query,
-                            batch_size=12,
-                            max_length=256, # If you don't need such a long length, you can set a smaller value to speed up the encoding process.
-                            )['dense_vecs']
-    # Reshape the query embedding to fit the cosine_similarity function requirements
-    query_embedding_reshaped = query_embedding.reshape(1, -1)
-    # Compute cosine similarities
-    similarities = cosine_similarity(query_embedding_reshaped, embeddings)
-    # Find the index of the closest document (highest similarity)
-    closest_doc_index = np.argmax(similarities)
-    # Closest document's embedding
-    closest_doc_embedding = embeddings_text[closest_doc_index]
-    return closest_doc_embedding
-class StopOnTokens(StoppingCriteria):
-    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
-        stop_ids = [29, 0]
-        for stop_id in stop_ids:
-            if input_ids[0][-1] == stop_id:
-                return True
-        return False
-def predict(history_transformer_format):
-    print(history_transformer_format)
-    stop = StopOnTokens()
-    messages = []
-    id_message = 1
-    total_message = len(history_transformer_format)
-    for item in history_transformer_format:
-        #Once we target the ongoing post we add the source.
-        if id_message == total_message:
-            if assess_rag:
-                question = "<|start_header_id|>user<|end_header_id|>\n\n"+ item[0] + "\n\n### Source ###\n" + source_text
-            else:
-                question = "<|start_header_id|>user<|end_header_id|>\n\n"+ item[0]
-        else:
-            question = "<|start_header_id|>user<|end_header_id|>\n\n"+ item[0]
-        answer = "<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n"+item[1]
-        result = "".join([question, answer])
-        messages.append(result)
-        id_message = id_message + 1
-    messages = "".join(messages)
-    print(messages)
-    messages = system_prompt + messages
-    print(messages)
-    model_inputs = tokenizer([messages], return_tensors="pt").to("cuda")
-    streamer = TextIteratorStreamer(tokenizer, timeout=10., skip_prompt=True, skip_special_tokens=True)
-    generate_kwargs = dict(
-        model_inputs,
-        streamer=streamer,
-        max_new_tokens=1024,
-        do_sample=False,
-        top_p=0.95,
-        temperature=0.4,
-        stopping_criteria=StoppingCriteriaList([stop])
-        )
-    t = Thread(target=model.generate, kwargs=generate_kwargs)
-    t.start()
-    history_transformer_format[-1][1] = ""
-    for new_token in streamer:
-        if new_token != '<':
-            history_transformer_format[-1][1] += new_token
-            yield history_transformer_format
-def user(message, history):
-    global source_text
-    global assess_rag
-    #For now, we only query the vector database once, at the start.
-    if len(history) == 0:
-        assess_rag = classification_chatrag(message)
-        if assess_rag:
-            source_text = vector_search(message)
-        else:
-            source_text = "Albert-Tchap n'utilise pas de sources comme votre requête n'a pas l'air d'en recueillir."
-    history_transformer_format = history + [[message, ""]]
-    print(history_transformer_format)
-    return "", history_transformer_format, source_text
 # Define the Gradio interface
-title = "Tchap"
-description = "Le chatbot du service public"
 examples = [
     [
         "Qui peut bénéficier de l'AIP?",  # user_message
@@ -177,26 +91,26 @@ examples = [
     ]
 ]
-with gr.Blocks() as demo:
-    with gr.Row():
-        with gr.Column(scale=2):
-            gr.HTML("<h2>Chat</2>")
-            chatbot = gr.Chatbot()
-            msg = gr.Textbox()
-            clear = gr.Button("Clear")
-            history = gr.State()
-        with gr.Column(scale=1):
-            gr.HTML("<h2>Source utilisée</2>")
-            user_output = gr.HTML()  # To display the user's message
-    msg.submit(user, inputs=[msg, chatbot], outputs=[msg, chatbot, user_output], queue=False).then(
-                predict, chatbot, chatbot
-            )
-    clear.click(lambda: None, None, chatbot, queue=False)
-demo.queue()
-demo.launch()

+import transformers
 import re
+from transformers import AutoConfig, AutoTokenizer, AutoModel, AutoModelForCausalLM
 from vllm import LLM, SamplingParams
 import torch
 import gradio as gr
 import os
 import shutil
 import requests
+import chromadb
 import pandas as pd
+from chromadb.config import Settings
+from chromadb.utils import embedding_functions
+# Define the device
+# Picks "cuda" when torch reports a usable GPU, otherwise "cpu".
+# NOTE(review): `device` is not passed to the LLM constructor below in the visible code.
 device = "cuda" if torch.cuda.is_available() else "cpu"
+# Model identifier handed to vLLM.
+model_name = "PleIAs/OCRonos"
+# The engine is created once at import time and shared by every request.
+# NOTE(review): max_model_len is presumably the context-length cap per the vLLM docs — confirm 8128 is intended.
+llm = LLM(model_name, max_model_len=8128)
+# CSS passed to gr.Blocks(css=...): styles the generated-text container,
+# the :target highlight, the floating source column and the hover tooltips.
+css = """
+.generation {
+    margin-left:2em;
+    margin-right:2em;
+    font-size:1.2em;
+}
+:target {
+    background-color: #CCF3DF; /* Highlight the element targeted by the URL fragment */
+  }
+.source {
+    float:left;
+    max-width:17%;
+    margin-left:2%;
+}
+.tooltip {
+    position: relative;
+    cursor: pointer;
+    font-variant-position: super;
+    color: #97999b;
+  }
+  .tooltip:hover::after {
+    content: attr(data-text);
+    position: absolute;
+    left: 0;
+    top: 120%; /* Adjust this value as needed to control the vertical spacing between the text and the tooltip */
+    white-space: pre-wrap; /* Allows the text to wrap */
+    width: 500px; /* Sets a fixed maximum width for the tooltip */
+    max-width: 500px; /* Ensures the tooltip does not exceed the maximum width */
+    z-index: 1;
+    background-color: #f9f9f9;
+    color: #000;
+    border: 1px solid #ddd;
+    border-radius: 5px;
+    padding: 5px;
+    display: block;
+    box-shadow: 0 4px 8px rgba(0,0,0,0.1); /* Optional: Adds a subtle shadow for better visibility */
+  }"""
+# Courtesy of ChatGPT
+# Class to encapsulate the OCR correction bot
+class MistralChatBot:
+    """Thin wrapper around the module-level vLLM engine ``llm``.
+
+    Builds the "### TEXT ### / ### CORRECTION ###" prompt, runs one
+    generation and returns the result wrapped in an HTML fragment.
+    """
+
+    def __init__(self, system_prompt="Le dialogue suivant est une conversation"):
+        # Stored on the instance; ``predict`` does not read it.
+        self.system_prompt = system_prompt
+
+    def predict(self, user_message):
+        """Run the model on *user_message* and return the output as HTML.
+
+        Args:
+            user_message: Text inserted into the "### TEXT ###" section of the prompt.
+
+        Returns:
+            A string holding a centred ``<h2>`` heading followed by the
+            HTML-escaped model output inside ``<div class="generation">``.
+        """
+        # Function-scope import so the block does not depend on a file-level import.
+        import html
+
+        sampling_params = SamplingParams(temperature=0.9, top_p=0.95, max_tokens=4000, presence_penalty=0, stop=["#END#"])
+        detailed_prompt = f"### TEXT ###\n{user_message}\n\n### CORRECTION ###\n"
+        print(detailed_prompt)
+        outputs = llm.generate([detailed_prompt], sampling_params, use_tqdm=False)
+        # The returned string is displayed as HTML, so the raw model text is
+        # escaped: stray "<", ">" or "&" must show up as text, not markup.
+        generated_text = html.escape(outputs[0].outputs[0].text)
+        # Closing tag now matches the opening <h2> (was </h3>).
+        return '<h2 style="text-align:center">Réponse</h2>\n<div class="generation">' + generated_text + "</div>"
+# Create the OCR correction bot instance
+mistral_bot = MistralChatBot()
 # Define the Gradio interface
+# NOTE(review): title, description, examples and additional_inputs are not referenced
+# by the Blocks layout in the visible code (the page heading is hard-coded there).
+title = "Correction d'OCR"
+description = "Un outil expérimental de correction d'OCR basé sur des modèles de langue"
+# Each example is a one-element list holding a sample user message.
 examples = [
     [
         "Qui peut bénéficier de l'AIP?",  # user_message
     ]
 ]
+# Temperature slider definition (0.05 to 1.0 in steps of 0.05, default 0.2).
+# NOTE(review): the visible predict() uses a fixed temperature and never reads this control.
+additional_inputs=[
+    gr.Slider(
+        label="Température",
+        value=0.2,  # Default value
+        minimum=0.05,
+        maximum=1.0,
+        step=0.05,
+        interactive=True,
+        info="Des valeurs plus élevées donne plus de créativité, mais aussi d'étrangeté",
+    ),
+]
+# Build the page: a heading, one text box, one button and one HTML output panel.
+# The ``with ... as demo`` statement is what binds ``demo``; the former
+# standalone ``demo = gr.Blocks()`` was overwritten immediately and is dropped.
+with gr.Blocks(theme='JohnSmith9982/small_and_pretty', css=css) as demo:
+    gr.HTML("""<h1 style="text-align:center">Correction d'OCR</h1>""")
+    text_input = gr.Textbox(label="Votre texte.", type="text", lines=1)
+    text_button = gr.Button("Corriger l'OCR")
+    text_output = gr.HTML(label="Le texte corrigé")
+    # A click passes the textbox content to the bot and renders the returned HTML.
+    text_button.click(mistral_bot.predict, inputs=text_input, outputs=[text_output])
+# Launch only when executed as a script, with request queuing enabled.
+if __name__ == "__main__":
+    demo.queue().launch()