Spaces:

Rhodham96
/

RAG_chatbot

Sleeping

App Files Files Community

Rhodham96 committed on May 15

Commit

02136c1

1 Parent(s): 0eaf837

first commit

Browse files

Files changed (3) hide show

Dockerfile +28 -0
chat.py +184 -0
requirements.txt +125 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,28 @@

+# Use an official lightweight Python image
+FROM python:3.11-slim
+# Set working directory
+WORKDIR /app
+# Alternative: install from a pinned requirements.txt instead of the explicit
+# list below. To do so, uncomment the next two lines and remove the RUN that
+# follows them:
+# COPY requirements.txt .
+# RUN pip install --no-cache-dir -r requirements.txt
+# Install every package chat.py imports:
+#   - google-generativeai provides `google.generativeai` (not "genai")
+#   - pymupdf provides `fitz`
+#   - langchain / langchain-community provide the text splitter and the
+#     HuggingFace embeddings wrapper, which relies on sentence-transformers
+#   - faiss-cpu provides `faiss`; numpy is imported directly
+RUN pip install --no-cache-dir \
+    streamlit \
+    google-generativeai \
+    pymupdf \
+    langchain \
+    langchain-community \
+    sentence-transformers \
+    faiss-cpu \
+    numpy
+# Copy the app code into the container
+COPY chat.py .
+# Expose Streamlit default port
+EXPOSE 8501
+# Set environment variables for Streamlit to run in the container
+ENV STREAMLIT_SERVER_PORT=8501
+ENV STREAMLIT_SERVER_HEADLESS=true
+ENV STREAMLIT_SERVER_ENABLECORS=false
+# NOTE(review): "server.enableWebUI" does not look like a documented Streamlit
+# option; confirm it has an effect or remove it.
+ENV STREAMLIT_SERVER_ENABLEWEBUI=false
+# Command to run the Streamlit app
+CMD ["streamlit", "run", "chat.py", "--server.port=8501", "--server.address=0.0.0.0"]

chat.py ADDED Viewed

	@@ -0,0 +1,184 @@

+import streamlit as st
+import google.generativeai as genai
+import fitz
+from langchain_community.embeddings import HuggingFaceEmbeddings
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+import numpy as np
+import faiss
+# No-op stand-in for warnings.warn: accepts any arguments and does nothing.
+def warn(*args, **kwargs):
+    pass
+import warnings
+# Rebind the module attribute so that code calling `warnings.warn(...)` from
+# this point on reaches the no-op above. This must stay after the imports
+# whose warnings are tolerated and before the ones to be silenced.
+warnings.warn = warn
+# Additionally install a blanket "ignore" filter for every warning category.
+warnings.filterwarnings('ignore')
+from langchain_community.document_loaders import PyPDFLoader
+# Initialize session state variables
+if "messages" not in st.session_state:
+    st.session_state.messages = []
+if "uploaded_files" not in st.session_state:
+    st.session_state.uploaded_files = []
+if "api_key" not in st.session_state:
+    st.session_state.api_key = ""
+def extract_text_from_pdf(file):
+    file.seek(0)
+    pdf_bytes = file.read()
+    if not pdf_bytes:
+        raise ValueError(f"Le fichier {file.name} est vide ou n’a pas pu être lu.")
+    try:
+        doc = fitz.open(stream=pdf_bytes, filetype="pdf")
+    except Exception as e:
+        raise RuntimeError(f"Erreur lors de l'ouverture du fichier {file.name} : {e}")
+    text = ""
+    for page in doc:
+        text += page.get_text()
+    return text
+def process_files(files):
+    texts = []
+    for file in files:
+        if file.type == "text/plain":
+            content = file.getvalue().decode("utf-8")
+            texts.append(content)
+        elif file.type == "application/pdf":
+            content = extract_text_from_pdf(file)
+            texts.append(content)
+    return "\n".join(texts)
+def build_index(text):
+    splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
+    chunks = splitter.split_text(text)
+    embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
+    vectors = [embeddings.embed_query(chunk) for chunk in chunks]
+    dimension = len(vectors[0])
+    index = faiss.IndexFlatL2(dimension)
+    index.add(np.array(vectors).astype("float32"))
+    return index, chunks
+def retrieve_chunks(query, index, chunks, k=3):
+    embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
+    query_vector = np.array([embeddings.embed_query(query)]).astype("float32")
+    distances, indices = index.search(query_vector, k)
+    return [chunks[i] for i in indices[0]]
+def create_sidebar():
+    with st.sidebar:
+        st.title("🤖 Gemini Chatbot")
+        # API key input + validate button
+        api_key_input = st.text_input("Google API Key:", type="password")
+        if st.button("Validate API"):
+            if api_key_input.strip():
+                st.session_state.api_key = api_key_input.strip()
+                st.success("API Key saved ✅")
+            else:
+                st.error("Please enter a valid API key.")
+        # Show file uploader only if API key is set
+        if st.session_state.api_key:
+            uploaded_files = st.file_uploader(
+                "📂 Upload your files (txt, pdf, etc.)",
+                accept_multiple_files=True
+            )
+            if uploaded_files:
+                for file in uploaded_files:
+                    # Avoid duplicates
+                    if file.name not in [f.name for f in st.session_state.uploaded_files]:
+                        st.session_state.uploaded_files.append(file)
+                st.success(f"{len(st.session_state.uploaded_files)} files loaded.")
+            if st.session_state.uploaded_files:
+                st.markdown("**Files currently loaded:**")
+                for f in st.session_state.uploaded_files:
+                    st.write(f.name)
+def main():
+    if "faiss_index" not in st.session_state:
+        st.session_state.faiss_index = None
+    if "chunks" not in st.session_state:
+        st.session_state.chunks = []
+    st.set_page_config(page_title="Gemini Chatbot")
+    create_sidebar()
+    if not st.session_state.api_key:
+        st.warning("👆 Please enter and validate your API key in the sidebar.")
+        return
+    genai.configure(api_key=st.session_state.api_key)
+    model = genai.GenerativeModel("gemini-2.0-flash")
+    # Build index if not done yet
+    if st.session_state.uploaded_files and st.session_state.faiss_index is None:
+        full_text = process_files(st.session_state.uploaded_files)
+        if full_text:
+            index, chunks = build_index(full_text)
+            st.write(f"Nombre de chunks : {len(chunks)}")
+            if "faiss_index" in st.session_state and st.session_state.faiss_index is not None:
+                st.write(f"Dimension FAISS : {st.session_state.faiss_index.d}")
+                st.write(f"Taille index FAISS : {st.session_state.faiss_index.ntotal}")
+            else:
+                st.warning("L'index FAISS n'est pas encore initialisé.")
+            st.session_state.faiss_index = index
+            st.session_state.chunks = chunks
+    st.title("💬 Gemini Chatbot")
+    # Show chat history
+    chat_container = st.container()
+    with chat_container:
+        for msg in st.session_state.messages:
+            with st.chat_message(msg["role"]):
+                st.markdown(msg["content"])
+    # Champ texte toujours en bas
+    prompt = st.text_area("Type your question here...", key="input")
+    if st.button("Send"):
+        # Ajouter message utilisateur
+        if not prompt.strip():
+            st.warning("Please write a message before sending.")
+            return
+        st.session_state.messages.append({"role": "user", "content": prompt})
+        #st.experimental_rerun()
+        with st.chat_message("user"):
+            st.markdown(prompt)
+        with st.chat_message("assistant"):
+            with st.spinner("Thinking..."):
+                try:
+                    # Retrieve relevant chunks from your files
+                    if st.session_state.faiss_index:
+                        relevant_chunks = retrieve_chunks(prompt, st.session_state.faiss_index, st.session_state.chunks, k=3)
+                        context = "\n---\n".join(chunk if isinstance(chunk, str) else chunk.page_content for chunk in relevant_chunks)
+                        prompt_with_context = f"Use the following context to answer the question:\n{context}\n\nQuestion: {prompt}"
+                    else:
+                        prompt_with_context = prompt  # fallback
+                    response = model.generate_content(prompt_with_context)
+                    if response.text:
+                        st.markdown(response.text)
+                        st.session_state.messages.append({"role": "assistant", "content": response.text})
+                    else:
+                        st.markdown("Sorry, I couldn't get a response.")
+                        st.session_state.messages.append({"role": "assistant", "content": "Sorry, I couldn't get a response."})
+                except Exception as e:
+                    st.error(f"Error: {e}")
+                    st.session_state.messages.append({"role": "assistant", "content": f"Error: {e}"})
+if __name__ == "__main__":
+    main()

requirements.txt ADDED Viewed

	@@ -0,0 +1,125 @@

+acres==0.3.0
+aiohappyeyeballs==2.6.1
+aiohttp==3.11.18
+aiosignal==1.3.2
+altair==5.5.0
+annotated-types==0.7.0
+anyio==4.9.0
+attrs==25.3.0
+blinker==1.9.0
+cachetools==5.5.2
+certifi==2025.4.26
+charset-normalizer==3.4.2
+ci-info==0.3.0
+click==8.2.0
+configobj==5.0.9
+configparser==7.2.0
+dataclasses-json==0.6.7
+etelemetry==0.3.1
+faiss-cpu==1.11.0
+filelock==3.18.0
+frozenlist==1.6.0
+fsspec==2025.3.2
+gitdb==4.0.12
+GitPython==3.1.44
+google-ai-generativelanguage==0.6.15
+google-api-core==2.25.0rc1
+google-api-python-client==2.169.0
+google-auth==2.40.1
+google-auth-httplib2==0.2.0
+google-generativeai==0.8.5
+googleapis-common-protos==1.70.0
+grpcio==1.71.0
+grpcio-status==1.71.0
+h11==0.16.0
+httpcore==1.0.9
+httplib2==0.22.0
+httpx==0.28.1
+httpx-sse==0.4.0
+huggingface-hub==0.31.2
+idna==3.10
+importlib_resources==6.5.2
+isodate==0.6.1
+Jinja2==3.1.6
+joblib==1.5.0
+jsonpatch==1.33
+jsonpointer==3.0.0
+jsonschema==4.23.0
+jsonschema-specifications==2025.4.1
+langchain==0.3.25
+langchain-community==0.3.24
+langchain-core==0.3.59
+langchain-text-splitters==0.3.8
+langsmith==0.3.42
+looseversion==1.3.0
+lxml==5.4.0
+MarkupSafe==3.0.2
+marshmallow==3.26.1
+mpmath==1.3.0
+multidict==6.4.3
+mypy_extensions==1.1.0
+narwhals==1.39.0
+networkx==3.4.2
+nibabel==5.3.2
+nipype==1.10.0
+numpy==2.2.5
+orjson==3.10.18
+packaging==24.2
+pandas==2.2.3
+pathlib==1.0.1
+pillow==11.2.1
+propcache==0.3.1
+proto-plus==1.26.1
+protobuf==5.29.4
+prov==2.0.1
+puremagic==1.29
+pyarrow==20.0.0
+pyasn1==0.6.1
+pyasn1_modules==0.4.2
+pydantic==2.11.4
+pydantic-settings==2.9.1
+pydantic_core==2.33.2
+pydeck==0.9.1
+pydot==4.0.0
+PyMuPDF==1.25.5
+pyparsing==3.2.3
+python-dateutil==2.9.0.post0
+python-dotenv==1.1.0
+pytz==2025.2
+pyxnat==1.6.3
+PyYAML==6.0.2
+rdflib==6.3.2
+referencing==0.36.2
+regex==2024.11.6
+requests==2.32.3
+requests-toolbelt==1.0.0
+rpds-py==0.24.0
+rsa==4.9.1
+safetensors==0.5.3
+scikit-learn==1.6.1
+scipy==1.15.3
+sentence-transformers==4.1.0
+simplejson==3.20.1
+six==1.17.0
+smmap==5.0.2
+sniffio==1.3.1
+SQLAlchemy==2.0.41
+streamlit==1.45.1
+sympy==1.14.0
+tenacity==9.1.2
+threadpoolctl==3.6.0
+tokenizers==0.21.1
+toml==0.10.2
+torch==2.7.0
+tornado==6.4.2
+tqdm==4.67.1
+traits==7.0.2
+transformers==4.51.3
+typing-inspect==0.9.0
+typing-inspection==0.4.0
+typing_extensions==4.13.2
+tzdata==2025.2
+uritemplate==4.1.1
+urllib3==2.4.0
+yarl==1.20.0
+zstandard==0.23.0