Rhodham96 committed on
Commit 02136c1 · 1 Parent(s): 0eaf837

first commit

Files changed (3)
  1. Dockerfile +28 -0
  2. chat.py +184 -0
  3. requirements.txt +125 -0
Dockerfile ADDED
@@ -0,0 +1,28 @@
+ # Use an official lightweight Python image
+ FROM python:3.11-slim
+
+ # Set working directory
+ WORKDIR /app
+
+ # Copy requirements file if you have one, else you can install directly
+ # If you have a requirements.txt, uncomment the next two lines:
+ # COPY requirements.txt .
+ # RUN pip install --no-cache-dir -r requirements.txt
+
+ # Install dependencies (must cover everything chat.py imports)
+ RUN pip install --no-cache-dir streamlit google-generativeai pymupdf langchain langchain-community sentence-transformers faiss-cpu numpy
+
+ # Copy the app code into the container
+ COPY chat.py .
+
+ # Expose Streamlit default port
+ EXPOSE 8501
+
+ # Set environment variables for Streamlit to run in the container
+ ENV STREAMLIT_SERVER_PORT=8501
+ ENV STREAMLIT_SERVER_HEADLESS=true
+ ENV STREAMLIT_SERVER_ENABLECORS=false
+ ENV STREAMLIT_SERVER_ENABLEWEBUI=false
+
+ # Command to run the Streamlit app
+ CMD ["streamlit", "run", "chat.py", "--server.port=8501", "--server.address=0.0.0.0"]
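Note: since requirements.txt is also added in this commit, the commented COPY/RUN pair above could be used in place of the ad-hoc pip install line to get the exact pinned versions. Assuming an arbitrary image tag, the container would typically be built with docker build -t gemini-chatbot . and started with docker run -p 8501:8501 gemini-chatbot, after which the app is reachable at http://localhost:8501.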
chat.py ADDED
@@ -0,0 +1,184 @@
+ import streamlit as st
+ import google.generativeai as genai
+ import fitz  # PyMuPDF
+ from langchain_community.embeddings import HuggingFaceEmbeddings
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
+ import numpy as np
+ import faiss
+
+ # Silence library warnings so they do not clutter the Streamlit UI
+ def warn(*args, **kwargs):
+     pass
+ import warnings
+ warnings.warn = warn
+ warnings.filterwarnings('ignore')
+
+ from langchain_community.document_loaders import PyPDFLoader
+
+
+ # Initialize session state variables
+ if "messages" not in st.session_state:
+     st.session_state.messages = []
+ if "uploaded_files" not in st.session_state:
+     st.session_state.uploaded_files = []
+ if "api_key" not in st.session_state:
+     st.session_state.api_key = ""
+
+ def extract_text_from_pdf(file):
+     file.seek(0)
+     pdf_bytes = file.read()
+
+     if not pdf_bytes:
+         raise ValueError(f"The file {file.name} is empty or could not be read.")
+
+     try:
+         doc = fitz.open(stream=pdf_bytes, filetype="pdf")
+     except Exception as e:
+         raise RuntimeError(f"Error while opening file {file.name}: {e}")
+
+     text = ""
+     for page in doc:
+         text += page.get_text()
+
+     return text
+
+
+ def process_files(files):
+     texts = []
+     for file in files:
+         if file.type == "text/plain":
+             content = file.getvalue().decode("utf-8")
+             texts.append(content)
+         elif file.type == "application/pdf":
+             content = extract_text_from_pdf(file)
+             texts.append(content)
+
+     return "\n".join(texts)
+
+ def build_index(text):
+     # Split the raw text into overlapping chunks
+     splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
+     chunks = splitter.split_text(text)
+
+     # Embed each chunk and store the vectors in a flat L2 FAISS index
+     embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
+     vectors = [embeddings.embed_query(chunk) for chunk in chunks]
+     dimension = len(vectors[0])
+
+     index = faiss.IndexFlatL2(dimension)
+     index.add(np.array(vectors).astype("float32"))
+
+     return index, chunks
+
+ def retrieve_chunks(query, index, chunks, k=3):
+     # Embed the query and return the k nearest chunks from the index
+     embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
+     query_vector = np.array([embeddings.embed_query(query)]).astype("float32")
+     distances, indices = index.search(query_vector, k)
+     return [chunks[i] for i in indices[0]]
+
+
+ def create_sidebar():
+     with st.sidebar:
+         st.title("🤖 Gemini Chatbot")
+
+         # API key input + validate button
+         api_key_input = st.text_input("Google API Key:", type="password")
+         if st.button("Validate API"):
+             if api_key_input.strip():
+                 st.session_state.api_key = api_key_input.strip()
+                 st.success("API Key saved ✅")
+             else:
+                 st.error("Please enter a valid API key.")
+
+         # Show file uploader only if API key is set
+         if st.session_state.api_key:
+             uploaded_files = st.file_uploader(
+                 "📂 Upload your files (txt, pdf, etc.)",
+                 accept_multiple_files=True
+             )
+             if uploaded_files:
+                 for file in uploaded_files:
+                     # Avoid duplicates
+                     if file.name not in [f.name for f in st.session_state.uploaded_files]:
+                         st.session_state.uploaded_files.append(file)
+                 st.success(f"{len(st.session_state.uploaded_files)} files loaded.")
+
+             if st.session_state.uploaded_files:
+                 st.markdown("**Files currently loaded:**")
+                 for f in st.session_state.uploaded_files:
+                     st.write(f.name)
+
+ def main():
+     if "faiss_index" not in st.session_state:
+         st.session_state.faiss_index = None
+     if "chunks" not in st.session_state:
+         st.session_state.chunks = []
+     st.set_page_config(page_title="Gemini Chatbot")
+     create_sidebar()
+
+     if not st.session_state.api_key:
+         st.warning("👆 Please enter and validate your API key in the sidebar.")
+         return
+
+     genai.configure(api_key=st.session_state.api_key)
+     model = genai.GenerativeModel("gemini-2.0-flash")
+
+     # Build index if not done yet
+     if st.session_state.uploaded_files and st.session_state.faiss_index is None:
+         full_text = process_files(st.session_state.uploaded_files)
+         if full_text:
+             index, chunks = build_index(full_text)
+             st.session_state.faiss_index = index
+             st.session_state.chunks = chunks
+
+             # Quick diagnostics on the freshly built index
+             st.write(f"Number of chunks: {len(chunks)}")
+             st.write(f"FAISS dimension: {st.session_state.faiss_index.d}")
+             st.write(f"FAISS index size: {st.session_state.faiss_index.ntotal}")
+
+     st.title("💬 Gemini Chatbot")
+
+     # Show chat history
+     chat_container = st.container()
+
+     with chat_container:
+         for msg in st.session_state.messages:
+             with st.chat_message(msg["role"]):
+                 st.markdown(msg["content"])
+
+     # Text input always at the bottom
+     prompt = st.text_area("Type your question here...", key="input")
+
+     if st.button("Send"):
+         # Add the user message
+         if not prompt.strip():
+             st.warning("Please write a message before sending.")
+             return
+
+         st.session_state.messages.append({"role": "user", "content": prompt})
+         #st.experimental_rerun()
+         with st.chat_message("user"):
+             st.markdown(prompt)
+
+         with st.chat_message("assistant"):
+             with st.spinner("Thinking..."):
+                 try:
+                     # Retrieve relevant chunks from your files
+                     if st.session_state.faiss_index:
+                         relevant_chunks = retrieve_chunks(prompt, st.session_state.faiss_index, st.session_state.chunks, k=3)
+                         context = "\n---\n".join(chunk if isinstance(chunk, str) else chunk.page_content for chunk in relevant_chunks)
+                         prompt_with_context = f"Use the following context to answer the question:\n{context}\n\nQuestion: {prompt}"
+                     else:
+                         prompt_with_context = prompt  # fallback
+
+                     response = model.generate_content(prompt_with_context)
+                     if response.text:
+                         st.markdown(response.text)
+                         st.session_state.messages.append({"role": "assistant", "content": response.text})
+                     else:
+                         st.markdown("Sorry, I couldn't get a response.")
+                         st.session_state.messages.append({"role": "assistant", "content": "Sorry, I couldn't get a response."})
+                 except Exception as e:
+                     st.error(f"Error: {e}")
+                     st.session_state.messages.append({"role": "assistant", "content": f"Error: {e}"})
+
+ if __name__ == "__main__":
+     main()
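The retrieval logic above (build_index and retrieve_chunks) can be exercised outside Streamlit. Below is a minimal, self-contained sketch of the same chunk, embed, index, search flow; the sample text and query are illustrative placeholders, not part of the committed app:

# Minimal sketch of the retrieval flow used in chat.py (placeholder data).
import numpy as np
import faiss
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter

text = "FAISS performs efficient similarity search over dense vectors. " * 40  # placeholder corpus

# Split into overlapping chunks, as build_index() does
splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
chunks = splitter.split_text(text)

# Embed the chunks (embed_documents batches what chat.py does one chunk at a time)
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
vectors = np.array(embeddings.embed_documents(chunks)).astype("float32")

# Index the vectors in a flat L2 FAISS index
index = faiss.IndexFlatL2(vectors.shape[1])
index.add(vectors)

# Embed a query and fetch the nearest chunks, as retrieve_chunks() does
query = "What is FAISS used for?"  # placeholder question
query_vector = np.array([embeddings.embed_query(query)]).astype("float32")
_, idx = index.search(query_vector, min(3, len(chunks)))
print([chunks[i] for i in idx[0]])

Outside Docker, the app itself is started with streamlit run chat.py; the Google API key is entered in the sidebar at runtime rather than read from the environment.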
requirements.txt ADDED
@@ -0,0 +1,125 @@
+ acres==0.3.0
+ aiohappyeyeballs==2.6.1
+ aiohttp==3.11.18
+ aiosignal==1.3.2
+ altair==5.5.0
+ annotated-types==0.7.0
+ anyio==4.9.0
+ attrs==25.3.0
+ blinker==1.9.0
+ cachetools==5.5.2
+ certifi==2025.4.26
+ charset-normalizer==3.4.2
+ ci-info==0.3.0
+ click==8.2.0
+ configobj==5.0.9
+ configparser==7.2.0
+ dataclasses-json==0.6.7
+ etelemetry==0.3.1
+ faiss-cpu==1.11.0
+ filelock==3.18.0
+ frozenlist==1.6.0
+ fsspec==2025.3.2
+ gitdb==4.0.12
+ GitPython==3.1.44
+ google-ai-generativelanguage==0.6.15
+ google-api-core==2.25.0rc1
+ google-api-python-client==2.169.0
+ google-auth==2.40.1
+ google-auth-httplib2==0.2.0
+ google-generativeai==0.8.5
+ googleapis-common-protos==1.70.0
+ grpcio==1.71.0
+ grpcio-status==1.71.0
+ h11==0.16.0
+ httpcore==1.0.9
+ httplib2==0.22.0
+ httpx==0.28.1
+ httpx-sse==0.4.0
+ huggingface-hub==0.31.2
+ idna==3.10
+ importlib_resources==6.5.2
+ isodate==0.6.1
+ Jinja2==3.1.6
+ joblib==1.5.0
+ jsonpatch==1.33
+ jsonpointer==3.0.0
+ jsonschema==4.23.0
+ jsonschema-specifications==2025.4.1
+ langchain==0.3.25
+ langchain-community==0.3.24
+ langchain-core==0.3.59
+ langchain-text-splitters==0.3.8
+ langsmith==0.3.42
+ looseversion==1.3.0
+ lxml==5.4.0
+ MarkupSafe==3.0.2
+ marshmallow==3.26.1
+ mpmath==1.3.0
+ multidict==6.4.3
+ mypy_extensions==1.1.0
+ narwhals==1.39.0
+ networkx==3.4.2
+ nibabel==5.3.2
+ nipype==1.10.0
+ numpy==2.2.5
+ orjson==3.10.18
+ packaging==24.2
+ pandas==2.2.3
+ pathlib==1.0.1
+ pillow==11.2.1
+ propcache==0.3.1
+ proto-plus==1.26.1
+ protobuf==5.29.4
+ prov==2.0.1
+ puremagic==1.29
+ pyarrow==20.0.0
+ pyasn1==0.6.1
+ pyasn1_modules==0.4.2
+ pydantic==2.11.4
+ pydantic-settings==2.9.1
+ pydantic_core==2.33.2
+ pydeck==0.9.1
+ pydot==4.0.0
+ PyMuPDF==1.25.5
+ pyparsing==3.2.3
+ python-dateutil==2.9.0.post0
+ python-dotenv==1.1.0
+ pytz==2025.2
+ pyxnat==1.6.3
+ PyYAML==6.0.2
+ rdflib==6.3.2
+ referencing==0.36.2
+ regex==2024.11.6
+ requests==2.32.3
+ requests-toolbelt==1.0.0
+ rpds-py==0.24.0
+ rsa==4.9.1
+ safetensors==0.5.3
+ scikit-learn==1.6.1
+ scipy==1.15.3
+ sentence-transformers==4.1.0
+ simplejson==3.20.1
+ six==1.17.0
+ smmap==5.0.2
+ sniffio==1.3.1
+ SQLAlchemy==2.0.41
+ streamlit==1.45.1
+ sympy==1.14.0
+ tenacity==9.1.2
+ threadpoolctl==3.6.0
+ tokenizers==0.21.1
+ toml==0.10.2
+ torch==2.7.0
+ tornado==6.4.2
+ tqdm==4.67.1
+ traits==7.0.2
+ transformers==4.51.3
+ typing-inspect==0.9.0
+ typing-inspection==0.4.0
+ typing_extensions==4.13.2
+ tzdata==2025.2
+ uritemplate==4.1.1
+ urllib3==2.4.0
+ yarl==1.20.0
+ zstandard==0.23.0
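Note: this pin list looks like a full pip freeze of the author's environment; packages such as nipype, nibabel, pyxnat, prov and traits are neuroimaging tools that chat.py never imports. A leaner requirements.txt covering only what chat.py and the Dockerfile actually use (keeping the versions pinned above) would plausibly be:

faiss-cpu==1.11.0
google-generativeai==0.8.5
langchain==0.3.25
langchain-community==0.3.24
numpy==2.2.5
PyMuPDF==1.25.5
sentence-transformers==4.1.0
streamlit==1.45.1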