gabrielchua commited on
Commit
1f73110
·
verified ·
1 Parent(s): 0a7d484

Delete app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -450
app.py DELETED
@@ -1,450 +0,0 @@
1
- """
2
- app.py
3
- """
4
-
5
- # Standard imports
6
- import json
7
- import os
8
- import sys
9
- import uuid
10
- import asyncio
11
- from datetime import datetime
12
-
13
- # Third party imports
14
- import openai
15
- import gradio as gr
16
- import gspread
17
- from google.oauth2 import service_account
18
- from transformers import AutoModel
19
-
20
- # Local imports
21
- from utils import get_embeddings
22
-
23
# --- Categories
# Maps each top-level harm category to the model's output head keys.
# Some categories score two severity levels (l1/l2); others have one head.
CATEGORIES = {
    "binary": ["binary"],
    "hateful": ["hateful_l1", "hateful_l2"],
    "insults": ["insults"],
    "sexual": ["sexual_l1", "sexual_l2"],
    "physical_violence": ["physical_violence"],
    "self_harm": ["self_harm_l1", "self_harm_l2"],
    "all_other_misconduct": ["all_other_misconduct_l1", "all_other_misconduct_l2"],
}
39
-
40
# --- OpenAI Setup ---
# Sync client is used from worker threads (logging); async client serves the
# concurrent guardrail-comparison chat calls.
client = openai.OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
async_client = openai.AsyncOpenAI(api_key=os.getenv("OPENAI_API_KEY"))
44
-
45
# --- Model Loading ---
def load_lionguard2():
    """Load the LionGuard 2 classifier from the Hugging Face Hub.

    NOTE(review): trust_remote_code=True executes code shipped with the model
    repo; acceptable here because govtech/lionguard-2 is a first-party model.
    """
    return AutoModel.from_pretrained("govtech/lionguard-2", trust_remote_code=True)

# Loaded once at import time and shared by all handlers.
model = load_lionguard2()
51
-
52
# --- Google Sheets Config ---
# Both env vars must be present for any logging/voting to be recorded.
GOOGLE_SHEET_URL = os.environ.get("GOOGLE_SHEET_URL")
GOOGLE_CREDENTIALS = os.environ.get("GCP_SERVICE_ACCOUNT")
RESULTS_SHEET_NAME = "results"    # classifier-tab analyses
VOTES_SHEET_NAME = "votes"        # thumbs up/down feedback
CHATBOT_SHEET_NAME = "chatbot"    # guardrail-comparison messages
58
-
59
def get_gspread_client():
    """Build an authorized gspread client from the service-account JSON in env."""
    scopes = [
        "https://www.googleapis.com/auth/spreadsheets",
        "https://www.googleapis.com/auth/drive",
    ]
    creds = service_account.Credentials.from_service_account_info(
        json.loads(GOOGLE_CREDENTIALS),
        scopes=scopes,
    )
    return gspread.authorize(creds)
68
-
69
def save_results_data(row):
    """Append one analysis row (dict values, in insertion order) to 'results'.

    Best-effort: any failure is printed and swallowed so logging can never
    break the UI.
    """
    try:
        gc = get_gspread_client()
        worksheet = gc.open_by_url(GOOGLE_SHEET_URL).worksheet(RESULTS_SHEET_NAME)
        worksheet.append_row(list(row.values()))
    except Exception as e:
        print(f"Error saving results data: {e}")
77
-
78
def save_vote_data(text_id, agree):
    """Record a thumbs-up/down vote for a previously analyzed text.

    Appends [timestamp, text_id, agree] to the 'votes' worksheet; best-effort,
    errors are printed and swallowed.
    """
    try:
        gc = get_gspread_client()
        worksheet = gc.open_by_url(GOOGLE_SHEET_URL).worksheet(VOTES_SHEET_NAME)
        worksheet.append_row([datetime.now().isoformat(), text_id, agree])
    except Exception as e:
        print(f"Error saving vote data: {e}")
91
-
92
def log_chatbot_data(row):
    """Append one guardrail-comparison row to the 'chatbot' worksheet.

    Best-effort: missing keys or Sheets failures are printed and swallowed.
    """
    # Column order must match the worksheet header row.
    columns = [
        "datetime", "text_id", "text", "binary_score",
        "hateful_l1_score", "hateful_l2_score", "insults_score",
        "sexual_l1_score", "sexual_l2_score", "physical_violence_score",
        "self_harm_l1_score", "self_harm_l2_score", "aom_l1_score",
        "aom_l2_score", "openai_score",
    ]
    try:
        gc = get_gspread_client()
        worksheet = gc.open_by_url(GOOGLE_SHEET_URL).worksheet(CHATBOT_SHEET_NAME)
        worksheet.append_row([row[column] for column in columns])
    except Exception as e:
        print(f"Error saving chatbot data: {e}")
106
-
107
- # --- Classifier logic ---
108
-
109
def format_score_with_style(score_str):
    """Render a probability string as a colour-coded HTML span.

    Args:
        score_str: Probability formatted as a string (e.g. "0.1234"),
            or "-" meaning "not available".

    Returns:
        An HTML span: grey dash for "-"; otherwise green (<0.4),
        amber (0.4-0.7) or red (>=0.7) with the score as a percentage.
        Unparseable input is returned unchanged.
    """
    if score_str == "-":
        return '<span style="color: #9ca3af;">-</span>'
    try:
        score = float(score_str)
        percentage = int(score * 100)
    except (TypeError, ValueError, OverflowError):
        # Bug fix: was a bare `except`, which also swallowed SystemExit /
        # KeyboardInterrupt. Only conversion failures should fall through.
        return score_str
    if score < 0.4:
        return f'<span style="color: #34d399; font-weight:600;">👌 {percentage}%</span>'
    elif 0.4 <= score < 0.7:
        return f'<span style="color: #fbbf24; font-weight:600;">⚠️ {percentage}%</span>'
    else:
        return f'<span style="color: #fca5a5; font-weight:600;">🚨 {percentage}%</span>'
123
-
124
def format_binary_score(score):
    """Render the binary harm probability as a large pass/warn/fail banner.

    Thresholds: <0.4 pass (green), 0.4-0.7 warning (amber), >=0.7 fail (red).
    """
    percentage = int(score * 100)
    if score < 0.4:
        gradient, text_color, border = "#065f46 0%, #047857 100%", "#34d399", "#10b981"
        label = f"✅ Pass ({percentage}/100)"
    elif 0.4 <= score < 0.7:
        gradient, text_color, border = "#92400e 0%, #b45309 100%", "#fbbf24", "#f59e0b"
        label = f"⚠️ Warning ({percentage}/100)"
    else:
        gradient, text_color, border = "#991b1b 0%, #b91c1c 100%", "#fca5a5", "#ef4444"
        label = f"🚨 Fail ({percentage}/100)"
    # The three banners share one template; only colours and label differ.
    return (
        f'<div style="background:linear-gradient(135deg, {gradient}); color:{text_color}; '
        f'padding:24px 0; border-radius:20px; text-align:center; font-weight:900; '
        f'border:3px solid {border}; font-size:24px; margin:24px 0; '
        f'box-shadow:0 4px 24px rgba(0,0,0,0.3);">{label}</div>'
    )
132
-
133
def analyze_text(text):
    """Run LionGuard 2 on `text` and build the classifier-tab outputs.

    Returns:
        A 4-tuple of (binary banner HTML, category score table HTML,
        text_id used to correlate votes, voting prompt HTML). On empty
        input both HTML slots get a placeholder and the ids are "".
    """
    if not text.strip():
        empty_html = '<div style="text-align: center; color: #9ca3af; padding: 30px; font-style: italic;">Enter text to analyze</div>'
        return empty_html, empty_html, "", ""
    try:
        text_id = str(uuid.uuid4())
        embeddings = get_embeddings([text])
        results = model.predict(embeddings)
        binary_score = results.get('binary', [0.0])[0]

        main_categories = ['hateful', 'insults', 'sexual', 'physical_violence', 'self_harm', 'all_other_misconduct']
        # Display emojis keyed by the title-cased category name.
        category_emojis = {
            'Hateful': '🤬',
            'Insults': '💢',
            'Sexual': '🔞',
            'Physical Violence': '⚔️',
            'Self Harm': '☹️',
            'All Other Misconduct': '🙅‍♀️',
        }

        categories_html = []
        max_scores = {}
        for category in main_categories:
            category_name = category.replace('_', ' ').title()
            category_display = f"{category_emojis.get(category_name, '📝')} {category_name}"
            # A category's displayed score is the max over its severity heads.
            level_scores = [results.get(key, [0.0])[0] for key in CATEGORIES[category]]
            max_score = max(level_scores) if level_scores else 0.0
            max_scores[category] = max_score
            categories_html.append(f'''
            <tr>
                <td>{category_display}</td>
                <td style="text-align: center;">{format_score_with_style(f"{max_score:.4f}")}</td>
            </tr>
            ''')

        html_table = f'''
        <table style="width:100%">
            <thead>
                <tr><th>Category</th><th>Score</th></tr>
            </thead>
            <tbody>
                {''.join(categories_html)}
            </tbody>
        </table>
        '''

        # Persist the analysis when Sheets logging is configured.
        if GOOGLE_SHEET_URL and GOOGLE_CREDENTIALS:
            results_row = {
                "datetime": datetime.now().isoformat(),
                "text_id": text_id,
                "text": text,
                "binary_score": binary_score,
            }
            for category in main_categories:
                results_row[f"{category}_max"] = max_scores[category]
            save_results_data(results_row)

        voting_html = '<div>Help improve LionGuard2! Rate the analysis below.</div>'
        return format_binary_score(binary_score), html_table, text_id, voting_html

    except Exception as e:
        error_msg = f"Error analyzing text: {str(e)}"
        return f'<div style="color: #fca5a5;">❌ {error_msg}</div>', '', '', ''
197
-
198
def vote_thumbs_up(text_id):
    """Record an 'agree' vote for analysis `text_id`; returns feedback HTML."""
    # Votes require a completed analysis and configured Sheets credentials.
    if not (text_id and GOOGLE_SHEET_URL and GOOGLE_CREDENTIALS):
        return '<div>Voting not available or analysis not yet run.</div>'
    save_vote_data(text_id, True)
    return '<div style="color: #34d399; font-weight:700;">🎉 Thank you!</div>'
203
-
204
def vote_thumbs_down(text_id):
    """Record a 'disagree' vote for analysis `text_id`; returns feedback HTML."""
    # Votes require a completed analysis and configured Sheets credentials.
    if not (text_id and GOOGLE_SHEET_URL and GOOGLE_CREDENTIALS):
        return '<div>Voting not available or analysis not yet run.</div>'
    save_vote_data(text_id, False)
    return '<div style="color: #fca5a5; font-weight:700;">📝 Thanks for the feedback!</div>'
209
-
210
- # --- Guardrail Comparison logic (ASYNC VERSION) ---
211
-
212
async def get_openai_response_async(message, system_prompt="You are a helpful assistant."):
    """Fetch a chat completion asynchronously.

    Returns the assistant's reply text, or an error string (never raises),
    so the chat UI always has something to display.
    """
    try:
        completion = await async_client.chat.completions.create(
            model="gpt-4.1-nano",
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": message},
            ],
            max_tokens=500,
            temperature=0,
            seed=42,  # fixed seed for more reproducible side-by-side comparisons
        )
        return completion.choices[0].message.content
    except Exception as e:
        return f"Error: {str(e)}. Please check your OpenAI API key."
228
-
229
async def openai_moderation_async(message):
    """Return True when OpenAI's moderation endpoint flags `message`.

    Fails open: any API error is printed and treated as not-flagged, so a
    moderation outage does not block the demo chat.
    """
    try:
        moderation = await async_client.moderations.create(input=message)
        return moderation.results[0].flagged
    except Exception as e:
        print(f"Error in OpenAI moderation: {e}")
        return False
237
-
238
def lionguard_2_sync(message, threshold=0.5):
    """Classify `message` with the local LionGuard 2 model (synchronous).

    Returns:
        (flagged, binary_probability); (False, 0.0) on any error so a model
        failure never blocks the chat.
    """
    try:
        embeddings = get_embeddings([message])
        scores = model.predict(embeddings)
        probability = scores['binary'][0]
        return probability > threshold, probability
    except Exception as e:
        print(f"Error in LionGuard 2: {e}")
        return False, 0.0
248
-
249
async def process_no_moderation(message, history_no_mod):
    """Append the user message and the raw (unmoderated) reply to the history."""
    reply = await get_openai_response_async(message)
    history_no_mod.extend([
        {"role": "user", "content": message},
        {"role": "assistant", "content": reply},
    ])
    return history_no_mod
255
-
256
async def process_openai_moderation(message, history_openai):
    """Append the message and either a block notice or the model reply,
    depending on OpenAI moderation's verdict."""
    flagged = await openai_moderation_async(message)
    history_openai.append({"role": "user", "content": message})
    if flagged:
        # Blocked: do not call the chat model at all.
        reply = "🚫 This message has been flagged by OpenAI moderation"
    else:
        reply = await get_openai_response_async(message)
    history_openai.append({"role": "assistant", "content": reply})
    return history_openai
267
-
268
async def process_lionguard(message, history_lg):
    """Append the message and either a block notice or the model reply,
    depending on LionGuard 2's verdict.

    Returns:
        (updated history, LionGuard binary score).
    """
    # The local model is synchronous; run it in the default thread pool so
    # the event loop stays responsive. Bug fix: get_running_loop() replaces
    # get_event_loop(), which is deprecated inside coroutines (3.10+).
    loop = asyncio.get_running_loop()
    lg_flagged, lg_score = await loop.run_in_executor(None, lionguard_2_sync, message, 0.5)

    history_lg.append({"role": "user", "content": message})
    if lg_flagged:
        # Blocked: do not call the chat model at all.
        history_lg.append({"role": "assistant", "content": "🚫 This message has been flagged by LionGuard 2"})
    else:
        lg_response = await get_openai_response_async(message)
        history_lg.append({"role": "assistant", "content": lg_response})
    return history_lg, lg_score
282
-
283
async def process_message_async(message, history_no_mod, history_openai, history_lg):
    """Fan one user message out to all three guardrail pipelines concurrently.

    Returns:
        The three updated histories plus "" (to clear the input textbox).
        Whitespace-only input is a no-op.
    """
    if not message.strip():
        return history_no_mod, history_openai, history_lg, ""

    # Run all three pipelines in parallel; return_exceptions keeps one
    # failure from discarding the other two results.
    no_mod_result, openai_result, lg_result = await asyncio.gather(
        process_no_moderation(message, history_no_mod),
        process_openai_moderation(message, history_openai),
        process_lionguard(message, history_lg),
        return_exceptions=True,
    )

    # On failure, keep the history each pipeline was handed.
    if not isinstance(no_mod_result, Exception):
        history_no_mod = no_mod_result
    if not isinstance(openai_result, Exception):
        history_openai = openai_result
    if isinstance(lg_result, Exception):
        lg_score = 0.0
    else:
        history_lg, lg_score = lg_result

    # Fire-and-forget logging in a worker thread so the reply isn't delayed.
    if GOOGLE_SHEET_URL and GOOGLE_CREDENTIALS:
        try:
            # Bug fix: get_running_loop() replaces get_event_loop(), which is
            # deprecated inside coroutines (3.10+).
            asyncio.get_running_loop().run_in_executor(None, _log_chatbot_sync, message, lg_score)
        except Exception as e:
            print(f"Chatbot logging failed: {e}")

    return history_no_mod, history_openai, history_lg, ""
313
-
314
def _log_chatbot_sync(message, lg_score):
    """Sync logging helper (runs in a thread pool): score `message` with every
    LionGuard head plus OpenAI moderation and append one row to the sheet.

    NOTE(review): lg_score is accepted for interface compatibility but unused —
    the row re-scores the message from scratch; confirm the duplicated predict
    call is intended.
    """
    try:
        embeddings = get_embeddings([message])
        results = model.predict(embeddings)

        def head(category, level):
            # First element of the named head's score list, or None if absent.
            return results.get(CATEGORIES[category][level], [None])[0]

        row = {
            "datetime": datetime.now().isoformat(),
            "text_id": str(uuid.uuid4()),
            "text": message,
            "binary_score": results.get("binary", [None])[0],
            "hateful_l1_score": head('hateful', 0),
            "hateful_l2_score": head('hateful', 1),
            "insults_score": head('insults', 0),
            "sexual_l1_score": head('sexual', 0),
            "sexual_l2_score": head('sexual', 1),
            "physical_violence_score": head('physical_violence', 0),
            "self_harm_l1_score": head('self_harm', 0),
            "self_harm_l2_score": head('self_harm', 1),
            "aom_l1_score": head('all_other_misconduct', 0),
            "aom_l2_score": head('all_other_misconduct', 1),
            "openai_score": None,
        }
        try:
            openai_result = client.moderations.create(input=message)
            # NOTE(review): assumes category_scores supports dict-style .get();
            # newer OpenAI SDKs return a typed model object — confirm.
            row["openai_score"] = float(openai_result.results[0].category_scores.get("hate", 0.0))
        except Exception:
            row["openai_score"] = None

        log_chatbot_data(row)
    except Exception as e:
        print(f"Error in sync logging: {e}")
347
-
348
def process_message(message, history_no_mod, history_openai, history_lg):
    """Sync bridge for Gradio: drive process_message_async to completion."""
    coro = process_message_async(message, history_no_mod, history_openai, history_lg)
    return asyncio.run(coro)
351
-
352
def clear_all_chats():
    """Reset all three chatbot panes to an empty message history."""
    return [], [], []
354
-
355
- # ---- MAIN GRADIO UI ----
356
-
357
# Warning banner shown at the top of both tabs.
DISCLAIMER = """
<div style='background: #fbbf24; color: #1e293b; border-radius: 8px; padding: 14px; margin-bottom: 12px; font-size: 15px; font-weight:500;'>
⚠️ LionGuard 2 may make mistakes. All entries are logged (anonymised) to improve the model.
</div>
"""
362
-
363
with gr.Blocks(title="LionGuard 2 Demo", theme=gr.themes.Soft()) as demo:
    gr.HTML("<h1 style='text-align:center'>LionGuard 2 Demo</h1>")

    with gr.Tabs():
        # --- Tab 1: single-text classifier with voting feedback ---
        with gr.Tab("Classifier"):
            gr.HTML(DISCLAIMER)
            with gr.Row():
                with gr.Column(scale=1, min_width=400):
                    text_input = gr.Textbox(
                        label="Enter text to analyze:",
                        placeholder="Type your text here...",
                        lines=8,
                        max_lines=16,
                        container=True,
                    )
                    analyze_btn = gr.Button("Analyze", variant="primary")
                with gr.Column(scale=1, min_width=400):
                    binary_output = gr.HTML(
                        value='<div style="text-align: center; color: #9ca3af; padding: 30px; font-style: italic; font-size:36px;">Enter text to analyze</div>'
                    )
                    category_table = gr.HTML(
                        value='<div style="text-align: center; color: #9ca3af; padding: 30px; font-style: italic;">Category scores will appear here after analysis</div>'
                    )
                    voting_feedback = gr.HTML(value="")
                    # Hidden state: id of the last analysis, ties votes to results.
                    current_text_id = gr.Textbox(value="", visible=False)

            # Vote buttons stay hidden until an analysis has produced a text_id.
            with gr.Row(visible=False) as voting_buttons_row:
                thumbs_up_btn = gr.Button("👍 Looks Accurate", variant="primary")
                thumbs_down_btn = gr.Button("👎 Looks Wrong", variant="secondary")

            def analyze_and_show_voting(text):
                # Run the classifier, then reveal the vote row only when an id exists.
                binary_score, category_table_val, text_id, voting_html = analyze_text(text)
                show_vote = gr.update(visible=True) if text_id else gr.update(visible=False)
                return binary_score, category_table_val, text_id, show_vote, "", ""

            # Button click and Enter-in-textbox share one wiring.
            for trigger in (analyze_btn.click, text_input.submit):
                trigger(
                    analyze_and_show_voting,
                    inputs=[text_input],
                    outputs=[binary_output, category_table, current_text_id, voting_buttons_row, voting_feedback, voting_feedback],
                )
            thumbs_up_btn.click(vote_thumbs_up, inputs=[current_text_id], outputs=[voting_feedback])
            thumbs_down_btn.click(vote_thumbs_down, inputs=[current_text_id], outputs=[voting_feedback])

        # --- Tab 2: side-by-side guardrail comparison chat ---
        with gr.Tab("Guardrail Comparison"):
            gr.HTML(DISCLAIMER)
            with gr.Row():
                with gr.Column(scale=1):
                    gr.Markdown("#### 🔵 No Moderation")
                    chatbot_no_mod = gr.Chatbot(height=650, label="No Moderation", show_label=False, bubble_full_width=False, type='messages')
                with gr.Column(scale=1):
                    gr.Markdown("#### 🟠 OpenAI Moderation")
                    chatbot_openai = gr.Chatbot(height=650, label="OpenAI Moderation", show_label=False, bubble_full_width=False, type='messages')
                with gr.Column(scale=1):
                    gr.Markdown("#### 🛡️ LionGuard 2")
                    chatbot_lg = gr.Chatbot(height=650, label="LionGuard 2", show_label=False, bubble_full_width=False, type='messages')
            gr.Markdown("##### 💬 Send Message to All Models")
            with gr.Row():
                message_input = gr.Textbox(
                    placeholder="Type your message to compare responses...",
                    show_label=False,
                    scale=4,
                )
                send_btn = gr.Button("Send", variant="primary", scale=1)
            with gr.Row():
                clear_btn = gr.Button("Clear All Chats", variant="stop")

            # Send button and Enter-in-textbox share one wiring.
            for trigger in (send_btn.click, message_input.submit):
                trigger(
                    process_message,
                    inputs=[message_input, chatbot_no_mod, chatbot_openai, chatbot_lg],
                    outputs=[chatbot_no_mod, chatbot_openai, chatbot_lg, message_input],
                )
            clear_btn.click(
                clear_all_chats,
                outputs=[chatbot_no_mod, chatbot_openai, chatbot_lg],
            )
448
-
449
if __name__ == "__main__":
    # Launch the Gradio server (blocking call).
    demo.launch()