DTabs committed on
Commit
b011eb4
·
verified ·
1 Parent(s): eb939d7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +103 -29
app.py CHANGED
@@ -1,32 +1,46 @@
 
1
  import gradio as gr
2
  from parrot import Parrot
3
  import nltk
4
  from nltk.tokenize import sent_tokenize, word_tokenize
5
  import re
6
  import time
 
 
 
 
7
 
8
  # -----------------------------
9
  # Setup
10
  # -----------------------------
11
- nltk.data.path.append("./nltk_data") # Local punkt (no downloading)
 
12
  parrot = None # Lazy-loaded global model
13
 
 
 
 
14
 
 
 
 
 
 
 
 
 
 
15
  def get_parrot():
16
- """Load the Parrot model lazily"""
17
  global parrot
18
  if parrot is None:
19
- print("⏳ Loading Parrot model for the first time...")
20
  parrot = Parrot(model_tag="prithivida/parrot_paraphraser_on_T5", use_gpu=False)
21
- print("βœ… Parrot model loaded successfully!")
22
  return parrot
23
 
24
 
25
- MAX_TOKENS = 150 # limit per chunk for stability
26
-
27
-
28
  # -----------------------------
29
- # Helper functions
30
  # -----------------------------
31
  def clean_sentence(sent):
32
  sent = sent.strip()
@@ -48,11 +62,7 @@ def split_long_sentence(sentence, max_tokens=MAX_TOKENS):
48
  return [" ".join(words[i:i + max_tokens]) for i in range(0, len(words), max_tokens)]
49
 
50
 
51
- # -----------------------------
52
- # 🔹 Retry Wrapper
53
- # -----------------------------
54
  def with_retry(func, *args, retries=1, delay=3, **kwargs):
55
- """Try running a function twice before giving up"""
56
  for attempt in range(retries + 1):
57
  try:
58
  return func(*args, **kwargs)
@@ -65,7 +75,7 @@ def with_retry(func, *args, retries=1, delay=3, **kwargs):
65
 
66
 
67
  # -----------------------------
68
- # 🔹 App 1: Full Paragraph Rephraser
69
  # -----------------------------
70
  def rephrase(text):
71
  model = get_parrot()
@@ -93,7 +103,7 @@ def rephrase(text):
93
 
94
 
95
  # -----------------------------
96
- # 🔹 App 2: Sentence-wise Multiple Paraphrases
97
  # -----------------------------
98
  def generate_unique_paraphrases(sentence, N_OPTIONS=3):
99
  model = get_parrot()
@@ -104,10 +114,8 @@ def generate_unique_paraphrases(sentence, N_OPTIONS=3):
104
  adequacy_threshold=0.85,
105
  fluency_threshold=0.9,
106
  )
107
-
108
  if not paraphrases:
109
  return [sentence]
110
-
111
  texts = [p[0] for p in paraphrases]
112
  unique = []
113
  for t in texts:
@@ -132,12 +140,73 @@ def rephrase_sentencewise_unique(text, N_OPTIONS=3):
132
 
133
 
134
  # -----------------------------
135
- # 🔹 Warm-up on startup
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
136
  # -----------------------------
137
  def warmup():
138
- """Ping the model once to prevent timeout at first request"""
139
- print("πŸ”₯ Warming up Parrot model...")
140
  try:
 
141
  model = get_parrot()
142
  _ = model.augment(input_phrase="hello world", do_diverse=False)
143
  print("βœ… Warmup complete.")
@@ -149,30 +218,35 @@ warmup()
149
 
150
 
151
  # -----------------------------
152
- # 🔹 Gradio Interfaces
153
  # -----------------------------
154
  rephrase_iface = gr.Interface(
155
  fn=rephrase,
156
  inputs=gr.Textbox(lines=10, placeholder="Paste your text here..."),
157
  outputs="text",
158
- title="Parrot Rephraser (Long Text)",
159
- description="Paraphrases long text while maintaining punctuation and capitalization.",
160
  )
161
 
162
  sentencewise_iface = gr.Interface(
163
  fn=rephrase_sentencewise_unique,
164
  inputs=gr.Textbox(lines=10, placeholder="Paste text here..."),
165
  outputs="text",
166
- title="Parrot Rephraser (Sentence-wise Options)",
167
- description="Generates top 3 unique paraphrases per sentence. Optimized for HF free-tier.",
 
 
 
 
 
 
 
 
168
  )
169
 
170
- # -----------------------------
171
- # 🔹 Combine both interfaces into Tabs
172
- # -----------------------------
173
  demo = gr.TabbedInterface(
174
- [rephrase_iface, sentencewise_iface],
175
- ["Full Text Rephraser", "Sentence-wise Paraphrases"],
176
  )
177
 
178
  demo.launch(server_port=7860, server_name="0.0.0.0", show_error=True)
 
1
+ # main.py
2
  import gradio as gr
3
  from parrot import Parrot
4
  import nltk
5
  from nltk.tokenize import sent_tokenize, word_tokenize
6
  import re
7
  import time
8
+ import os
9
+ from ddgs import DDGS
10
+ from googleapiclient.discovery import build
11
+ from dotenv import load_dotenv
12
 
13
  # -----------------------------
14
  # Setup
15
  # -----------------------------
16
+ load_dotenv()
17
+ nltk.data.path.append("./nltk_data") # Local punkt
18
  parrot = None # Lazy-loaded global model
19
 
20
+ # Load Google credentials
21
+ GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
22
+ GOOGLE_CX = os.getenv("GOOGLE_CX")
23
 
24
+ if not GOOGLE_API_KEY or not GOOGLE_CX:
25
+ print("⚠️ Warning: GOOGLE_API_KEY or GOOGLE_CX not set. Google fallback may fail.")
26
+
27
+ MAX_TOKENS = 150
28
+
29
+
30
+ # -----------------------------
31
+ # Parrot Model Loader
32
+ # -----------------------------
33
  def get_parrot():
 
34
  global parrot
35
  if parrot is None:
36
+ print("⏳ Loading Parrot model...")
37
  parrot = Parrot(model_tag="prithivida/parrot_paraphraser_on_T5", use_gpu=False)
38
+ print("βœ… Parrot model loaded!")
39
  return parrot
40
 
41
 
 
 
 
42
  # -----------------------------
43
+ # Helper Functions
44
  # -----------------------------
45
  def clean_sentence(sent):
46
  sent = sent.strip()
 
62
  return [" ".join(words[i:i + max_tokens]) for i in range(0, len(words), max_tokens)]
63
 
64
 
 
 
 
65
  def with_retry(func, *args, retries=1, delay=3, **kwargs):
 
66
  for attempt in range(retries + 1):
67
  try:
68
  return func(*args, **kwargs)
 
75
 
76
 
77
  # -----------------------------
78
+ # 🔹 Full Paragraph Rephraser
79
  # -----------------------------
80
  def rephrase(text):
81
  model = get_parrot()
 
103
 
104
 
105
  # -----------------------------
106
+ # 🔹 Sentence-wise Paraphrases
107
  # -----------------------------
108
  def generate_unique_paraphrases(sentence, N_OPTIONS=3):
109
  model = get_parrot()
 
114
  adequacy_threshold=0.85,
115
  fluency_threshold=0.9,
116
  )
 
117
  if not paraphrases:
118
  return [sentence]
 
119
  texts = [p[0] for p in paraphrases]
120
  unique = []
121
  for t in texts:
 
140
 
141
 
142
  # -----------------------------
143
+ # 🔹 Hybrid Plagiarism Detection (DuckDuckGo + Google)
144
+ # -----------------------------
145
def search_duckduckgo(query):
    """Run an exact-phrase DuckDuckGo text search for *query*.

    The query is wrapped in double quotes so the engine treats it as a
    phrase. Double quotes inside the query are replaced by spaces (they
    would otherwise end the phrase early) and runs of whitespace are
    collapsed.

    Args:
        query: Text to look up, typically a single sentence.

    Returns:
        A ``(results, count)`` tuple: ``results`` is the list returned by
        ``DDGS.text`` (at most 3 entries) and ``count`` is its length.
        ``([], 0)`` is returned for a blank query or when the search fails.
    """
    phrase = " ".join(str(query or "").replace('"', " ").split())
    if not phrase:
        # Nothing to search for; skip the network round-trip entirely.
        return [], 0

    try:
        with DDGS() as ddgs:
            # `or []` guards against a backend that yields None instead of
            # an empty iterable.
            results = list(ddgs.text(f'"{phrase}"', max_results=3) or [])
        return results, len(results)
    except Exception as e:
        # Best-effort lookup: a failed search is reported and treated as
        # "no matches" so the caller can fall back to another engine.
        print(f"⚠️ DDG error: {e}")
        return [], 0
153
+
154
+
155
def detect_duckduckgo(text, delay=1):
    """Search every distinct sentence of *text* on DuckDuckGo.

    Args:
        text: The passage to check. Blank or ``None`` input yields ``[]``.
        delay: Seconds to pause between consecutive searches. The pause is
            only inserted *between* requests, never after the last one.

    Returns:
        A list of ``(sentence, hit_count, first_url)`` tuples, one per
        distinct sentence that produced at least one search result, in the
        order the sentences first appear in *text*.
    """
    if not text or not text.strip():
        return []

    matches = []
    seen = set()
    for sent in sent_tokenize(text):
        if sent in seen:
            # A repeated sentence would only repeat the same query.
            continue
        if seen and delay > 0:
            # Throttle only when another request has already been made.
            time.sleep(delay)
        seen.add(sent)

        results, total = search_duckduckgo(sent)
        if total > 0 and results:
            url = results[0].get("href", "Unknown")
            matches.append((sent, total, url))
    return matches
165
+
166
+
167
def search_google(query):
    """Run an exact-phrase search through the Google Custom Search API.

    The query is wrapped in double quotes to request a phrase match.
    Double quotes inside the query are replaced by spaces (they would end
    the phrase early) and runs of whitespace are collapsed.

    Args:
        query: Text to look up, typically a single sentence.

    Returns:
        A ``(items, total)`` tuple: ``items`` is the ``items`` list of the
        API response (at most 3 entries, possibly empty) and ``total`` is
        the response's ``searchInformation.totalResults`` as an int. The
        two are reported independently by the API, so ``total`` may be
        positive while ``items`` is empty. ``([], 0)`` is returned for a
        blank query, when credentials are not configured, or on any error.
    """
    phrase = " ".join(str(query or "").replace('"', " ").split())
    # Check the query first so a blank request never spends API quota.
    if not phrase or not GOOGLE_API_KEY or not GOOGLE_CX:
        return [], 0

    try:
        service = build("customsearch", "v1", developerKey=GOOGLE_API_KEY)
        res = service.cse().list(q=f'"{phrase}"', cx=GOOGLE_CX, num=3).execute()
        items = res.get("items", [])
        total = int(res.get("searchInformation", {}).get("totalResults", 0))
        return items, total
    except Exception as e:
        # Best-effort lookup: report the failure and behave as "no matches".
        print(f"⚠️ Google search error: {e}")
        return [], 0
179
+
180
+
181
def detect_google(text, delay=0.3):
    """Search every distinct sentence of *text* via Google Custom Search.

    Args:
        text: The passage to check. Blank or ``None`` input yields ``[]``.
        delay: Seconds to pause between consecutive searches. The pause is
            only inserted *between* requests, never after the last one.

    Returns:
        A list of ``(sentence, total_results, first_url)`` tuples, one per
        distinct sentence for which the search returned at least one item,
        in the order the sentences first appear in *text*.
    """
    if not text or not text.strip():
        return []

    matches = []
    seen = set()
    for sent in sent_tokenize(text):
        if sent in seen:
            # A repeated sentence would only spend another API call.
            continue
        if seen and delay > 0:
            # Throttle only when another request has already been made.
            time.sleep(delay)
        seen.add(sent)

        results, total = search_google(sent)
        # The reported total can be positive while no items came back;
        # require an actual item before indexing into the list.
        if total > 0 and results:
            url = results[0].get("link", "Unknown")
            matches.append((sent, total, url))
    return matches
191
+
192
+
193
def hybrid_detect(text):
    """Flag sentences of *text* that are found verbatim by a web search.

    DuckDuckGo is tried first; Google Custom Search is consulted only when
    DuckDuckGo reports no matches at all.

    Args:
        text: The passage to check.

    Returns:
        A dict with two keys:
        ``"highlighted_text"`` - *text* with every occurrence of each
        matched sentence wrapped in ``**`` markers (each occurrence is
        wrapped exactly once);
        ``"sources"`` - one URL per distinct matched sentence, in match
        order.
    """
    if not text or not text.strip():
        return {"highlighted_text": text or "", "sources": []}

    matches = detect_duckduckgo(text) or detect_google(text)

    # Keep the first hit for each distinct sentence so a sentence that
    # occurs several times is highlighted (and sourced) only once.
    sentences = []
    urls = []
    for sent, _, url in matches:
        if sent and sent not in sentences:
            sentences.append(sent)
            urls.append(url)

    highlighted = text
    if sentences:
        # One substitution pass over the original text: already-wrapped
        # text is never rescanned, and trying longer sentences first keeps
        # a sentence contained in another from splitting the longer match.
        ordered = sorted(sentences, key=len, reverse=True)
        pattern = "|".join(re.escape(s) for s in ordered)
        highlighted = re.sub(pattern, lambda m: f"**{m.group(0)}**", text)

    return {"highlighted_text": highlighted, "sources": urls}
202
+
203
+
204
+ # -----------------------------
205
+ # Warm-up Parrot Model
206
  # -----------------------------
207
  def warmup():
 
 
208
  try:
209
+ print("πŸ”₯ Warming up Parrot model...")
210
  model = get_parrot()
211
  _ = model.augment(input_phrase="hello world", do_diverse=False)
212
  print("βœ… Warmup complete.")
 
218
 
219
 
220
  # -----------------------------
221
+ # 🔹 Gradio UI
222
  # -----------------------------
223
  rephrase_iface = gr.Interface(
224
  fn=rephrase,
225
  inputs=gr.Textbox(lines=10, placeholder="Paste your text here..."),
226
  outputs="text",
227
+ title="🦜 Parrot Rephraser (Long Text)",
228
+ description="Rephrase paragraphs while maintaining meaning.",
229
  )
230
 
231
  sentencewise_iface = gr.Interface(
232
  fn=rephrase_sentencewise_unique,
233
  inputs=gr.Textbox(lines=10, placeholder="Paste text here..."),
234
  outputs="text",
235
+ title="🧩 Sentence-wise Paraphraser",
236
+ description="Generates top 3 diverse rephrases per sentence.",
237
+ )
238
+
239
+ plagiarism_iface = gr.Interface(
240
+ fn=hybrid_detect,
241
+ inputs=gr.Textbox(lines=10, placeholder="Paste text to check plagiarism..."),
242
+ outputs=gr.JSON(),
243
+ title="πŸ” Hybrid Plagiarism Detector",
244
+ description="Detects copied sentences using DuckDuckGo & Google Custom Search.",
245
  )
246
 
 
 
 
247
  demo = gr.TabbedInterface(
248
+ [rephrase_iface, sentencewise_iface, plagiarism_iface],
249
+ ["Full Text Rephraser", "Sentence-wise Paraphrases", "Plagiarism Checker"],
250
  )
251
 
252
  demo.launch(server_port=7860, server_name="0.0.0.0", show_error=True)