Update app.py
Browse files
app.py
CHANGED
|
@@ -3,14 +3,14 @@ from parrot import Parrot
|
|
| 3 |
import nltk
|
| 4 |
from nltk.tokenize import sent_tokenize, word_tokenize
|
| 5 |
import re
|
|
|
|
| 6 |
# -----------------------------
|
| 7 |
# Setup
|
| 8 |
# -----------------------------
|
| 9 |
-
nltk.data.path.append("./nltk_data") #
|
| 10 |
-
#
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
parrot = None # global variable
|
| 14 |
def get_parrot():
|
| 15 |
global parrot
|
| 16 |
if parrot is None:
|
|
@@ -18,56 +18,126 @@ def get_parrot():
|
|
| 18 |
parrot = Parrot(model_tag="prithivida/parrot_paraphraser_on_T5", use_gpu=False)
|
| 19 |
print("✅ Parrot model loaded successfully!")
|
| 20 |
return parrot
|
| 21 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
# -----------------------------
|
| 23 |
-
# Helper
|
| 24 |
# -----------------------------
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
def split_long_sentence(sentence, max_tokens=MAX_TOKENS):
|
| 26 |
words = word_tokenize(sentence)
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
cleaned = []
|
| 34 |
-
for s in sentences:
|
| 35 |
-
s = s.strip()
|
| 36 |
-
if s:
|
| 37 |
-
s = re.sub(r'[.!?]+$', '', s)
|
| 38 |
-
s = re.sub(r'\s{2,}', ' ', s)
|
| 39 |
-
s = s[0].upper() + s[1:]
|
| 40 |
-
cleaned.append(s)
|
| 41 |
-
result = ". ".join(cleaned)
|
| 42 |
-
if result and not result.endswith("."):
|
| 43 |
-
result += "."
|
| 44 |
-
return result
|
| 45 |
def rephrase(text):
|
| 46 |
-
model = get_parrot()
|
| 47 |
sentences = sent_tokenize(text)
|
| 48 |
rephrased = []
|
|
|
|
| 49 |
for s in sentences:
|
| 50 |
chunks = split_long_sentence(s)
|
| 51 |
paraphrased_chunks = []
|
| 52 |
for c in chunks:
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
|
|
|
|
|
|
|
|
|
| 62 |
return clean_sentences(rephrased)
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
iface = gr.Interface(
|
| 67 |
fn=rephrase,
|
| 68 |
inputs=gr.Textbox(lines=10, placeholder="Paste your text here..."),
|
| 69 |
outputs="text",
|
| 70 |
-
title="Parrot Rephraser",
|
| 71 |
-
description="
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 72 |
)
|
| 73 |
-
|
|
|
|
|
|
| 3 |
import nltk
|
| 4 |
from nltk.tokenize import sent_tokenize, word_tokenize
|
| 5 |
import re
|
| 6 |
+
|
| 7 |
# -----------------------------
|
| 8 |
# Setup
|
| 9 |
# -----------------------------
|
| 10 |
+
# Point NLTK at the tokenizer data bundled with the Space so no download
# is needed at runtime (punkt is expected under ./nltk_data).
nltk.data.path.append("./nltk_data")

# Parrot model handle; stays None until get_parrot() loads it on first use.
parrot = None
|
|
|
| 14 |
def get_parrot():
|
| 15 |
global parrot
|
| 16 |
if parrot is None:
|
|
|
|
| 18 |
parrot = Parrot(model_tag="prithivida/parrot_paraphraser_on_T5", use_gpu=False)
|
| 19 |
print("✅ Parrot model loaded successfully!")
|
| 20 |
return parrot
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
# Hard cap on word tokens per paraphrase chunk: keeps Parrot/T5 inputs small
# enough to run reliably (the model degrades on very long sequences).
MAX_TOKENS = 150


# -----------------------------
# Helper: Common utilities
# -----------------------------
| 29 |
+
def clean_sentence(sent):
    """Normalize a single sentence.

    Trims whitespace, strips any trailing run of ``.!?``, capitalizes the
    first character, and terminates with exactly one period. An input that
    is empty or punctuation-only comes back as the empty string.
    """
    text = sent.strip()
    text = re.sub(r"[.!?]+$", "", text)
    if not text:
        return text
    text = text[0].upper() + text[1:]
    if not text.endswith("."):
        text += "."
    return text
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
def clean_sentences(sentences):
    """Clean each sentence and join them into a single paragraph string.

    Fix: a sentence that cleans to "" (e.g. a punctuation-only input such
    as "...") used to survive the pre-filter — ``s.strip()`` is truthy —
    and then be joined as an empty element, leaving a stray double space
    in the output. Empty results are now dropped after cleaning as well.
    """
    cleaned = (clean_sentence(s) for s in sentences if s.strip())
    return " ".join(c for c in cleaned if c)
|
| 42 |
+
|
| 43 |
+
|
| 44 |
def split_long_sentence(sentence, max_tokens=MAX_TOKENS):
    """Break a sentence into chunks of at most ``max_tokens`` word tokens.

    Returns a list of whitespace-joined chunks; an empty sentence yields [].
    """
    words = word_tokenize(sentence)
    chunks = []
    for start in range(0, len(words), max_tokens):
        chunks.append(" ".join(words[start:start + max_tokens]))
    return chunks
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
# -----------------------------
|
| 50 |
+
# 🔹 App 1: Full Paragraph Rephraser
|
| 51 |
+
# -----------------------------
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 52 |
def rephrase(text):
    """Paraphrase a whole passage sentence by sentence.

    Each sentence is chunked via split_long_sentence() so Parrot stays
    within a stable input size. Any chunk the model cannot paraphrase
    (error, or no candidates) is kept verbatim, so this never raises on
    model failures.
    """
    model = get_parrot()
    rephrased = []
    for sentence in sent_tokenize(text):
        pieces = []
        for chunk in split_long_sentence(sentence):
            try:
                candidates = model.augment(
                    input_phrase=chunk,
                    do_diverse=True,
                    adequacy_threshold=0.85,
                    fluency_threshold=0.9,
                )
                # Parrot yields (text, score) pairs; take the top text,
                # falling back to the original chunk when nothing returns.
                pieces.append(candidates[0][0] if candidates else chunk)
            except Exception:
                pieces.append(chunk)
        rephrased.append(" ".join(pieces))
    return clean_sentences(rephrased)
|
| 74 |
+
|
| 75 |
+
|
| 76 |
+
# Gradio front-end for the paragraph-level rephraser (App 1).
rephrase_iface = gr.Interface(
    title="Parrot Rephraser (Long Text)",
    description="Paraphrases long text while maintaining punctuation and capitalization.",
    fn=rephrase,
    inputs=gr.Textbox(lines=10, placeholder="Paste your text here..."),
    outputs="text",
)
|
| 83 |
+
|
| 84 |
+
|
| 85 |
+
# -----------------------------
|
| 86 |
+
# 🔹 App 2: Sentence-wise Multiple Paraphrases
|
| 87 |
+
# -----------------------------
|
| 88 |
+
def generate_unique_paraphrases(sentence, N_OPTIONS=3):
    """Return up to N_OPTIONS distinct paraphrases of ``sentence``.

    Falls back to ``[sentence]`` when Parrot raises or produces nothing.
    """
    model = get_parrot()
    try:
        candidates = model.augment(
            input_phrase=sentence,
            do_diverse=True,
            adequacy_threshold=0.85,
            fluency_threshold=0.9,
        )
    except Exception:
        candidates = []

    if not candidates:
        return [sentence]

    # Keep the first occurrence of each paraphrase text, capped at
    # N_OPTIONS (dict.fromkeys preserves insertion order).
    texts = (p[0] for p in candidates)
    return list(dict.fromkeys(texts))[:N_OPTIONS]
|
| 111 |
+
|
| 112 |
+
|
| 113 |
+
def rephrase_sentencewise_unique(text, N_OPTIONS=3):
    """Build a numbered report with up to N_OPTIONS paraphrases per sentence.

    Output format, one block per input sentence, blank line between blocks:
        Sentence 1: <original>
         Option 1: <paraphrase>
         ...
    """
    results = []
    for idx, sentence in enumerate(sent_tokenize(text.strip()), 1):
        options = [
            clean_sentence(p)
            for p in generate_unique_paraphrases(sentence, N_OPTIONS)
        ]
        lines = [f"Sentence {idx}: {sentence}"]
        for i, opt in enumerate(options, 1):
            lines.append(f" Option {i}: {opt}")
        results.append("\n".join(lines) + "\n")
    return "\n".join(results)
|
| 124 |
+
|
| 125 |
+
|
| 126 |
+
# Gradio front-end for the sentence-wise multi-option paraphraser (App 2).
sentencewise_iface = gr.Interface(
    title="Parrot Rephraser (Sentence-wise Options)",
    description="Generates top 3 unique paraphrases per sentence. Optimized for HF free-tier.",
    fn=rephrase_sentencewise_unique,
    inputs=gr.Textbox(lines=10, placeholder="Paste text here..."),
    outputs="text",
)
|
| 133 |
+
|
| 134 |
+
|
| 135 |
+
# -----------------------------
|
| 136 |
+
# 🔹 Combine both interfaces into Tabs
|
| 137 |
+
# -----------------------------
|
| 138 |
+
# Expose both tools as tabs of a single Gradio app and start the server.
_interfaces = [rephrase_iface, sentencewise_iface]
_tab_titles = ["Full Text Rephraser", "Sentence-wise Paraphrases"]
demo = gr.TabbedInterface(_interfaces, _tab_titles)

demo.launch()
|