Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import torch | |
| from transformers import AutoTokenizer, T5ForConditionalGeneration, pipeline | |
| from sentence_transformers import SentenceTransformer, util | |
| import requests | |
| import random | |
| import warnings | |
| from transformers import logging | |
| import os | |
| import tensorflow as tf | |
# --- Runtime configuration -------------------------------------------------
# Turn down log/warning output from TensorFlow, the warnings module and
# transformers (``logging`` here is ``transformers.logging``).
# NOTE(review): ``import tensorflow`` has already run by this point, so the
# TF_CPP_MIN_LOG_LEVEL assignment may come too late to affect TensorFlow's
# native start-up logging -- confirm, and consider setting it before the import.
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"
tf.get_logger().setLevel("ERROR")
warnings.filterwarnings("ignore")
logging.set_verbosity_error()

# The Groq credential comes from the process environment; refuse to start
# without it so the failure is visible immediately rather than on first use.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
if not GROQ_API_KEY:
    raise ValueError(
        "GROQ_API_KEY is not set. Please add it to the Secrets in your Hugging Face Space settings."
    )
def segment_into_sentences_groq(passage):
    """Split *passage* into sentences via the Groq chat-completions API.

    The model is instructed to append the marker ``1!2@3#`` after every
    sentence; the reply is then split on that marker.

    Args:
        passage: Text to segment. ``None``, empty or whitespace-only input
            yields ``[]`` without contacting the API.

    Returns:
        A list of non-empty, whitespace-stripped sentences in reply order.

    Raises:
        ValueError: If the API answers with a non-200 status, the body is
            not valid JSON, or the JSON does not have the expected shape.
        requests.RequestException: On connection failure or timeout.
    """
    # Nothing to segment: skip the remote call entirely.
    if not passage or not passage.strip():
        return []

    marker = "1!2@3#"
    # Seconds to wait for the API; without a timeout requests blocks forever
    # on a stalled connection.
    request_timeout = 60

    headers = {
        "Authorization": f"Bearer {GROQ_API_KEY}",
        "Content-Type": "application/json"
    }
    payload = {
        "model": "llama3-8b-8192",
        "messages": [
            {
                "role": "system",
                "content": "you are to segment the sentence by adding '1!2@3#' at the end of each sentence. Return only the segmented sentences only return the modified passage and nothing else do not add your responses"
            },
            {
                "role": "user",
                "content": f"you are to segment the sentence by adding '1!2@3#' at the end of each sentence. Return only the segmented sentences only return the modified passage and nothing else do not add your responses. here is the passage:{passage}"
            }
        ],
        "temperature": 1.0,
        "max_tokens": 8192
    }

    response = requests.post(
        "https://api.groq.com/openai/v1/chat/completions",
        json=payload,
        headers=headers,
        timeout=request_timeout,
    )
    if response.status_code != 200:
        raise ValueError(f"Groq API error: {response.text}")

    try:
        # response.json() raises a ValueError subclass on a non-JSON body;
        # the .get() chain raises AttributeError/TypeError when an
        # intermediate value is null or not a mapping.
        data = response.json()
        segmented_text = data.get("choices", [{}])[0].get("message", {}).get("content", "")
    except (ValueError, IndexError, KeyError, AttributeError, TypeError) as err:
        raise ValueError("Unexpected response structure from Groq API.") from err

    # A null content field means the model produced no text.
    if segmented_text is None:
        return []
    if not isinstance(segmented_text, str):
        raise ValueError("Unexpected response structure from Groq API.")

    pieces = (piece.strip() for piece in segmented_text.split(marker))
    return [piece for piece in pieces if piece]
class TextEnhancer:
    """Rewrite text sentence by sentence while keeping its meaning.

    Each sentence is paraphrased with a T5 model, the candidates are scored
    against the original with a sentence-embedding model, and the first
    candidate that meets the similarity threshold is passed through a
    grammar-correction model. Sentences with no acceptable candidate are
    kept unchanged.
    """

    _PARAPHRASE_MODEL = "prithivida/parrot_paraphraser_on_T5"
    _GRAMMAR_MODEL = "Grammarly/coedit-large"
    _SIMILARITY_MODEL = "paraphrase-MiniLM-L6-v2"
    # The CoEdIT model card shows inputs phrased as an instruction followed
    # by the sentence, so the grammar step sends this prefix with each input.
    _GRAMMAR_INSTRUCTION = "Fix grammatical errors in this sentence: "
    # Characters that already terminate a sentence (the last is an ellipsis).
    _SENTENCE_ENDINGS = (".", "!", "?", "\u2026")
    # Closing quotes/brackets that may follow the terminating punctuation.
    _CLOSERS = "\"'\u201d\u2019)]"

    def __init__(self):
        """Load the paraphrase, grammar and similarity models onto one device."""
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.paraphrase_tokenizer = AutoTokenizer.from_pretrained(self._PARAPHRASE_MODEL)
        self.paraphrase_model = T5ForConditionalGeneration.from_pretrained(
            self._PARAPHRASE_MODEL
        ).to(self.device)
        self.grammar_pipeline = pipeline(
            "text2text-generation",
            model=self._GRAMMAR_MODEL,
            device=0 if self.device == "cuda" else -1
        )
        self.similarity_model = SentenceTransformer(self._SIMILARITY_MODEL).to(self.device)

    def enhance_text(self, text, min_similarity=0.8, max_variations=3):
        """Return an enhanced version of *text*.

        Args:
            text: Passage to enhance. ``None``, empty or whitespace-only
                input returns ``""`` without calling any model or API.
            min_similarity: Minimum cosine similarity (0-1) a paraphrase
                must reach against the original sentence to be accepted.
            max_variations: Number of paraphrase candidates generated per
                sentence (also used as the beam count).

        Returns:
            The enhanced sentences joined by single spaces. Terminal
            punctuation already present on a sentence is preserved; a
            period is appended only where a sentence has none.

        Raises:
            ValueError: Propagated from ``segment_into_sentences_groq``.
        """
        if not text or not text.strip():
            return ""

        enhanced_sentences = []
        for sentence in segment_into_sentences_groq(text):
            if not sentence.strip():
                continue
            candidates = self._paraphrase(sentence, max_variations)
            accepted = self._filter_by_similarity(sentence, candidates, min_similarity)
            if accepted:
                # Candidates keep generation order; take the first that passed.
                enhanced_sentences.append(self._correct_grammar(accepted[0]))
            else:
                enhanced_sentences.append(sentence)
        return self._join_sentences(enhanced_sentences)

    def _paraphrase(self, sentence, max_variations):
        """Generate up to *max_variations* paraphrases of one sentence."""
        inputs = self.paraphrase_tokenizer(
            f"paraphrase: {sentence}",
            return_tensors="pt",
            padding=True,
            max_length=150,
            truncation=True
        ).to(self.device)
        # NOTE(review): no sampling flag is passed, so `temperature` likely
        # has no effect on this beam search -- confirm against the
        # transformers generation docs before relying on it.
        outputs = self.paraphrase_model.generate(
            **inputs,
            max_length=len(sentence.split()) + 20,
            num_return_sequences=max_variations,
            num_beams=max_variations,
            temperature=0.7
        )
        return [
            self.paraphrase_tokenizer.decode(output, skip_special_tokens=True)
            for output in outputs
        ]

    def _filter_by_similarity(self, sentence, candidates, min_similarity):
        """Keep the candidates whose cosine similarity to *sentence* is high enough."""
        sentence_embedding = self.similarity_model.encode(sentence)
        candidate_embeddings = self.similarity_model.encode(candidates)
        scores = util.cos_sim(sentence_embedding, candidate_embeddings)[0]
        return [
            candidate for candidate, score in zip(candidates, scores)
            if score >= min_similarity
        ]

    def _correct_grammar(self, sentence):
        """Run one sentence through the grammar-correction pipeline."""
        result = self.grammar_pipeline(
            self._GRAMMAR_INSTRUCTION + sentence,
            max_length=150,
            num_return_sequences=1
        )
        return result[0]["generated_text"]

    @staticmethod
    def _join_sentences(sentences):
        """Join sentences with spaces, adding a period only where one is missing."""
        finished = []
        for sentence in sentences:
            sentence = sentence.strip()
            if not sentence:
                continue
            # Look past closing quotes/brackets when checking for punctuation
            # so that '... she said "Go!"' is not given an extra period.
            core = sentence.rstrip(TextEnhancer._CLOSERS)
            if not core.endswith(TextEnhancer._SENTENCE_ENDINGS):
                sentence += "."
            finished.append(sentence)
        return " ".join(finished)
def create_interface():
    """Build the Gradio interface for the text-enhancement app.

    A single ``TextEnhancer`` is created here and shared by every request
    through the nested callback.

    Returns:
        The configured ``gr.Interface``; the caller is responsible for
        launching it.
    """
    enhancer = TextEnhancer()

    def process_text(text, similarity_threshold):
        """Enhance *text*; on any failure return the error as display text."""
        try:
            # The slider value is a percentage; enhance_text takes a fraction.
            result = enhancer.enhance_text(
                text,
                min_similarity=similarity_threshold / 100,
            )
        except Exception as exc:
            # UI boundary: show the problem in the output box instead of
            # letting the exception escape the callback.
            result = f"Error: {str(exc)}"
        return result

    text_input = gr.Textbox(
        label="Input Text",
        placeholder="Enter text to enhance...",
        lines=10,
    )
    similarity_slider = gr.Slider(
        minimum=50,
        maximum=100,
        value=80,
        label="Minimum Semantic Similarity (%)",
    )
    text_output = gr.Textbox(label="Enhanced Text", lines=10)

    return gr.Interface(
        fn=process_text,
        inputs=[text_input, similarity_slider],
        outputs=text_output,
        title="Text Enhancement System",
        description="Improve text quality while preserving original meaning",
    )
# Script entry point: build the app and start serving it. Nothing is
# launched when this module is merely imported.
if __name__ == "__main__":
    interface = create_interface()
    interface.launch()