import gradio as gr
import os
import numpy as np
from scipy.special import expit
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from PyPDF2 import PdfReader
from docx import Document
# Load Model and Tokenizer
MODEL = "cardiffnlp/tweet-topic-21-multi"
tokenizer = AutoTokenizer.from_pretrained(MODEL)
model = AutoModelForSequenceClassification.from_pretrained(MODEL)
class_mapping = model.config.id2label
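# NOTE: tweet-topic-21-multi is a multi-label classifier, so each logit is
# squashed independently with a sigmoid (expit) and thresholded at 0.5 below,
# rather than softmaxed across classes.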
# Text Analyzer
def analyze_topics(text):
    detected_topics = []
    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
    outputs = model(**inputs)
    scores = outputs.logits[0].detach().numpy()
    scores = expit(scores)
    predictions = (scores >= 0.5).astype(int)
    for i, pred in enumerate(predictions):
        if pred:
            topic_name = class_mapping[i]
            confidence = scores[i]
            detected_topics.append(f"• {topic_name} ({confidence:.2f})")
    if detected_topics:
        return "\n".join(detected_topics)
    else:
        return "No specific topics detected."
# Document Analyzer Helpers
def extract_text_from_file(file_path):
    ext = os.path.splitext(file_path)[1].lower()
    if ext == ".pdf":
        reader = PdfReader(file_path)
        # extract_text() can return None/empty for image-only pages; skip those.
        text = " ".join(t for page in reader.pages if (t := page.extract_text()))
    elif ext == ".docx":
        doc = Document(file_path)
        text = "\n".join(p.text for p in doc.paragraphs)
    elif ext == ".txt":
        with open(file_path, "r", encoding="utf-8") as f:
            text = f.read()
    else:
        raise ValueError("Unsupported file format. Please upload a PDF, DOCX, or TXT file.")
    return text.strip()
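# Unsupported extensions raise ValueError above; analyze_document below turns
# that into a user-facing message instead of an unhandled traceback.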
def analyze_document(file):
    if file is None:
        return "Please upload a document first."
    # gr.File may return a plain filepath string (Gradio 4+) or a tempfile-like
    # object with a .name attribute (older Gradio); handle both.
    file_path = file if isinstance(file, str) else file.name
    try:
        text = extract_text_from_file(file_path)
    except ValueError as err:
        return str(err)
    if not text:
        return "No readable text found in document."
    # Split into chunks for large docs
    words = text.split()
    chunks = [" ".join(words[i:i + 400]) for i in range(0, len(words), 400)]
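    # ~400-word chunks keep each pass near the model's 512-token window;
    # truncation=True in the tokenizer call below guards against any overflow.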
    all_detected_topics = {}
    for chunk in chunks:
        inputs = tokenizer(chunk, return_tensors="pt", truncation=True, max_length=512)
        outputs = model(**inputs)
        scores = outputs.logits[0].detach().numpy()
        scores = expit(scores)
        predictions = (scores >= 0.5).astype(int)
        for i, pred in enumerate(predictions):
            if pred:
                topic_name = class_mapping[i]
                confidence = scores[i]
                all_detected_topics.setdefault(topic_name, []).append(confidence)
    if all_detected_topics:
        # Rank topics by average confidence directly, rather than re-parsing
        # the formatted output strings.
        ranked = sorted(
            ((topic, np.mean(confs)) for topic, confs in all_detected_topics.items()),
            key=lambda item: item[1],
            reverse=True,
        )
        summary = [f"• {topic} (avg confidence: {conf:.2f})" for topic, conf in ranked]
        return "\n".join(summary)
    return "No specific topics detected in document."
# -------------------------
# Custom CSS for Dark Theme
# -------------------------
css = """
body, .gradio-container {
    background-color: #1a1a1a !important;
    color: #f5f5f5 !important;
    font-family: 'Inter', sans-serif !important;
}
h1, h2, h3, label {
    color: #ff9900 !important;
    font-weight: 600 !important;
}
textarea, input, .upload-box, .gr-box {
    background-color: #2a2a2a !important;
    color: #f5f5f5 !important;
    border: 1px solid #3a3a3a !important;
    border-radius: 10px !important;
}
button {
    background-color: #ff9900 !important;
    color: #1a1a1a !important;
    font-weight: 600 !important;
    border-radius: 8px !important;
    border: none !important;
    transition: 0.25s ease-in-out;
}
button:hover {
    background-color: #ffb84d !important;
}
.output-textbox {
    background-color: #252525 !important;
    color: #ffd480 !important;
    border: 1px solid #3a3a3a !important;
    border-radius: 10px !important;
}
.gr-tabs, .tabitem {
    background-color: transparent !important;
}
footer, .footer, .svelte-1xdkkgx {
    background: none !important;
    border: none !important;
    box-shadow: none !important;
    color: #888 !important;
    text-align: center !important;
}
"""
# -------------------------
# Layout Using Blocks
# -------------------------
with gr.Blocks(css=css, theme=gr.themes.Base(primary_hue="orange")) as app:
    gr.Markdown("<h1 style='text-align:center;'>🧠 AI Topic Analyzer</h1>")
    gr.Markdown(
        "Analyze text or upload a document to detect key topics using CardiffNLP’s Tweet Topic model."
    )
    with gr.Tabs():
        with gr.Tab("💬 Text Analyzer"):
            text_input = gr.Textbox(
                label="📝 Enter Text", placeholder="Type or paste text here...", lines=4
            )
            text_output = gr.Textbox(label="🎯 Detected Topics", elem_classes=["output-textbox"])
            analyze_text_btn = gr.Button("Analyze Text")
            analyze_text_btn.click(analyze_topics, inputs=text_input, outputs=text_output)
        with gr.Tab("📄 Document Analyzer"):
            file_input = gr.File(label="📄 Upload PDF, DOCX, or TXT", file_types=[".pdf", ".docx", ".txt"])
            doc_output = gr.Textbox(label="📘 Detected Topics", elem_classes=["output-textbox"])
            analyze_doc_btn = gr.Button("Analyze Document")
            analyze_doc_btn.click(analyze_document, inputs=file_input, outputs=doc_output)
    gr.Markdown("<p style='text-align:center; color:#888;'>Built with ❤️ using Gradio & Transformers</p>")
# -------------------------
# Launch
# -------------------------
if __name__ == "__main__":
    app.launch()
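    # For a quick local test outside the Space, share=True exposes a temporary
    # public URL: app.launch(share=True)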