"""Topic analysis helpers for a Gradio app.

Loads the ``cardiffnlp/tweet-topic-21-multi`` multi-label classifier once at
import time and exposes two UI callbacks: ``analyze_topics`` for raw text and
``analyze_document`` for uploaded PDF / DOCX / TXT files.
"""
import os

import gradio as gr
import numpy as np
import torch
from docx import Document
from PyPDF2 import PdfReader
from scipy.special import expit
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# Load Model and Tokenizer
MODEL = "cardiffnlp/tweet-topic-21-multi"
tokenizer = AutoTokenizer.from_pretrained(MODEL)
model = AutoModelForSequenceClassification.from_pretrained(MODEL)
class_mapping = model.config.id2label

# A topic counts as "detected" when its sigmoid score reaches this value.
TOPIC_THRESHOLD = 0.5
# Token limit passed to the tokenizer; longer inputs are truncated.
MAX_TOKENS = 512
# Number of whitespace-separated words per document chunk.
CHUNK_WORDS = 400


def _score_text(text):
    """Classify *text* and return ``{topic_name: score}`` for detected topics.

    Scores are the element-wise sigmoid of the model logits (the classifier is
    multi-label). Only topics scoring at least ``TOPIC_THRESHOLD`` are
    returned, in class-index order.
    """
    inputs = tokenizer(
        text, return_tensors="pt", truncation=True, max_length=MAX_TOKENS
    )
    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        logits = model(**inputs).logits[0].numpy()
    scores = expit(logits)
    return {
        class_mapping[index]: float(score)
        for index, score in enumerate(scores)
        if score >= TOPIC_THRESHOLD
    }


# Text Analyzer
def analyze_topics(text):
    """Return a bullet list of topics detected in *text*.

    Args:
        text: Free-form text from the UI textbox.

    Returns:
        One ``• topic (score)`` line per detected topic, or a short message
        when the input is empty or no topic reaches the threshold.
    """
    # Classifying an empty string yields meaningless scores; ask for input.
    if not text or not text.strip():
        return "Please enter some text first."

    detected = _score_text(text)
    if not detected:
        return "No specific topics detected."
    return "\n".join(
        f"• {topic_name} ({confidence:.2f})"
        for topic_name, confidence in detected.items()
    )


# Document Analyzer Helpers
def extract_text_from_file(file_path):
    """Extract plain text from a PDF, DOCX, or TXT file.

    Args:
        file_path: Path to the file; the extension selects the parser.

    Returns:
        The extracted text with leading/trailing whitespace stripped.

    Raises:
        ValueError: If the extension is not ``.pdf``, ``.docx`` or ``.txt``.
    """
    ext = os.path.splitext(file_path)[1].lower()
    if ext == ".pdf":
        reader = PdfReader(file_path)
        # Extract each page once; pages with no text are skipped.
        page_texts = (page.extract_text() for page in reader.pages)
        text = " ".join(page_text for page_text in page_texts if page_text)
    elif ext == ".docx":
        doc = Document(file_path)
        text = "\n".join(p.text for p in doc.paragraphs)
    elif ext == ".txt":
        # errors="replace" keeps non-UTF-8 files readable instead of raising
        # UnicodeDecodeError in the UI callback.
        with open(file_path, "r", encoding="utf-8", errors="replace") as f:
            text = f.read()
    else:
        raise ValueError(
            "Unsupported file format. Please upload a PDF, DOCX, or TXT file."
        )
    return text.strip()


def analyze_document(file):
    """Return topics detected across an uploaded document, ranked by score.

    The document is split into ``CHUNK_WORDS``-word chunks, each chunk is
    classified separately, and for every topic the scores of the chunks in
    which it was detected are averaged.

    Args:
        file: Value supplied by the ``gr.File`` component: ``None``, a path,
            or an object whose ``name`` attribute is the path.

    Returns:
        One ``• topic (avg confidence: x.xx)`` line per topic, highest
        average first, or a short explanatory message.
    """
    if file is None:
        return "Please upload a document first."

    # Depending on the Gradio version/config the component hands over either
    # a path or a temp-file wrapper exposing the path as ``.name``.
    file_path = file if isinstance(file, (str, os.PathLike)) else file.name
    try:
        text = extract_text_from_file(file_path)
    except ValueError as exc:
        # Unsupported extension: show the message instead of a generic error.
        return str(exc)
    if not text:
        return "No readable text found in document."

    # Split into chunks for large docs. A chunk may still tokenize to more
    # than MAX_TOKENS tokens; the tokenizer truncates the excess.
    words = text.split()
    all_detected_topics = {}
    for start in range(0, len(words), CHUNK_WORDS):
        chunk = " ".join(words[start:start + CHUNK_WORDS])
        for topic_name, confidence in _score_text(chunk).items():
            all_detected_topics.setdefault(topic_name, []).append(confidence)

    if not all_detected_topics:
        return "No specific topics detected in document."

    # Rank on the numeric mean rather than re-parsing the rounded text.
    ranked = sorted(
        ((topic, float(np.mean(confs))) for topic, confs in all_detected_topics.items()),
        key=lambda item: item[1],
        reverse=True,
    )
    return "\n".join(
        f"• {topic} (avg confidence: {average:.2f})" for topic, average in ranked
    )


# -------------------------
# Custom CSS for Dark Theme
# -------------------------
# Adjacent literals concatenate to exactly the original one-line stylesheet.
css = (
    " body, .gradio-container {"
    " background-color: #1a1a1a !important;"
    " color: #f5f5f5 !important;"
    " font-family: 'Inter', sans-serif !important;"
    " }"
    " h1, h2, h3, label {"
    " color: #ff9900 !important;"
    " font-weight: 600 !important;"
    " }"
    " textarea, input, .upload-box, .gr-box {"
    " background-color: #2a2a2a !important;"
    " color: #f5f5f5 !important;"
    " border: 1px solid #3a3a3a !important;"
    " border-radius: 10px !important;"
    " }"
    " button {"
    " background-color: #ff9900 !important;"
    " color: #1a1a1a !important;"
    " font-weight: 600 !important;"
    " border-radius: 8px !important;"
    " border: none !important;"
    " transition: 0.25s ease-in-out;"
    " }"
    " button:hover {"
    " background-color: #ffb84d !important;"
    " }"
    " .output-textbox {"
    " background-color: #252525 !important;"
    " color: #ffd480 !important;"
    " border: 1px solid #3a3a3a !important;"
    " border-radius: 10px !important;"
    " }"
    " .gr-tabs, .tabitem {"
    " background-color: transparent !important;"
    " }"
    " footer, .footer, .svelte-1xdkkgx {"
    " background: none !important;"
    " border: none !important;"
    " box-shadow: none !important;"
    " color: #888 !important;"
    " text-align: center !important;"
    " } "
)
# -------------------------
# Layout Using Blocks
# -------------------------
# Two-tab UI: one tab classifies pasted text, the other an uploaded document.
with gr.Blocks(css=css, theme=gr.themes.Base(primary_hue="orange")) as app:
    # NOTE(review): the title and footer literals spanned several physical
    # lines; they are written with explicit "\n" so the text is unchanged.
    # Any heading/HTML markup that may once have wrapped them is not visible
    # here - confirm whether it should be restored.
    gr.Markdown("\n\n🧠 AI Topic Analyzer\n\n")
    gr.Markdown(
        "Analyze text or upload a document to detect key topics using CardiffNLP’s Tweet Topic model."
    )

    with gr.Tabs():
        with gr.Tab("💬 Text Analyzer"):
            text_input = gr.Textbox(
                label="📝 Enter Text",
                placeholder="Type or paste text here...",
                lines=4,
            )
            text_output = gr.Textbox(
                label="🎯 Detected Topics",
                elem_classes=["output-textbox"],
            )
            analyze_text_btn = gr.Button("Analyze Text")
            analyze_text_btn.click(
                analyze_topics, inputs=text_input, outputs=text_output
            )

        with gr.Tab("📄 Document Analyzer"):
            file_input = gr.File(
                label="📄 Upload PDF, DOCX, or TXT",
                file_types=[".pdf", ".docx", ".txt"],
            )
            doc_output = gr.Textbox(
                label="📘 Detected Topics",
                elem_classes=["output-textbox"],
            )
            analyze_doc_btn = gr.Button("Analyze Document")
            analyze_doc_btn.click(
                analyze_document, inputs=file_input, outputs=doc_output
            )

    # Footer shown below the tabs.
    # NOTE(review): nesting was lost in the source; Blocks level is assumed.
    gr.Markdown("\n\nBuilt with ❤️ using Gradio & Transformers\n\n")

# -------------------------
# Launch
# -------------------------
if __name__ == "__main__":
    app.launch()