Update app.py
app.py
CHANGED
@@ -2,7 +2,6 @@ import gradio as gr
 from transformers import pipeline
 import requests, re, datetime
 from concurrent.futures import ThreadPoolExecutor
-from sentence_transformers import SentenceTransformer, util

 # ---------------------------
 # Load Models
@@ -16,14 +15,11 @@ claim_labels = ["factual claim", "opinion", "personal anecdote", "other"]
 ai_detect_model_name = "roberta-base-openai-detector"
 ai_detector = pipeline("text-classification", model=ai_detect_model_name, device=-1)

-# Semantic similarity model
-sem_model = SentenceTransformer('all-MiniLM-L6-v2')
-
 # ---------------------------
 # Google Search Config
 # ---------------------------
-GOOGLE_API_KEY = "
-GOOGLE_CX = "
+GOOGLE_API_KEY = "AIzaSyAC56onKwR17zd_djUPEfGXQACy9qRjDxw"
+GOOGLE_CX = "87391aed073954cae"

 google_quota = {"count": 0, "date": datetime.date.today()}
 GOOGLE_DAILY_LIMIT = 100
@@ -42,9 +38,15 @@ def safe_split_text(text):
 # ---------------------------
 # Claim Extraction
 # ---------------------------
-def extract_claims(page_text, max_claims=20):
+def extract_claims(page_text, max_claims=20, batch_size=50):
+    """
+    Extract top claims from text:
+    - Uses safe_split_text for splitting.
+    - Classifies each piece into factual claim, opinion, or anecdote.
+    """
     sentences = safe_split_text(page_text)

+    # Step 1: Function to classify a single sentence
     def classify_sentence(s):
         out = claim_classifier(s, claim_labels)
         label_priority = ["factual claim", "opinion", "personal anecdote"]
@@ -53,12 +55,14 @@ def extract_claims(page_text, max_claims=20):
                 return {"text": s, "label": lbl, "score": round(out["scores"][out["labels"].index(lbl)], 3)}
         return None

+    # Step 2: Threaded classification
     results = []
     with ThreadPoolExecutor() as executor:
         for r in executor.map(classify_sentence, sentences):
             if r:
                 results.append(r)

+    # Step 3: Limit top claims
     results = sorted(results, key=lambda x: -len(x["text"]))[:max_claims]
     return results

@@ -66,6 +70,7 @@ def extract_claims(page_text, max_claims=20):
 # AI Text Detection
 # ---------------------------
 def detect_ai(texts):
+    """Detect whether input text is AI-generated or human-written."""
     if isinstance(texts, str):
         texts = [texts]
     results = []
@@ -79,56 +84,40 @@ def detect_ai(texts):
 # ---------------------------
 # Google Evidence Gathering
 # ---------------------------
-def fetch_google_search(claim
-    """
-    Returns top 3 keyword results as before
-    and top 3 semantic results
-    """
+def fetch_google_search(claim):
     global google_quota
     today = datetime.date.today()
     if google_quota["date"] != today:
         google_quota = {"count": 0, "date": today}

     if google_quota["count"] >= GOOGLE_DAILY_LIMIT:
-        return {
-            "keyword_results": ["[Google] Daily quota reached (100 queries)."],
-            "semantic_results": ["[Google] Daily quota reached (100 queries)."]
-        }
+        return ["[Google] Daily quota reached (100 queries)."]

     try:
-        url = f"https://www.googleapis.com/customsearch/v1?q={requests.utils.quote(claim)}&key={GOOGLE_API_KEY}&cx={GOOGLE_CX}
+        url = f"https://www.googleapis.com/customsearch/v1?q={requests.utils.quote(claim)}&key={GOOGLE_API_KEY}&cx={GOOGLE_CX}"
         r = requests.get(url).json()
         google_quota["count"] += 1
         items = r.get("items", [])
-
-
-
-
-        # Semantic-based results (top 3 by similarity)
-        semantic_results = []
-        if items:
-            claim_emb = sem_model.encode(claim, convert_to_tensor=True)
-            snippets = [f"{item['title']}: {item['snippet']}" for item in items]
-            snippet_embs = sem_model.encode(snippets, convert_to_tensor=True)
-            sims = util.cos_sim(claim_emb, snippet_embs)[0]
-            top_indices = sims.argsort(descending=True)[:3]
-            semantic_results = [snippets[i] for i in top_indices]
-
-        return {"keyword_results": keyword_results, "semantic_results": semantic_results}
-
-    except Exception as e:
-        return {"keyword_results": [], "semantic_results": []}
+        return [f"{item['title']}: {item['snippet']}" for item in items[:3]]  # top 3 results
+    except Exception:
+        return []

 # ---------------------------
 # Unified Predict Function
 # ---------------------------
 def predict(user_text=""):
+    """
+    Runs both:
+    1. Full-text analysis (AI detection on entire input + sentence-based fact-check)
+    2. Claim-extracted analysis (claim split + AI detection + fact-check)
+    """
     if not user_text.strip():
         return {"error": "No text provided."}

     # --- Full text analysis ---
     full_ai_result = detect_ai(user_text)
-
+
+    # NEW: Split strictly by '.' to preserve full user input sentences
     dot_sentences = [s.strip() for s in user_text.split('.') if s.strip()]
     full_fact_checking = {s: fetch_google_search(s) for s in dot_sentences}

@@ -136,7 +125,7 @@ def predict(user_text=""):
     claims_data = extract_claims(user_text)
     claims_texts = [c["text"] for c in claims_data]
     claims_ai_results = detect_ai(claims_texts) if claims_texts else []
-
+    fact_checking = {c["text"]: fetch_google_search(c["text"]) for c in claims_data}

     return {
         "full_text": {
@@ -146,7 +135,7 @@ def predict(user_text=""):
         },
         "claims": claims_data,
         "claims_ai_detection": claims_ai_results,
-        "claims_fact_checking":
+        "claims_fact_checking": fact_checking,
         "google_quota_used": google_quota["count"],
         "google_quota_reset": str(datetime.datetime.combine(
             google_quota["date"] + datetime.timedelta(days=1),
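For context, a minimal sketch of how the simplified interface could be exercised locally after this change. This driver script is not part of the commit; the import path, sample sentences, and printed fields are assumptions based only on the return shapes visible in the diff above (fetch_google_search now returns a flat list of "title: snippet" strings, and predict returns a dict with "claims" and "claims_fact_checking").

# Hypothetical smoke test (not in app.py): assumes app.py is importable,
# the Hugging Face models can be downloaded, and the Google credentials are valid.
from app import fetch_google_search, predict

if __name__ == "__main__":
    # Flat list of up to 3 "title: snippet" strings (or a quota/error message)
    print(fetch_google_search("The Eiffel Tower is 330 metres tall"))

    out = predict("The Eiffel Tower is 330 metres tall. I think Paris is lovely.")
    print(out["claims"])  # [{"text": ..., "label": ..., "score": ...}, ...]
    for claim, evidence in out["claims_fact_checking"].items():
        print(claim, "->", evidence[:1])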