Spaces:

aubynsamuel05
/

nli_checks

Sleeping

App Files Files Community

aubynsamuel05 committed on Jul 15

Commit

d51c3cd

1 Parent(s): f078461

fixed claim verification calculations

Browse files

* check claim against entire paragraph instead of individual sentences

Files changed (5) hide show

.gitIgnore +6 -0
.gitattributes +1 -5
deploy/main/claim_verifier.py +21 -28
requirements.txt +3 -1
semantic_similarity.py +16 -48

.gitIgnore ADDED Viewed

	@@ -0,0 +1,6 @@

+.vscode
+.venv
+venv
+**/__pycache__/
+snli_1.0_dev.jsonl
+api_call.py

.gitattributes CHANGED Viewed

@@ -33,8 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
-.vscode
-.venv
-venv
-**/__pycache__/
-snli_1.0_dev.jsonl

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

deploy/main/claim_verifier.py CHANGED Viewed

@@ -208,9 +208,7 @@ class ClaimVerifier:
     def _get_from_cache(self, key: str) -> Optional[Dict]:
         return self.claim_cache.get(key)
-    def _semantic_similarity_with_sentences(
-        self, claim: str, sentences: List[str]
-    ) -> float:
         """Calculate entailment scores and return the best one."""
         try:
             score = calculate_semantic_similarity(claim, sentences)
@@ -260,15 +258,6 @@ class ClaimVerifier:
                                 result
                             )
-                            # Enhanced Logging Format
-                            logging.info(f"\nSource: {url} ({domain_type})")
-                            # logging.info(
-                            #     f"  - Relevant Sentences: {sentences[:3]}"
-                            # )  # Log first 2 sentences
-                            logging.info(
-                                f"  - Entailment Score: {similarity_score:.2f}"
-                            )
                             total_weight += domain_weight
                             if similarity_score >= 0.4:
                                 support_scores.append(similarity_score * domain_weight)
@@ -282,6 +271,10 @@ class ClaimVerifier:
                                     "relevant_sentences": sentences[:3],
                                 }
                             )
                     except Exception as e:
                         logging.error(f"Error processing {url}: {e}")
             except TimeoutError:
@@ -290,12 +283,12 @@ class ClaimVerifier:
         support_sum = sum(support_scores)
         if total_weight > 0:
-            final_score = min(1.0, support_sum / len(support_scores))
             # Adjustments
-            # if final_score < 0.5 and support_sum < 0.5:
-            #     final_score *= 0.8
-            # elif final_score > 0.5 and support_sum >= 1.0:
-            #     final_score = min(0.9, final_score * 1.1)
         else:
             final_score = 0.1
@@ -347,19 +340,19 @@ class ClaimVerifier:
             if not relevant_sentences:
                 return None
-            # cleaned_content = ""
-            # for sentence in relevant_sentences:
-            #     if (
-            #         sentence.endswith(".")
-            #         or sentence.endswith("?")
-            #         or sentence.endswith("!")
-            #     ):
-            #         cleaned_content += f"{sentence} "
-            #     else:
-            #         cleaned_content += f"{sentence}. "
             semantic_similarity = self._semantic_similarity_with_sentences(
-                claim, relevant_sentences
             )
             domain_weight, domain_type = self._get_domain_weight(url)

     def _get_from_cache(self, key: str) -> Optional[Dict]:
         return self.claim_cache.get(key)
+    def _semantic_similarity_with_sentences(self, claim: str, sentences: str) -> float:
         """Calculate entailment scores and return the best one."""
         try:
             score = calculate_semantic_similarity(claim, sentences)
                                 result
                             )
                             total_weight += domain_weight
                             if similarity_score >= 0.4:
                                 support_scores.append(similarity_score * domain_weight)
                                     "relevant_sentences": sentences[:3],
                                 }
                             )
+                            for source_detail in source_details:
+                                logging.info(f"Source Details:\n{source_detail}\n")
                     except Exception as e:
                         logging.error(f"Error processing {url}: {e}")
             except TimeoutError:
         support_sum = sum(support_scores)
         if total_weight > 0:
+            final_score = min(1.0, support_sum / total_weight)
             # Adjustments
+            if final_score < 0.5:
+                final_score *= 0.9
+            elif final_score > 0.5:
+                final_score *= 1.1
         else:
             final_score = 0.1
             if not relevant_sentences:
                 return None
+            cleaned_content = ""
+            for sentence in relevant_sentences:
+                if (
+                    sentence.endswith(".")
+                    or sentence.endswith("?")
+                    or sentence.endswith("!")
+                ):
+                    cleaned_content += f"{sentence} "
+                else:
+                    cleaned_content += f"{sentence}. "
             semantic_similarity = self._semantic_similarity_with_sentences(
+                claim, cleaned_content
             )
             domain_weight, domain_type = self._get_domain_weight(url)

requirements.txt CHANGED Viewed

@@ -10,4 +10,6 @@ nltk==3.9.1
 sentence-transformers==4.1.0
 torch==2.7.1
 scikit-learn==1.6.1
-textblob==0.19.0

 sentence-transformers==4.1.0
 torch==2.7.1
 scikit-learn==1.6.1
+textblob==0.19.0
+gradio==5.37.0
+gradio_client==1.10.4

semantic_similarity.py CHANGED Viewed

@@ -1,7 +1,6 @@
 import os
 os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
-from typing import List
 import torch
 from sentence_transformers import SentenceTransformer, util
 from textblob import TextBlob
@@ -10,54 +9,35 @@ model = SentenceTransformer("paraphrase-MiniLM-L12-v2")
 model.eval()
-def calculate_semantic_similarity(
-    claim: str, sentences: List[str], similarity_threshold: float = 0.4
-) -> float:
     """
     Calculates a weighted score representing how well a list of sentences supports a claim.
     Args:
         claim (str): The claim to be verified.
-        sentences (List[str]): A list of sentences to check against the claim.
-        similarity_threshold (float, optional): The minimum similarity score for a
-                                               sentence to be considered "supporting". Defaults to 0.5.
     Returns:
         float: A weighted score between 0.0 and 1.0.
     """
-    if not sentences:
-        return 0.0
-    all_scores = []
     with torch.no_grad():
         claim_embedding = model.encode(claim, show_progress_bar=False)
-        sentence_embeddings = model.encode(sentences, show_progress_bar=False)
-        cosine_scores = util.cos_sim(claim_embedding, sentence_embeddings)[0]
-        claim_sentiment = TextBlob(claim).sentiment.polarity
-        for i, sentence in enumerate(sentences):
-            similarity = cosine_scores[i].item()
-            sentence_sentiment = TextBlob(sentence).sentiment.polarity
-            if claim_sentiment * sentence_sentiment > 0:
-                similarity *= 1.1
-            elif claim_sentiment * sentence_sentiment < 0:
-                similarity *= 0.9
-            # print(f"Sentence: {sentence}\nSimilarity: {similarity:.2f}\n")
-            similarity = max(0.0, min(1.0, similarity))
-            all_scores.append(similarity)
-    supporting_scores = [s for s in all_scores if s >= similarity_threshold]
-    proportion_supporting = len(supporting_scores) / len(sentences)
-    if proportion_supporting >= 0.30:
-        final_score = sum(supporting_scores) / len(supporting_scores)
-    else:
-        average_all_scores = sum(all_scores) / len(all_scores)
-        # penalty = 0.80  # 20% reduction
-        final_score = average_all_scores  # * penalty
     return final_score
@@ -65,20 +45,8 @@ if __name__ == "__main__":
     while True:
         claim_to_verify = input("Enter claim to verify: ")
         evidence = input("Enter evidence sentences: ")
-        evidence_sentences = [
-            "The recent legislation is projected to stimulate significant economic growth.",  # High similarity
-            "Market analysts are optimistic about the financial future following the announcement.",  # High similarity
-            "However, some critics argue that the policy might lead to unforeseen inflation.",  # Low similarity
-            "The stock market reacted positively, showing a slight increase.",  # Medium similarity
-            "This is considered a poor decision for the nation's financial stability by some experts.",  # Opposing sentiment
-            "The primary goal of the initiative is to create jobs and encourage consumer spending.",  # High similarity
-            "Unemployment rates are expected to decline in the coming months.",  # High similarity
-            "There has been some public disapproval regarding the policy's rollout.",  # Low similarity
-            "This will surely lead to a stronger and more resilient economy.",  # High similarity
-            "Financial experts have voiced concerns about the potential long-term consequences.",  # Opposing sentiment
-        ]
-        final_score = calculate_semantic_similarity(claim_to_verify, [evidence.strip()])
         print(f"The final weighted support score for the claim is: {final_score:.4f}")

 import os
 os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
 import torch
 from sentence_transformers import SentenceTransformer, util
 from textblob import TextBlob
 model.eval()
+def calculate_semantic_similarity(claim: str, sentence: str) -> float:
     """
     Calculates a weighted score representing how well a list of sentences supports a claim.
     Args:
         claim (str): The claim to be verified.
+        sentences (str): Sentences to check against the claim.
     Returns:
         float: A weighted score between 0.0 and 1.0.
     """
+    if not sentence:
+        return 0.1
     with torch.no_grad():
         claim_embedding = model.encode(claim, show_progress_bar=False)
+        sentence_embedding = model.encode(sentence, show_progress_bar=False)
+        cosine_score = util.cos_sim(claim_embedding, sentence_embedding)
+        claim_sentiment = TextBlob(claim).sentiment.polarity
+        sentence_sentiment = TextBlob(sentence).sentiment.polarity
+        similarity = cosine_score.item()
+        if claim_sentiment * sentence_sentiment > 0:
+            similarity *= 1.1
+        elif claim_sentiment * sentence_sentiment < 0:
+            similarity *= 0.9
+        # print(f"Sentence: {sentence}\nSimilarity: {similarity:.2f}\n")
+        final_score = max(0.0, min(1.0, similarity))
     return final_score
     while True:
         claim_to_verify = input("Enter claim to verify: ")
         evidence = input("Enter evidence sentences: ")
+        final_score = calculate_semantic_similarity(claim_to_verify, evidence)
         print(f"The final weighted support score for the claim is: {final_score:.4f}")