Spaces:
Sleeping
Sleeping
Commit
·
d51c3cd
1
Parent(s):
f078461
fixed claim verification calculations
Browse files
* check claim against entire paragraph instead of individual sentences
- .gitIgnore +6 -0
- .gitattributes +1 -5
- deploy/main/claim_verifier.py +21 -28
- requirements.txt +3 -1
- semantic_similarity.py +16 -48
.gitIgnore
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
.vscode
|
| 2 |
+
.venv
|
| 3 |
+
venv
|
| 4 |
+
**/__pycache__/
|
| 5 |
+
snli_1.0_dev.jsonl
|
| 6 |
+
api_call.py
|
.gitattributes
CHANGED
|
@@ -33,8 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
-
|
| 37 |
-
.venv
|
| 38 |
-
venv
|
| 39 |
-
**/__pycache__/
|
| 40 |
-
snli_1.0_dev.jsonl
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
|
|
|
|
|
|
|
|
|
|
|
|
deploy/main/claim_verifier.py
CHANGED
|
@@ -208,9 +208,7 @@ class ClaimVerifier:
|
|
| 208 |
def _get_from_cache(self, key: str) -> Optional[Dict]:
|
| 209 |
return self.claim_cache.get(key)
|
| 210 |
|
| 211 |
-
def _semantic_similarity_with_sentences(
|
| 212 |
-
self, claim: str, sentences: List[str]
|
| 213 |
-
) -> float:
|
| 214 |
"""Calculate entailment scores and return the best one."""
|
| 215 |
try:
|
| 216 |
score = calculate_semantic_similarity(claim, sentences)
|
|
@@ -260,15 +258,6 @@ class ClaimVerifier:
|
|
| 260 |
result
|
| 261 |
)
|
| 262 |
|
| 263 |
-
# Enhanced Logging Format
|
| 264 |
-
logging.info(f"\nSource: {url} ({domain_type})")
|
| 265 |
-
# logging.info(
|
| 266 |
-
# f" - Relevant Sentences: {sentences[:3]}"
|
| 267 |
-
# ) # Log first 2 sentences
|
| 268 |
-
logging.info(
|
| 269 |
-
f" - Entailment Score: {similarity_score:.2f}"
|
| 270 |
-
)
|
| 271 |
-
|
| 272 |
total_weight += domain_weight
|
| 273 |
if similarity_score >= 0.4:
|
| 274 |
support_scores.append(similarity_score * domain_weight)
|
|
@@ -282,6 +271,10 @@ class ClaimVerifier:
|
|
| 282 |
"relevant_sentences": sentences[:3],
|
| 283 |
}
|
| 284 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 285 |
except Exception as e:
|
| 286 |
logging.error(f"Error processing {url}: {e}")
|
| 287 |
except TimeoutError:
|
|
@@ -290,12 +283,12 @@ class ClaimVerifier:
|
|
| 290 |
support_sum = sum(support_scores)
|
| 291 |
|
| 292 |
if total_weight > 0:
|
| 293 |
-
final_score = min(1.0, support_sum /
|
| 294 |
# Adjustments
|
| 295 |
-
|
| 296 |
-
|
| 297 |
-
|
| 298 |
-
|
| 299 |
else:
|
| 300 |
final_score = 0.1
|
| 301 |
|
|
@@ -347,19 +340,19 @@ class ClaimVerifier:
|
|
| 347 |
if not relevant_sentences:
|
| 348 |
return None
|
| 349 |
|
| 350 |
-
|
| 351 |
-
|
| 352 |
-
|
| 353 |
-
|
| 354 |
-
|
| 355 |
-
|
| 356 |
-
|
| 357 |
-
|
| 358 |
-
|
| 359 |
-
|
| 360 |
|
| 361 |
semantic_similarity = self._semantic_similarity_with_sentences(
|
| 362 |
-
claim,
|
| 363 |
)
|
| 364 |
|
| 365 |
domain_weight, domain_type = self._get_domain_weight(url)
|
|
|
|
| 208 |
def _get_from_cache(self, key: str) -> Optional[Dict]:
|
| 209 |
return self.claim_cache.get(key)
|
| 210 |
|
| 211 |
+
def _semantic_similarity_with_sentences(self, claim: str, sentences: str) -> float:
|
|
|
|
|
|
|
| 212 |
"""Calculate entailment scores and return the best one."""
|
| 213 |
try:
|
| 214 |
score = calculate_semantic_similarity(claim, sentences)
|
|
|
|
| 258 |
result
|
| 259 |
)
|
| 260 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 261 |
total_weight += domain_weight
|
| 262 |
if similarity_score >= 0.4:
|
| 263 |
support_scores.append(similarity_score * domain_weight)
|
|
|
|
| 271 |
"relevant_sentences": sentences[:3],
|
| 272 |
}
|
| 273 |
)
|
| 274 |
+
|
| 275 |
+
for source_detail in source_details:
|
| 276 |
+
logging.info(f"Source Details:\n{source_detail}\n")
|
| 277 |
+
|
| 278 |
except Exception as e:
|
| 279 |
logging.error(f"Error processing {url}: {e}")
|
| 280 |
except TimeoutError:
|
|
|
|
| 283 |
support_sum = sum(support_scores)
|
| 284 |
|
| 285 |
if total_weight > 0:
|
| 286 |
+
final_score = min(1.0, support_sum / total_weight)
|
| 287 |
# Adjustments
|
| 288 |
+
if final_score < 0.5:
|
| 289 |
+
final_score *= 0.9
|
| 290 |
+
elif final_score > 0.5:
|
| 291 |
+
final_score *= 1.1
|
| 292 |
else:
|
| 293 |
final_score = 0.1
|
| 294 |
|
|
|
|
| 340 |
if not relevant_sentences:
|
| 341 |
return None
|
| 342 |
|
| 343 |
+
cleaned_content = ""
|
| 344 |
+
for sentence in relevant_sentences:
|
| 345 |
+
if (
|
| 346 |
+
sentence.endswith(".")
|
| 347 |
+
or sentence.endswith("?")
|
| 348 |
+
or sentence.endswith("!")
|
| 349 |
+
):
|
| 350 |
+
cleaned_content += f"{sentence} "
|
| 351 |
+
else:
|
| 352 |
+
cleaned_content += f"{sentence}. "
|
| 353 |
|
| 354 |
semantic_similarity = self._semantic_similarity_with_sentences(
|
| 355 |
+
claim, cleaned_content
|
| 356 |
)
|
| 357 |
|
| 358 |
domain_weight, domain_type = self._get_domain_weight(url)
|
requirements.txt
CHANGED
|
@@ -10,4 +10,6 @@ nltk==3.9.1
|
|
| 10 |
sentence-transformers==4.1.0
|
| 11 |
torch==2.7.1
|
| 12 |
scikit-learn==1.6.1
|
| 13 |
-
textblob==0.19.0
|
|
|
|
|
|
|
|
|
| 10 |
sentence-transformers==4.1.0
|
| 11 |
torch==2.7.1
|
| 12 |
scikit-learn==1.6.1
|
| 13 |
+
textblob==0.19.0
|
| 14 |
+
gradio==5.37.0
|
| 15 |
+
gradio_client==1.10.4
|
semantic_similarity.py
CHANGED
|
@@ -1,7 +1,6 @@
|
|
| 1 |
import os
|
| 2 |
|
| 3 |
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
|
| 4 |
-
from typing import List
|
| 5 |
import torch
|
| 6 |
from sentence_transformers import SentenceTransformer, util
|
| 7 |
from textblob import TextBlob
|
|
@@ -10,54 +9,35 @@ model = SentenceTransformer("paraphrase-MiniLM-L12-v2")
|
|
| 10 |
model.eval()
|
| 11 |
|
| 12 |
|
| 13 |
-
def calculate_semantic_similarity(
|
| 14 |
-
claim: str, sentences: List[str], similarity_threshold: float = 0.4
|
| 15 |
-
) -> float:
|
| 16 |
"""
|
| 17 |
Calculates a weighted score representing how well a list of sentences supports a claim.
|
| 18 |
Args:
|
| 19 |
claim (str): The claim to be verified.
|
| 20 |
-
sentences (
|
| 21 |
-
similarity_threshold (float, optional): The minimum similarity score for a
|
| 22 |
-
sentence to be considered "supporting". Defaults to 0.5.
|
| 23 |
-
|
| 24 |
Returns:
|
| 25 |
float: A weighted score between 0.0 and 1.0.
|
| 26 |
"""
|
| 27 |
-
if not
|
| 28 |
-
return 0.
|
| 29 |
-
|
| 30 |
-
all_scores = []
|
| 31 |
|
| 32 |
with torch.no_grad():
|
| 33 |
claim_embedding = model.encode(claim, show_progress_bar=False)
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
claim_sentiment = TextBlob(claim).sentiment.polarity
|
| 37 |
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
sentence_sentiment = TextBlob(sentence).sentiment.polarity
|
| 41 |
-
|
| 42 |
-
if claim_sentiment * sentence_sentiment > 0:
|
| 43 |
-
similarity *= 1.1
|
| 44 |
-
elif claim_sentiment * sentence_sentiment < 0:
|
| 45 |
-
similarity *= 0.9
|
| 46 |
-
|
| 47 |
-
# print(f"Sentence: {sentence}\nSimilarity: {similarity:.2f}\n")
|
| 48 |
-
similarity = max(0.0, min(1.0, similarity))
|
| 49 |
-
all_scores.append(similarity)
|
| 50 |
|
| 51 |
-
|
| 52 |
-
proportion_supporting = len(supporting_scores) / len(sentences)
|
| 53 |
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
# penalty = 0.80 # 20% reduction
|
| 59 |
-
final_score = average_all_scores # * penalty
|
| 60 |
|
|
|
|
|
|
|
| 61 |
return final_score
|
| 62 |
|
| 63 |
|
|
@@ -65,20 +45,8 @@ if __name__ == "__main__":
|
|
| 65 |
while True:
|
| 66 |
claim_to_verify = input("Enter claim to verify: ")
|
| 67 |
evidence = input("Enter evidence sentences: ")
|
| 68 |
-
evidence_sentences = [
|
| 69 |
-
"The recent legislation is projected to stimulate significant economic growth.", # High similarity
|
| 70 |
-
"Market analysts are optimistic about the financial future following the announcement.", # High similarity
|
| 71 |
-
"However, some critics argue that the policy might lead to unforeseen inflation.", # Low similarity
|
| 72 |
-
"The stock market reacted positively, showing a slight increase.", # Medium similarity
|
| 73 |
-
"This is considered a poor decision for the nation's financial stability by some experts.", # Opposing sentiment
|
| 74 |
-
"The primary goal of the initiative is to create jobs and encourage consumer spending.", # High similarity
|
| 75 |
-
"Unemployment rates are expected to decline in the coming months.", # High similarity
|
| 76 |
-
"There has been some public disapproval regarding the policy's rollout.", # Low similarity
|
| 77 |
-
"This will surely lead to a stronger and more resilient economy.", # High similarity
|
| 78 |
-
"Financial experts have voiced concerns about the potential long-term consequences.", # Opposing sentiment
|
| 79 |
-
]
|
| 80 |
|
| 81 |
-
final_score = calculate_semantic_similarity(claim_to_verify,
|
| 82 |
|
| 83 |
print(f"The final weighted support score for the claim is: {final_score:.4f}")
|
| 84 |
|
|
|
|
| 1 |
import os
|
| 2 |
|
| 3 |
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
|
|
|
|
| 4 |
import torch
|
| 5 |
from sentence_transformers import SentenceTransformer, util
|
| 6 |
from textblob import TextBlob
|
|
|
|
| 9 |
model.eval()
|
| 10 |
|
| 11 |
|
| 12 |
+
def calculate_semantic_similarity(claim: str, sentence: str) -> float:
    """
    Score how strongly a piece of evidence text supports a claim.

    The score is the cosine similarity between the embeddings of the claim
    and the evidence. It is scaled up by 10% when TextBlob gives both texts a
    sentiment polarity of the same sign, scaled down by 10% when the signs
    differ, and finally clamped to the range [0.0, 1.0].

    Args:
        claim (str): The claim to be verified.
        sentence (str): Evidence text to check against the claim. This may be
            one sentence or several sentences joined into a single string.
    Returns:
        float: A score between 0.0 and 1.0. Empty or whitespace-only evidence
            yields the fixed fallback score 0.1.
    """
    # Nothing to compare against: return the fallback score instead of
    # embedding an empty (or blank) string.
    if not sentence or not sentence.strip():
        return 0.1

    with torch.no_grad():
        claim_embedding = model.encode(claim, show_progress_bar=False)
        sentence_embedding = model.encode(sentence, show_progress_bar=False)
        similarity = util.cos_sim(claim_embedding, sentence_embedding).item()

    # Only the sign of the product matters: positive means both polarities
    # share a sign, negative means they differ, zero means at least one text
    # has zero polarity and the similarity is left untouched.
    polarity_product = (
        TextBlob(claim).sentiment.polarity * TextBlob(sentence).sentiment.polarity
    )
    if polarity_product > 0:
        similarity *= 1.1
    elif polarity_product < 0:
        similarity *= 0.9

    # The 10% boost can push the value above 1.0 and cosine similarity can be
    # negative, so clamp before returning.
    return max(0.0, min(1.0, similarity))
|
| 42 |
|
| 43 |
|
|
|
|
| 45 |
while True:
|
| 46 |
claim_to_verify = input("Enter claim to verify: ")
|
| 47 |
evidence = input("Enter evidence sentences: ")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 48 |
|
| 49 |
+
final_score = calculate_semantic_similarity(claim_to_verify, evidence)
|
| 50 |
|
| 51 |
print(f"The final weighted support score for the claim is: {final_score:.4f}")
|
| 52 |
|