Spaces:
Sleeping
Sleeping
| import os | |
| import re | |
| from confusables import is_confusable, confusable_regex | |
# Path to the brand list; resolved relative to this module's directory so it
# does not depend on the process's current working directory.
SCAM_BRANDS_FILE = os.path.join(os.path.dirname(__file__), 'scam_brands.txt')
def load_scam_brands(path=None):
    """Load the list of scam brands from a text file.

    The file is expected to hold one brand per line. Surrounding whitespace
    and surrounding double quotes are removed from every line; blank lines
    and entries that are empty once unquoted (e.g. a bare ``""``) are skipped
    so that no empty brand reaches the regex-building step.

    Args:
        path: Optional path of the brand file. Defaults to
            ``SCAM_BRANDS_FILE`` when omitted or ``None``.

    Returns:
        A list of brand strings in file order.

    Raises:
        OSError: If the file cannot be opened (e.g. it does not exist).
    """
    if path is None:
        path = SCAM_BRANDS_FILE

    brands = []
    # 'utf-8-sig' decodes plain UTF-8 unchanged but drops a leading BOM.
    # With plain 'utf-8' the BOM is kept, is not whitespace for str.strip(),
    # and would end up glued to the first brand.
    with open(path, encoding='utf-8-sig') as f:
        for line in f:
            brand = line.strip().strip('"')
            # Skip blank lines and entries that were nothing but quotes.
            if brand.strip():
                brands.append(brand)
    return brands
# Brand list read once when the module is imported; find_confusable_brand
# iterates over this list on every call.
SCAM_BRANDS = load_scam_brands()
# Compiled confusable patterns keyed by brand. Filled lazily so the regex for
# a given brand is built and compiled once instead of on every message.
_BRAND_PATTERNS = {}


def _pattern_for(brand):
    """Return the compiled confusable regex for *brand*, building it on first use."""
    pattern = _BRAND_PATTERNS.get(brand)
    if pattern is None:
        regex_string = confusable_regex(brand, include_character_padding=False)
        pattern = re.compile(regex_string)
        _BRAND_PATTERNS[brand] = pattern
    return pattern


def find_confusable_brand(message):
    """Return the first scam brand that appears in *message* in a look-alike spelling.

    Each brand in ``SCAM_BRANDS`` is expanded into a confusable/homoglyph
    regex and searched for in the message. A hit is ignored when the matched
    text equals the brand after trimming whitespace and lowercasing both
    sides, i.e. a plain spelling of the brand in any letter case is NOT
    reported. Any other hit is reported.

    Args:
        message: The text to inspect. An empty string or ``None`` yields
            ``None``.

    Returns:
        The brand (as listed in ``SCAM_BRANDS``) whose look-alike variant was
        found first, or ``None`` when no variant is present.
    """
    if not message:
        return None

    for brand in SCAM_BRANDS:
        expected = brand.lower().strip()
        for match in _pattern_for(brand).finditer(message):
            matched_text = match.group(0)
            # A match that is just the brand itself (ignoring case and outer
            # whitespace) is a legitimate mention, not a spoof.
            if matched_text.strip().lower() == expected:
                continue
            # Diagnostic output kept from the original implementation.
            print(f"matched_text: {matched_text.lower().strip()} brand: {brand.lower().strip()}")
            return brand
    return None
def test_find_confusable_brand():
    """Run find_confusable_brand over sample messages and print one verdict line per message."""
    samples = (
        # Greek omicron in place of the Latin "o".
        "This is a message from Amazοn support.",
        # Greek capital alpha in place of the Latin "A".
        "Your Αpple account has been locked.",
        # Digit zero in place of the letter "o".
        "Contact S0ciété Générale for more info.",
        "Welcome to Netflix!",
        "This is a message from a random sender.",
        "Bonjour, c'est le livreur votre colis ne rentrait pas dans la boite aux lettres merci de choisir un point relais sur : https://mondiaIrelais-expedition.com",
        "599915 est votre code de vérification Leboncoin.",
    )
    for sample in samples:
        detected = find_confusable_brand(sample)
        if not detected:
            print(f"[OK] Message: '{sample}' => No confusable brand detected.")
            continue
        print(f"[ALERT] Message: '{sample}' => Confusable brand detected: {detected}")
# When executed as a script, run the sample messages through the detector.
if __name__ == "__main__":
    test_find_confusable_brand()