import streamlit as st
from GoogleNews import GoogleNews

import pandas as pd
import numpy as np
import spacy
import gensim
import string
import re
import tweepy
import emoji
import sklearn
from sklearn.metrics import classification_report, recall_score, precision_score, accuracy_score, f1_score
from sklearn.metrics.pairwise import cosine_similarity

					
						
# Twitter API credentials; replace the placeholders with your own app's keys.
# Real credentials should never be committed to source control.
consumerKey = 'YOUR_CONSUMER_KEY'
consumerSecret = 'YOUR_CONSUMER_SECRET'
accessToken = 'YOUR_ACCESS_TOKEN'
accessTokenSecret = 'YOUR_ACCESS_TOKEN_SECRET'

auth = tweepy.OAuthHandler(consumerKey, consumerSecret)
auth.set_access_token(accessToken, accessTokenSecret)
api = tweepy.API(auth)
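# Optional sanity check (a sketch, not part of the original flow): tweepy's
# verify_credentials endpoint confirms the keys authenticate before the app
# starts querying timelines. Uncomment when running with real keys:
# api.verify_credentials()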
					
						
st.sidebar.markdown('مواقع اخبارية معتمدة')  # "Accredited news sites"
st.sidebar.markdown("[العربية](https://www.alarabiya.net/)")
st.sidebar.markdown("[الجزيرة نت](https://www.aljazeera.net/news/)")
st.sidebar.markdown("[CNN Arabic](https://arabic.cnn.com/)")
st.sidebar.markdown("[BBC Arabic](https://www.bbc.com/arabic)")
st.sidebar.markdown("[Reuters Arabic](https://www.reutersagency.com/ar/)")
st.sidebar.markdown("[وكالة الانباء الكويتية](https://www.kuna.net.kw/Default.aspx?language=ar)")
st.sidebar.markdown("[وكالة الانباء السعودية](https://www.spa.gov.sa/)")
st.sidebar.markdown("[وكالة الانباء الاماراتية](http://wam.ae/ar)")
st.sidebar.markdown("[وكالة الانباء العمانية](https://omannews.gov.om/)")
st.sidebar.markdown("[وكالة الانباء القطرية](https://www.qna.org.qa/)")
st.sidebar.markdown("[وكالة الانباء البحرينية](https://www.bna.bh/)")
st.sidebar.markdown("[اتحاد وكالات الأنباء العربية: فانا](https://www.fananews.com/)")
					
						

nlp = spacy.load("spacy.aravec.model")  # pre-trained AraVec Arabic word vectors packaged as a spaCy model
					
						

st.write("""
YAQEN: an Arabic news headline detection system, submitted to the College of
Graduate Studies in partial fulfillment of the requirements for the M.Sc.
degree in Computing Information Systems.

Submitted by: Reem Alfouzan

Project Supervisor: Dr. Abdullah Almutairi
""")
					
						
tx = st.text_input('الرجاء ادخال العنوان المراد التاكد من صحته')  # "Please enter the headline to be verified"
					
						
def clean_str(text):
    """AraVec-style normalisation: strip diacritics, collapse elongation,
    and unify common Arabic letter variants."""
    search = ["أ", "إ", "آ", "ة", "_", "-", "/", ".", "،", " و ", " يا ", '"', "ـ", "'", "ى", "\\", '\n', '\t', '"', '?', '؟', '!']
    replace = ["ا", "ا", "ا", "ه", " ", " ", "", "", "", " و", " يا", "", "", "", "ي", "", ' ', ' ', ' ', ' ? ', ' ؟ ', ' ! ']

    # remove tashkeel (Arabic diacritics)
    p_tashkeel = re.compile(r'[\u0617-\u061A\u064B-\u0652]')
    text = re.sub(p_tashkeel, "", text)

    # collapse elongation: any run of a repeated character is reduced to two
    p_longation = re.compile(r'(.)\1+')
    subst = r"\1\1"
    text = re.sub(p_longation, subst, text)

    text = text.replace('وو', 'و')
    text = text.replace('يي', 'ي')
    text = text.replace('اا', 'ا')

    for i in range(0, len(search)):
        text = text.replace(search[i], replace[i])

    text = text.strip()
    return text
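# Hand-checked examples of the normalisation above:
#   clean_str("أهلاً")    -> "اهلا"  (hamza seat folded to bare alef, diacritic stripped)
#   clean_str("رااااائع") -> "رائع"  (letter elongation collapsed)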
					
						

def split_hashtag_to_words(tag):
    tag = tag.replace('#', '')
    tags = tag.split('_')
    if len(tags) > 1:
        return tags
    # no underscores: fall back to CamelCase / digit boundaries
    pattern = re.compile(r"[A-Z][a-z]+|\d+|[A-Z]+(?![a-z])")
    return pattern.findall(tag)
					
						

def clean_hashtag(text):
    words = text.split()
    text = list()
    for word in words:
        if is_hashtag(word):
            text.extend(extract_hashtag(word))
        else:
            text.append(word)
    return " ".join(text)
					
						

def is_hashtag(word):
    return word.startswith("#")
					
						

def extract_hashtag(text):
    hash_list = [re.sub(r"(\W+)$", "", i) for i in text.split() if i.startswith("#")]
    word_list = []
    for word in hash_list:
        word_list.extend(split_hashtag_to_words(word))
    return word_list
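# Hand-checked examples:
#   extract_hashtag("#فيروس_كورونا") -> ["فيروس", "كورونا"]  (underscore split)
#   extract_hashtag("#BreakingNews") -> ["Breaking", "News"]  (CamelCase split)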
					
						


def clean_tweet(text):
    text = re.sub(r"http\S+", "", text)   # drop URLs
    text = re.sub(r"www\.\S+", "", text)  # dot escaped so only literal "www." matches
    # strip emoji; replace_emoji assumes the emoji package >= 2.0, where the
    # UNICODE_EMOJI constant used in older versions no longer exists
    text = emoji.replace_emoji(text, '')
    return text
					
						


class Preprocessor:
    """Wraps a spaCy tokenizer so that clean_str runs on every input text."""

    def __init__(self, tokenizer, **cfg):
        self.tokenizer = tokenizer

    def __call__(self, text):
        preprocessed = clean_str(text)
        return self.tokenizer(preprocessed)
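# The assignment `nlp.tokenizer = Preprocessor(nlp.tokenizer)` further below
# installs this wrapper, so every later nlp(...) call tokenizes
# clean_str-normalised text; all similarity scores are therefore computed on
# the same normalised form as the user's query.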
					
						


def No_Rumors(tx):
    # compare the headline against the timeline of @No_Rumors, a
    # rumor-debunking account: a close match means a known, debunked rumor
    no_of_tweets = api.get_user(screen_name="No_Rumors").statuses_count
    tweets = tweepy.Cursor(api.user_timeline, screen_name="No_Rumors", tweet_mode="extended").items(no_of_tweets)
    data = []
    for i in tweets:
        data.append({"text": i.full_text})

    result_text = pd.DataFrame(data)

    if len(result_text) == 0:
        # nothing to match against: no debunk exists for this headline
        prediction = 'الخبر صحيح'
    else:
        result_text['text'] = result_text['text'].apply(lambda x: clean_tweet(x))
        result_text['text'] = result_text['text'].apply(lambda x: clean_hashtag(x))
        result_text['text'] = result_text['text'].apply(lambda x: clean_str(x))
        result_text['text'] = result_text['text'].apply(lambda x: nlp(x).similarity(nlp(tx)))

        sg300top = result_text['text'].max(axis=0)

        if sg300top >= .85:
            prediction = 'الخبر زائف'   # "fake news"
        else:
            prediction = 'الخبر صحيح'   # "true news"

    return prediction
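# Decision rule: the 0.85 cosine-similarity cut-off above treats a headline
# that matches a @No_Rumors debunk at or above it as 'الخبر زائف' ("fake news")
# and anything below it as 'الخبر صحيح' ("true news"); the threshold is this
# project's chosen operating point, not a universal constant.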
					
						


nlp.tokenizer = Preprocessor(nlp.tokenizer)
					
						

if len(tx) != 0:
    googlenews = GoogleNews(lang='ar')
    googlenews.clear()

    f = 0
    Prediction = ''
    top_similar_ind = ''
    top_similar_news = ''
    medium = ''
    top_similar_ind2 = ''
    tp_desc = ''
    descr = ''  # initialised together with date and link so the output
    date = ''   # block below never reads an unbound name when no results
    link = ''   # are returned

    st.markdown(f"Searching for: {tx}")
    st.markdown("ـ" * 90)  # horizontal rule drawn with Arabic tatweel characters

    tx = clean_hashtag(tx)
    tx = clean_str(tx)

    googlenews.search(tx)
    result = googlenews.page_at(1)
    googlenews.clear()
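    # Each item in `result` is a dict produced by the GoogleNews package; the
    # branches below read its 'title', 'desc', 'media', 'date' and 'link' keys.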
					
						

    if len(result) == 0:
        Prediction = 'الخبر زائف'                  # "fake news"
        top_similar_news = 'لا يوجد اخبار مماثله'  # "no similar news"
        medium = 'لا يوجد مصدر'                    # "no source"
        tp_desc = 'لا يوجد وصف'                    # "no description"
					
						

    else:
        # collect the result titles
        result_text = {"Text": []}
        for i in range(len(result)):
            title = result[i]['title']
            result_text['Text'].append(title)

        # collect the result descriptions
        result_text2 = {"Text": []}
        for i in range(len(result)):
            desc = result[i]['desc']
            result_text2['Text'].append(desc)

        result_text = pd.DataFrame(result_text)
        result_text2 = pd.DataFrame(result_text2)

        data = pd.DataFrame()
        data['Text2'] = result_text['Text'].copy()

        # similarity of each result title to the query headline
        data['Text2'] = data['Text2'].apply(lambda x: nlp(x).similarity(nlp(tx)))
        sg300top = data['Text2'].max(axis=0)

        top_similar_ind = np.argmax(data['Text2'])
        top_similar_news = result[top_similar_ind]['title']
        descr = result[top_similar_ind]['desc']
        medium = result[top_similar_ind]['media']
        date = result[top_similar_ind]['date']
        link = result[top_similar_ind]['link']

        # the same comparison against the result descriptions
        data['Text3'] = result_text2['Text'].copy()
        data['Text3'] = data['Text3'].apply(lambda x: nlp(x).similarity(nlp(tx)))
        sg300top2 = data['Text3'].max(axis=0)
        top_similar_ind2 = np.argmax(data['Text3'])
        tp_desc = result[top_similar_ind2]['desc']

        if sg300top >= .85 or sg300top2 >= .85:
            # a similar story exists in the news results; cross-check it
            # against the @No_Rumors debunking account
            Prediction = No_Rumors(tx)
        else:
            Prediction = 'الخبر زائف'  # "fake news": no similar coverage found
					
						

    st.markdown(f"System Prediction : {Prediction}")
    st.markdown(f"الخبر المماثل: {top_similar_news}")  # most similar headline
    st.markdown("")
    st.markdown(f"تاريخ الخبر: {date}")                # publication date
    st.markdown("")
    st.markdown(f"التفصيل: {descr}")                   # story description
    st.markdown("")
    st.markdown(f"المصدر: {medium}")                   # source outlet
    st.markdown("")
    st.markdown(f"رابط الخبر: {link}")                 # link to the story