Spaces:

ayush2917
/

finance-news-api

Running

App Files Files Community

ayush2917 committed on Mar 31

Commit

15df91e

verified ·

1 Parent(s): 463e55e

Update app.py

Browse files

Files changed (1) hide show

app.py +86 -10

app.py CHANGED Viewed

@@ -50,6 +50,84 @@ def close_db(e=None):
     if db is not None:
         db.close()
 # Initialize database
 init_db()
 app.teardown_appcontext(close_db)
@@ -60,36 +138,34 @@ embedding_model = None
 chatbot_tokenizer = None
 chatbot_model = None
 models_loaded = False
 def load_models():
     global summarizer, embedding_model, chatbot_tokenizer, chatbot_model, models_loaded
     try:
-        # Create cache directory if not exists
-        os.makedirs('/tmp/cache', exist_ok=True)
-        # Load models with explicit cache_dir
         summarizer = pipeline(
             "summarization",
             model="facebook/bart-large-cnn",
             device=-1,
-            cache_dir='/tmp/cache'
         )
         embedding_model = SentenceTransformer(
             "all-MiniLM-L6-v2",
-            device="cpu",
-            cache_folder='/tmp/cache'
         )
         chatbot_tokenizer = AutoTokenizer.from_pretrained(
-            "microsoft/DialoGPT-small",
-            cache_dir='/tmp/cache'
         )
         chatbot_model = AutoModelForCausalLM.from_pretrained(
             "microsoft/DialoGPT-small",
             device_map="cpu",
             torch_dtype=torch.float16,
-            cache_dir='/tmp/cache'
         )
         models_loaded = True

     if db is not None:
         db.close()
+def cache_news(articles):
+    """Store article dicts in the ``news`` table and commit once at the end.
+
+    Each article contributes one row built from its ``title``, ``source``,
+    ``published``, ``url``, ``summary`` and ``content`` keys; a missing key
+    is stored as an empty string. Rows are written with ``INSERT OR IGNORE``,
+    and any ``sqlite3.IntegrityError`` that is still raised for a row is
+    skipped so the remaining articles are processed.
+    """
+    columns = ('title', 'source', 'published', 'url', 'summary', 'content')
+    connection = get_db()
+    for entry in articles:
+        row = tuple(entry.get(column, '') for column in columns)
+        try:
+            connection.execute('''
+                INSERT OR IGNORE INTO news
+                (title, source, published, url, summary, content)
+                VALUES (?,?,?,?,?,?)
+            ''', row)
+        except sqlite3.IntegrityError:
+            # Best effort: a rejected row must not stop the rest of the batch.
+            pass
+    connection.commit()
+def get_cached_news():
+    """Return cached news rows as dicts, ordered by ``timestamp`` descending.
+
+    At most ``MAX_NEWS_ARTICLES`` rows are read from the ``news`` table.
+    NOTE(review): ``dict(row)`` presumably relies on the connection using a
+    mapping-style row factory such as ``sqlite3.Row`` -- confirm in ``get_db``.
+    """
+    query = 'SELECT * FROM news ORDER BY timestamp DESC LIMIT ?'
+    rows = get_db().execute(query, (MAX_NEWS_ARTICLES,)).fetchall()
+    return list(map(dict, rows))
+def fetch_news():
+    """Fetch recent financial news from NewsAPI, summarize it and cache it.
+
+    Calls are spaced at least ``API_CALL_INTERVAL`` seconds apart: when the
+    previous call was more recent than that, the function sleeps for the
+    remainder first. Articles published between yesterday and today are
+    requested. Entries titled ``'[Removed]'`` or without content are dropped,
+    content is truncated to 1024 characters, and each kept article gets a
+    summary from ``summarizer`` when the models are loaded, otherwise the
+    first 200 characters of its content followed by ``"..."``.
+
+    Returns:
+        A list of dicts with the keys ``title``, ``source``, ``published``,
+        ``url``, ``summary`` and ``content``. The same list is handed to
+        ``cache_news``. On any failure the error is printed and an empty
+        list is returned.
+    """
+    global last_api_call
+    elapsed = time.time() - last_api_call
+    if elapsed < API_CALL_INTERVAL:
+        time.sleep(API_CALL_INTERVAL - elapsed)
+    last_api_call = time.time()
+    try:
+        now = datetime.now()
+        # Parenthesize both keyword groups: without them AND binds tighter
+        # than OR and the India filter only applies to the adjacent terms.
+        query = (f"({' OR '.join(FINANCIAL_TERMS)}) AND "
+                 f"({' OR '.join(INDIA_KEYWORDS)})")
+        response = requests.get(
+            "https://newsapi.org/v2/everything",
+            # Let requests URL-encode the query instead of interpolating it.
+            params={
+                'q': query,
+                'from': (now - timedelta(days=1)).strftime('%Y-%m-%d'),
+                'to': now.strftime('%Y-%m-%d'),
+                'sortBy': 'publishedAt',
+                'pageSize': MAX_NEWS_ARTICLES,
+            },
+            # Send the key as a header so it never appears in the request URL,
+            # which HTTP error messages (printed below) include verbatim.
+            headers={'X-Api-Key': NEWS_API_KEY},
+            timeout=10,
+        )
+        response.raise_for_status()
+        articles = response.json().get('articles') or []
+        processed = []
+        for article in articles[:MAX_NEWS_ARTICLES]:
+            raw_content = article.get('content')
+            if article.get('title') == '[Removed]' or not raw_content:
+                continue
+            content = raw_content[:1024]
+            summary = content[:200] + "..."
+            if summarizer and models_loaded:
+                try:
+                    summary = summarizer(content, max_length=130, min_length=30, do_sample=False)[0]['summary_text']
+                except Exception as e:
+                    # Keep the truncated-content fallback summary.
+                    print(f"Summarization error: {str(e)}")
+            # Use .get() so one malformed article cannot raise and discard
+            # the whole batch through the outer handler.
+            processed.append({
+                'title': article.get('title'),
+                'source': (article.get('source') or {}).get('name', ''),
+                'published': article.get('publishedAt', ''),
+                'url': article.get('url', ''),
+                'summary': summary,
+                'content': content
+            })
+        cache_news(processed)
+        return processed
+    except Exception as e:
+        # Top-level boundary: callers rely on getting a list back.
+        print(f"Error fetching news: {str(e)}")
+        return []
 # Initialize database
 init_db()
 app.teardown_appcontext(close_db)
 chatbot_tokenizer = None
 chatbot_model = None
 models_loaded = False
+# time.time() value recorded by fetch_news() just before its latest NewsAPI request.
+last_api_call = 0
+# Minimum gap fetch_news() keeps between requests; it sleeps for the remainder if called sooner.
+API_CALL_INTERVAL = 1  # seconds
 def load_models():
     global summarizer, embedding_model, chatbot_tokenizer, chatbot_model, models_loaded
     try:
+        # Load models with reduced-memory CPU settings (no explicit cache_dir is passed)
         summarizer = pipeline(
             "summarization",
             model="facebook/bart-large-cnn",
             device=-1,
+            torch_dtype=torch.float16,
+            low_cpu_mem_usage=True
         )
         embedding_model = SentenceTransformer(
             "all-MiniLM-L6-v2",
+            device="cpu"
         )
         chatbot_tokenizer = AutoTokenizer.from_pretrained(
+            "microsoft/DialoGPT-small"
         )
         chatbot_model = AutoModelForCausalLM.from_pretrained(
             "microsoft/DialoGPT-small",
             device_map="cpu",
             torch_dtype=torch.float16,
+            low_cpu_mem_usage=True
         )
         models_loaded = True