ayush2917 committed on
Commit
15df91e
·
verified ·
1 Parent(s): 463e55e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +86 -10
app.py CHANGED
@@ -50,6 +50,84 @@ def close_db(e=None):
50
  if db is not None:
51
  db.close()
52
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
  # Initialize database
54
  init_db()
55
  app.teardown_appcontext(close_db)
@@ -60,36 +138,34 @@ embedding_model = None
60
  chatbot_tokenizer = None
61
  chatbot_model = None
62
  models_loaded = False
 
 
63
 
64
  def load_models():
65
  global summarizer, embedding_model, chatbot_tokenizer, chatbot_model, models_loaded
66
  try:
67
- # Create cache directory if not exists
68
- os.makedirs('/tmp/cache', exist_ok=True)
69
-
70
- # Load models with explicit cache_dir
71
  summarizer = pipeline(
72
  "summarization",
73
  model="facebook/bart-large-cnn",
74
  device=-1,
75
- cache_dir='/tmp/cache'
 
76
  )
77
 
78
  embedding_model = SentenceTransformer(
79
  "all-MiniLM-L6-v2",
80
- device="cpu",
81
- cache_folder='/tmp/cache'
82
  )
83
 
84
  chatbot_tokenizer = AutoTokenizer.from_pretrained(
85
- "microsoft/DialoGPT-small",
86
- cache_dir='/tmp/cache'
87
  )
88
  chatbot_model = AutoModelForCausalLM.from_pretrained(
89
  "microsoft/DialoGPT-small",
90
  device_map="cpu",
91
  torch_dtype=torch.float16,
92
- cache_dir='/tmp/cache'
93
  )
94
 
95
  models_loaded = True
 
50
  if db is not None:
51
  db.close()
52
 
53
def cache_news(articles):
    """Persist fetched news articles into the local SQLite cache.

    Args:
        articles: iterable of dicts with optional keys title, source,
            published, url, summary, content; missing keys default to ''.

    ``INSERT OR IGNORE`` already skips rows that violate a uniqueness
    constraint, so the previous per-row try/except around each execute
    was redundant; the rows are batched through ``executemany`` instead.
    """
    db = get_db()
    rows = [
        (
            article.get('title', ''),
            article.get('source', ''),
            article.get('published', ''),
            article.get('url', ''),
            article.get('summary', ''),
            article.get('content', ''),
        )
        for article in articles
    ]
    try:
        db.executemany('''
            INSERT OR IGNORE INTO news
            (title, source, published, url, summary, content)
            VALUES (?,?,?,?,?,?)
        ''', rows)
    except sqlite3.IntegrityError:
        # OR IGNORE covers ordinary constraint conflicts; keep the cache
        # write best-effort for anything it does not (e.g. FK violations).
        pass
    db.commit()
72
+
73
def get_cached_news():
    """Return the most recently cached articles as plain dicts.

    Reads up to MAX_NEWS_ARTICLES rows from the ``news`` table,
    newest first by its ``timestamp`` column.
    """
    connection = get_db()
    cursor = connection.execute(
        'SELECT * FROM news ORDER BY timestamp DESC LIMIT ?',
        (MAX_NEWS_ARTICLES,),
    )
    return list(map(dict, cursor.fetchall()))
78
+
79
def fetch_news():
    """Fetch recent India-focused financial news from NewsAPI.

    Throttles requests via the module-level ``last_api_call`` /
    ``API_CALL_INTERVAL`` globals, summarizes each article (BART when
    the models are loaded, otherwise a 200-char excerpt), caches the
    processed articles via ``cache_news``, and returns them.

    Returns:
        list[dict]: processed articles, or [] on any failure.
    """
    global last_api_call

    # Simple rate limit: sleep out whatever remains of the interval.
    now = time.time()
    if now - last_api_call < API_CALL_INTERVAL:
        time.sleep(API_CALL_INTERVAL - (now - last_api_call))
    last_api_call = time.time()

    try:
        to_date = datetime.now().strftime('%Y-%m-%d')
        from_date = (datetime.now() - timedelta(days=1)).strftime('%Y-%m-%d')

        # Pass the query through `params=` so requests URL-encodes it;
        # the previous hand-built f-string left the spaces in the
        # " OR "-joined terms (and the API key) unescaped.
        params = {
            'q': f"{' OR '.join(FINANCIAL_TERMS)} AND {' OR '.join(INDIA_KEYWORDS)}",
            'from': from_date,
            'to': to_date,
            'sortBy': 'publishedAt',
            'pageSize': MAX_NEWS_ARTICLES,
            'apiKey': NEWS_API_KEY,
        }
        response = requests.get('https://newsapi.org/v2/everything',
                                params=params, timeout=10)
        response.raise_for_status()

        articles = response.json().get('articles', [])
        processed = []

        for article in articles[:MAX_NEWS_ARTICLES]:
            # Skip redacted entries and those without body text.  Using
            # .get() here keeps one malformed article from raising
            # KeyError and aborting the whole batch via the outer except.
            if article.get('title') == '[Removed]' or not article.get('content'):
                continue

            content = article['content'][:1024]  # cap input fed to BART
            summary = content[:200] + "..."      # fallback excerpt

            if summarizer and models_loaded:
                try:
                    summary = summarizer(content, max_length=130,
                                         min_length=30,
                                         do_sample=False)[0]['summary_text']
                except Exception as e:
                    # Best effort: keep the excerpt if summarization fails.
                    print(f"Summarization error: {str(e)}")

            processed.append({
                'title': article.get('title', ''),
                'source': article.get('source', {}).get('name', ''),
                'published': article.get('publishedAt', ''),
                'url': article.get('url', ''),
                'summary': summary,
                'content': content,
            })

        cache_news(processed)
        return processed

    except Exception as e:
        print(f"Error fetching news: {str(e)}")
        return []
130
+
131
  # Initialize database
132
  init_db()
133
  app.teardown_appcontext(close_db)
 
138
  chatbot_tokenizer = None
139
  chatbot_model = None
140
  models_loaded = False
141
+ last_api_call = 0
142
+ API_CALL_INTERVAL = 1 # seconds
143
 
144
  def load_models():
145
  global summarizer, embedding_model, chatbot_tokenizer, chatbot_model, models_loaded
146
  try:
147
+ # Load models with explicit cache_dir and CPU optimization
 
 
 
148
  summarizer = pipeline(
149
  "summarization",
150
  model="facebook/bart-large-cnn",
151
  device=-1,
152
+ torch_dtype=torch.float16,
153
+ low_cpu_mem_usage=True
154
  )
155
 
156
  embedding_model = SentenceTransformer(
157
  "all-MiniLM-L6-v2",
158
+ device="cpu"
 
159
  )
160
 
161
  chatbot_tokenizer = AutoTokenizer.from_pretrained(
162
+ "microsoft/DialoGPT-small"
 
163
  )
164
  chatbot_model = AutoModelForCausalLM.from_pretrained(
165
  "microsoft/DialoGPT-small",
166
  device_map="cpu",
167
  torch_dtype=torch.float16,
168
+ low_cpu_mem_usage=True
169
  )
170
 
171
  models_loaded = True