Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -50,6 +50,84 @@ def close_db(e=None):
|
|
| 50 |
if db is not None:
|
| 51 |
db.close()
|
| 52 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 53 |
# Initialize database
|
| 54 |
init_db()
|
| 55 |
app.teardown_appcontext(close_db)
|
|
@@ -60,36 +138,34 @@ embedding_model = None
|
|
| 60 |
chatbot_tokenizer = None
|
| 61 |
chatbot_model = None
|
| 62 |
models_loaded = False
|
|
|
|
|
|
|
| 63 |
|
| 64 |
def load_models():
|
| 65 |
global summarizer, embedding_model, chatbot_tokenizer, chatbot_model, models_loaded
|
| 66 |
try:
|
| 67 |
-
#
|
| 68 |
-
os.makedirs('/tmp/cache', exist_ok=True)
|
| 69 |
-
|
| 70 |
-
# Load models with explicit cache_dir
|
| 71 |
summarizer = pipeline(
|
| 72 |
"summarization",
|
| 73 |
model="facebook/bart-large-cnn",
|
| 74 |
device=-1,
|
| 75 |
-
|
|
|
|
| 76 |
)
|
| 77 |
|
| 78 |
embedding_model = SentenceTransformer(
|
| 79 |
"all-MiniLM-L6-v2",
|
| 80 |
-
device="cpu"
|
| 81 |
-
cache_folder='/tmp/cache'
|
| 82 |
)
|
| 83 |
|
| 84 |
chatbot_tokenizer = AutoTokenizer.from_pretrained(
|
| 85 |
-
"microsoft/DialoGPT-small"
|
| 86 |
-
cache_dir='/tmp/cache'
|
| 87 |
)
|
| 88 |
chatbot_model = AutoModelForCausalLM.from_pretrained(
|
| 89 |
"microsoft/DialoGPT-small",
|
| 90 |
device_map="cpu",
|
| 91 |
torch_dtype=torch.float16,
|
| 92 |
-
|
| 93 |
)
|
| 94 |
|
| 95 |
models_loaded = True
|
|
|
|
| 50 |
if db is not None:
|
| 51 |
db.close()
|
| 52 |
|
| 53 |
+
def cache_news(articles):
    """Write a batch of article dicts into the ``news`` table.

    Each article contributes one row built from its ``title``, ``source``,
    ``published``, ``url``, ``summary`` and ``content`` keys; a missing key
    is stored as an empty string. The statement uses ``INSERT OR IGNORE``,
    so rows that conflict with a table constraint are skipped by SQLite.

    Args:
        articles: Iterable of dict-like objects supporting ``.get``.
    """
    columns = ('title', 'source', 'published', 'url', 'summary', 'content')
    insert_sql = '''
        INSERT OR IGNORE INTO news
        (title, source, published, url, summary, content)
        VALUES (?,?,?,?,?,?)
    '''

    conn = get_db()
    for item in articles:
        row = tuple(item.get(column, '') for column in columns)
        try:
            conn.execute(insert_sql, row)
        except sqlite3.IntegrityError:
            # One rejected row must not stop the rest of the batch.
            continue
    # Single commit for the whole batch.
    conn.commit()
|
| 72 |
+
|
| 73 |
+
def get_cached_news():
    """Return the newest cached articles as plain dictionaries.

    Reads up to ``MAX_NEWS_ARTICLES`` rows from the ``news`` table, ordered
    by ``timestamp`` descending.

    Returns:
        list[dict]: One dict per row.
    """
    # NOTE(review): dict(row) presumably relies on get_db() configuring a
    # mapping-style row factory (e.g. sqlite3.Row) — confirm in get_db().
    query = 'SELECT * FROM news ORDER BY timestamp DESC LIMIT ?'
    rows = get_db().execute(query, (MAX_NEWS_ARTICLES,)).fetchall()
    return list(map(dict, rows))
|
| 78 |
+
|
| 79 |
+
def fetch_news():
    """Fetch the last day's matching articles from NewsAPI, summarize and cache them.

    The search query requires at least one of ``FINANCIAL_TERMS`` and at
    least one of ``INDIA_KEYWORDS``. Each usable article is truncated to
    1024 characters of content and summarized with the loaded
    ``summarizer`` when available; otherwise the first 200 characters plus
    ``"..."`` serve as the summary. The processed batch is handed to
    ``cache_news`` before being returned.

    Returns:
        list[dict]: Processed articles with the keys ``title``, ``source``,
        ``published``, ``url``, ``summary`` and ``content``. An empty list
        is returned if the request, parsing or caching fails.
    """
    global last_api_call

    # Client-side throttle: keep at least API_CALL_INTERVAL seconds between
    # consecutive requests.
    elapsed = time.time() - last_api_call
    if elapsed < API_CALL_INTERVAL:
        time.sleep(API_CALL_INTERVAL - elapsed)
    last_api_call = time.time()

    try:
        now = datetime.now()
        # Parenthesize both groups so the query means
        # "(any financial term) AND (any India keyword)" regardless of how
        # the API ranks AND against OR.
        query = (f"({' OR '.join(FINANCIAL_TERMS)}) AND "
                 f"({' OR '.join(INDIA_KEYWORDS)})")
        # Let requests URL-encode the values instead of splicing raw text
        # (spaces, '&', quotes) into the URL.
        params = {
            'q': query,
            'from': (now - timedelta(days=1)).strftime('%Y-%m-%d'),
            'to': now.strftime('%Y-%m-%d'),
            'sortBy': 'publishedAt',
            'pageSize': MAX_NEWS_ARTICLES,
        }

        # The key travels in a header rather than the URL so it cannot end
        # up in the error message printed below.
        response = requests.get(
            "https://newsapi.org/v2/everything",
            params=params,
            headers={'X-Api-Key': NEWS_API_KEY},
            timeout=10,
        )
        response.raise_for_status()

        # "articles" may be absent or null in the payload.
        articles = response.json().get('articles') or []
        processed = []

        for article in articles[:MAX_NEWS_ARTICLES]:
            # Tolerant lookups: one malformed article must not discard the
            # whole batch via the outer exception handler.
            title = article.get('title')
            raw_content = article.get('content')
            if title == '[Removed]' or not raw_content:
                continue

            content = raw_content[:1024]
            # Fallback summary, used when no model is loaded or it fails.
            summary = content[:200] + "..."

            if summarizer and models_loaded:
                try:
                    summary = summarizer(
                        content,
                        max_length=130,
                        min_length=30,
                        do_sample=False,
                    )[0]['summary_text']
                except Exception as e:
                    # Best effort: keep the fallback summary.
                    print(f"Summarization error: {str(e)}")

            processed.append({
                'title': title,
                'source': (article.get('source') or {}).get('name', ''),
                'published': article.get('publishedAt', ''),
                'url': article.get('url', ''),
                'summary': summary,
                'content': content,
            })

        cache_news(processed)
        return processed

    except Exception as e:
        # Boundary handler: callers always get a list back.
        print(f"Error fetching news: {str(e)}")
        return []
|
| 130 |
+
|
| 131 |
# Initialize database
|
| 132 |
init_db()
|
| 133 |
app.teardown_appcontext(close_db)
|
|
|
|
| 138 |
chatbot_tokenizer = None
|
| 139 |
chatbot_model = None
|
| 140 |
models_loaded = False
|
| 141 |
+
last_api_call = 0
|
| 142 |
+
API_CALL_INTERVAL = 1 # seconds
|
| 143 |
|
| 144 |
def load_models():
|
| 145 |
global summarizer, embedding_model, chatbot_tokenizer, chatbot_model, models_loaded
|
| 146 |
try:
|
| 147 |
+
# Load models with explicit cache_dir and CPU optimization
|
|
|
|
|
|
|
|
|
|
| 148 |
summarizer = pipeline(
|
| 149 |
"summarization",
|
| 150 |
model="facebook/bart-large-cnn",
|
| 151 |
device=-1,
|
| 152 |
+
torch_dtype=torch.float16,
|
| 153 |
+
low_cpu_mem_usage=True
|
| 154 |
)
|
| 155 |
|
| 156 |
embedding_model = SentenceTransformer(
|
| 157 |
"all-MiniLM-L6-v2",
|
| 158 |
+
device="cpu"
|
|
|
|
| 159 |
)
|
| 160 |
|
| 161 |
chatbot_tokenizer = AutoTokenizer.from_pretrained(
|
| 162 |
+
"microsoft/DialoGPT-small"
|
|
|
|
| 163 |
)
|
| 164 |
chatbot_model = AutoModelForCausalLM.from_pretrained(
|
| 165 |
"microsoft/DialoGPT-small",
|
| 166 |
device_map="cpu",
|
| 167 |
torch_dtype=torch.float16,
|
| 168 |
+
low_cpu_mem_usage=True
|
| 169 |
)
|
| 170 |
|
| 171 |
models_loaded = True
|