Mustafa-albakkar committed (verified)
Commit 6bdc0eb · 1 Parent(s): c17f6cf

Create app.py

Files changed (1)
app.py +228 -0
app.py ADDED
@@ -0,0 +1,228 @@
# ============================================================
# analyzer_agent/app.py — Telegram Analyzer Agent (async)
# Mamba + GGUF LLM + Pyrogram + FastAPI
# ============================================================
# Telegram
# TG_API_ID=...
# TG_API_HASH=...
# TG_BOT_TOKEN=...         # a bot with permission to post in the channel
# TG_CHANNEL=@my_channel   # or -1001234567890

# Image agent
# HF_API_TOKEN=...         # if you use HF
# HF_MODEL=stabilityai/stable-diffusion-2
# USE_LOCAL_DIFFUSERS=0    # 1 to run diffusers locally if you are on a GPU
import os
import json
import asyncio
from datetime import datetime
import logging

from fastapi import FastAPI
from pydantic import BaseModel
from apscheduler.schedulers.asyncio import AsyncIOScheduler

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

from pyrogram import Client

# llama.cpp (GGUF)
from llama_cpp import Llama
from huggingface_hub import hf_hub_download


# ---------------- Logging ----------------
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
log = logging.getLogger("analyzer")

# ---------------- Env & config ----------------
TG_API_ID = int(os.getenv("TG_API_ID", "0"))
TG_API_HASH = os.getenv("TG_API_HASH", "")
TG_BOT_TOKEN = os.getenv("TG_BOT_TOKEN")
TG_CHANNEL = os.getenv("TG_CHANNEL")
LOG_PATH = os.getenv("ANALYZER_LOG", "analyzer_log.json")
POSTS_LIMIT = int(os.getenv("ANALYZER_LIMIT", "80"))

MAMBA_MODEL_PATH = os.getenv("MAMBA_MODEL_PATH", "state-spaces/mamba2-1.3b")

# LLM_MODEL_PATH is ignored in the GGUF case, but we keep it for consistency
LLM_MODEL_PATH = os.getenv("LLM_MODEL_PATH", "unused_for_gguf")
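
# A hypothetical shell setup for local testing (all values below are
# placeholders, not real credentials):
#   export TG_API_ID=123456
#   export TG_API_HASH=0123456789abcdef
#   export TG_BOT_TOKEN=123456:ABC-placeholder
#   export TG_CHANNEL=@my_channel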

# ---------------- Load Mamba ----------------
log.info("Loading Mamba model...")

mamba_tok = AutoTokenizer.from_pretrained(MAMBA_MODEL_PATH)
mamba_model = AutoModelForCausalLM.from_pretrained(
    MAMBA_MODEL_PATH,
    torch_dtype=torch.float16,
    low_cpu_mem_usage=True,
)


# ---------------- Load GGUF LLM (Zephyr 7B) ----------------
log.info("Loading LLM interpreter (GGUF + llama.cpp)...")

LLM_GGUF_REPO = "TheBloke/zephyr-7B-beta-GGUF"
LLM_GGUF_FILE = "zephyr-7b-beta.Q6_K.gguf"

LLM_LOCAL_PATH = os.getenv("LLM_GGUF_PATH", f"./{LLM_GGUF_FILE}")

if not os.path.exists(LLM_LOCAL_PATH):
    log.info("Downloading GGUF model from HuggingFace...")
    LLM_LOCAL_PATH = hf_hub_download(
        repo_id=LLM_GGUF_REPO,
        filename=LLM_GGUF_FILE,
    )

llm = Llama(
    model_path=LLM_LOCAL_PATH,
    n_ctx=4096,
    n_threads=4,
    n_gpu_layers=0,  # set a larger value if you have a GPU
)

log.info("GGUF model loaded successfully.")
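
# A rough sketch of GPU offload, assuming a CUDA build of llama-cpp-python
# (n_gpu_layers=-1 offloads every layer):
#   llm = Llama(model_path=LLM_LOCAL_PATH, n_ctx=4096, n_gpu_layers=-1)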

# ---------------- Pyrogram Client ----------------
tg_client = Client("analyzer_bot", api_id=TG_API_ID, api_hash=TG_API_HASH, bot_token=TG_BOT_TOKEN)


# ---------------- FastAPI ----------------
app = FastAPI(title="Analyzer Agent")

# ---------------- Helpers ----------------
def save_log(entry):
    logs = []
    if os.path.exists(LOG_PATH):
        try:
            with open(LOG_PATH, "r", encoding="utf-8") as f:
                logs = json.load(f)
        except Exception:
            logs = []
    logs.insert(0, entry)
    with open(LOG_PATH, "w", encoding="utf-8") as f:
        json.dump(logs, f, ensure_ascii=False, indent=2)
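
# For illustration, the log file grows as a newest-first JSON array; one entry
# (hypothetical values) looks roughly like:
#   [{"time": "2024-01-01T00:05:00", "posts_count": 80, "stats_text": "[...]",
#     "mamba_output": "...", "advice": "..."}]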

def encode_stats_for_mamba(posts):
    seq = []
    for p in posts:
        seq.append(f"[{p['id']}: VW={p['views']}, FW={p['forwards']}, RC={p['reactions']}]")
    return " ".join(seq)
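
# For illustration, two posts encode to a flat token sequence such as
# (hypothetical values):
#   "[101: VW=4500, FW=12, RC=87] [102: VW=900, FW=3, RC=15]"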

def run_mamba(text):
    inp = mamba_tok(text, return_tensors="pt")
    with torch.no_grad():
        out = mamba_model.generate(**inp, max_new_tokens=64, do_sample=False)
    return mamba_tok.decode(out[0], skip_special_tokens=True)


def interpret_with_llm(mamba_output):
    prompt = (
        "These are the results of a statistical analysis of a Telegram channel:\n"
        f"{mamba_output}\n\n"
        "Analyze the performance and extract:\n"
        "- Strengths\n"
        "- Weaknesses\n"
        "- The best expected posting times\n"
        "- The type of content that increases reach\n"
        "- Strategies for growing subscriptions and engagement\n"
        "Write the analysis in Arabic, organized and concise."
    )

    res = llm(
        prompt,
        max_tokens=250,
        temperature=0.3,
        top_p=0.95,
    )

    return res["choices"][0]["text"].strip()


# ---------------- Fetch Telegram Stats ----------------
async def fetch_telegram_stats(limit=POSTS_LIMIT):
    posts = []
    async with tg_client:
        async for msg in tg_client.get_chat_history(TG_CHANNEL, limit=limit):
            if msg is None:
                continue
            views = getattr(msg, "views", 0) or 0
            forwards = getattr(msg, "forwards", 0) or 0

            reactions = 0
            if getattr(msg, "reactions", None):
                try:
                    reactions = sum(r.count for r in msg.reactions.reactions)
                except Exception:
                    reactions = 0

            posts.append({
                "id": msg.message_id if hasattr(msg, "message_id") else msg.id,
                "date": msg.date.isoformat() if getattr(msg, "date", None) else None,
                "views": views,
                "forwards": forwards,
                "reactions": reactions,
            })
    return posts
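
# Each entry in the returned list is a plain dict; a sketch with hypothetical
# values:
#   {"id": 123, "date": "2024-01-01T12:00:00+00:00", "views": 4500,
#    "forwards": 12, "reactions": 87}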

# ---------------- Main pipeline ----------------
async def daily_job():
    log.info("Running daily analysis job...")
    posts = await fetch_telegram_stats()

    if not posts:
        log.warning("No posts found for analysis.")
        entry = {"time": datetime.utcnow().isoformat(), "error": "no_posts"}
        save_log(entry)
        return entry

    stats_text = encode_stats_for_mamba(posts)
    # Model inference is blocking; run it off the event loop so the API
    # stays responsive.
    mamba_out = await asyncio.to_thread(run_mamba, stats_text)
    interpretation = await asyncio.to_thread(interpret_with_llm, mamba_out)

    entry = {
        "time": datetime.utcnow().isoformat(),
        "posts_count": len(posts),
        "stats_text": stats_text,
        "mamba_output": mamba_out,
        "advice": interpretation,
    }

    save_log(entry)
    log.info("Analysis saved.")
    return entry

# ---------------- API endpoints ----------------
@app.get("/run_once")
async def run_once():
    return await daily_job()


@app.get("/logs")
def get_logs():
    if os.path.exists(LOG_PATH):
        with open(LOG_PATH, "r", encoding="utf-8") as f:
            return json.load(f)
    return []
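
# Example usage (assuming the default port 7861):
#   curl http://localhost:7861/run_once   # trigger one analysis pass now
#   curl http://localhost:7861/logs       # read the saved analysis entries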

# ---------------- Scheduler ----------------
scheduler = AsyncIOScheduler()
# AsyncIOScheduler runs coroutine functions directly; schedule daily at 00:05.
scheduler.add_job(daily_job, "cron", hour=0, minute=5)


@app.on_event("startup")
async def _start_scheduler():
    # Start inside the running event loop; starting at import time would bind
    # to a loop that uvicorn never runs.
    scheduler.start()

# ---------------- Main ----------------
if __name__ == "__main__":
    import uvicorn
    log.info("Starting Analyzer Agent...")
    uvicorn.run("analyzer_agent.app:app", host="0.0.0.0", port=int(os.getenv("PORT", "7861")), reload=False)
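
# Alternatively, assuming app.py sits inside an analyzer_agent/ package, the
# server can be launched with the uvicorn CLI:
#   uvicorn analyzer_agent.app:app --host 0.0.0.0 --port 7861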