Spaces:
Running
Running
Update app-backup.py
Browse files- app-backup.py +63 -16
app-backup.py
CHANGED
|
@@ -40,7 +40,7 @@ def extract_keywords(text: str, top_k: int = 5) -> str:
|
|
| 40 |
return " ".join(key_tokens)
|
| 41 |
|
| 42 |
##############################################################################
|
| 43 |
-
#
|
| 44 |
# - ์์ 20๊ฐ ๊ฒฐ๊ณผ JSON์ LLM์ ๋๊ธธ ๋ link, snippet ๋ฑ ๋ชจ๋ ํฌํจ
|
| 45 |
##############################################################################
|
| 46 |
def do_web_search(query: str) -> str:
|
|
@@ -50,30 +50,77 @@ def do_web_search(query: str) -> str:
|
|
| 50 |
"""
|
| 51 |
try:
|
| 52 |
url = "https://api.serphouse.com/serp/live"
|
|
|
|
|
|
|
| 53 |
params = {
|
| 54 |
"q": query,
|
| 55 |
"domain": "google.com",
|
| 56 |
-
"
|
| 57 |
"device": "desktop",
|
| 58 |
-
"
|
| 59 |
-
"
|
| 60 |
-
"api_token": SERPHOUSE_API_KEY,
|
| 61 |
}
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 66 |
results = data.get("results", {})
|
| 67 |
-
organic =
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 68 |
if not organic:
|
| 69 |
-
|
| 70 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 71 |
summary_lines = []
|
| 72 |
-
for idx, item in enumerate(
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 76 |
return "\n".join(summary_lines)
|
|
|
|
| 77 |
except Exception as e:
|
| 78 |
logger.error(f"Web search failed: {e}")
|
| 79 |
return f"Web search failed: {str(e)}"
|
|
|
|
| 40 |
return " ".join(key_tokens)
|
| 41 |
|
| 42 |
##############################################################################
|
| 43 |
+
# SerpHouse Live endpoint 호출
|
| 44 |
# - ์์ 20๊ฐ ๊ฒฐ๊ณผ JSON์ LLM์ ๋๊ธธ ๋ link, snippet ๋ฑ ๋ชจ๋ ํฌํจ
|
| 45 |
##############################################################################
|
| 46 |
def do_web_search(query: str) -> str:
|
|
|
|
| 50 |
"""
|
| 51 |
try:
|
| 52 |
url = "https://api.serphouse.com/serp/live"
|
| 53 |
+
|
| 54 |
+
# 기본 GET 방식으로 파라미터 간소화하고 결과 수를 20개로 제한
|
| 55 |
params = {
|
| 56 |
"q": query,
|
| 57 |
"domain": "google.com",
|
| 58 |
+
"serp_type": "web", # ๊ธฐ๋ณธ ์น ๊ฒ์
|
| 59 |
"device": "desktop",
|
| 60 |
+
"lang": "en",
|
| 61 |
+
"num": "20" # ์ต๋ 20๊ฐ ๊ฒฐ๊ณผ๋ง ์์ฒญ
|
|
|
|
| 62 |
}
|
| 63 |
+
|
| 64 |
+
headers = {
|
| 65 |
+
"Authorization": f"Bearer {SERPHOUSE_API_KEY}"
|
| 66 |
+
}
|
| 67 |
+
|
| 68 |
+
logger.info(f"SerpHouse API ํธ์ถ ์ค... ๊ฒ์์ด: {query}")
|
| 69 |
+
logger.info(f"์์ฒญ URL: {url} - ํ๋ผ๋ฏธํฐ: {params}")
|
| 70 |
+
|
| 71 |
+
# GET 요청 수행
|
| 72 |
+
response = requests.get(url, headers=headers, params=params, timeout=30)
|
| 73 |
+
response.raise_for_status()
|
| 74 |
+
|
| 75 |
+
logger.info(f"SerpHouse API ์๋ต ์ํ ์ฝ๋: {response.status_code}")
|
| 76 |
+
data = response.json()
|
| 77 |
+
|
| 78 |
+
# 다양한 응답 구조 처리
|
| 79 |
results = data.get("results", {})
|
| 80 |
+
organic = None
|
| 81 |
+
|
| 82 |
+
# 가능한 응답 구조 1
|
| 83 |
+
if isinstance(results, dict) and "organic" in results:
|
| 84 |
+
organic = results["organic"]
|
| 85 |
+
|
| 86 |
+
# 가능한 응답 구조 2 (중첩된 results)
|
| 87 |
+
elif isinstance(results, dict) and "results" in results:
|
| 88 |
+
if isinstance(results["results"], dict) and "organic" in results["results"]:
|
| 89 |
+
organic = results["results"]["organic"]
|
| 90 |
+
|
| 91 |
+
# 가능한 응답 구조 3 (최상위 organic)
|
| 92 |
+
elif "organic" in data:
|
| 93 |
+
organic = data["organic"]
|
| 94 |
+
|
| 95 |
if not organic:
|
| 96 |
+
logger.warning("์๋ต์์ organic ๊ฒฐ๊ณผ๋ฅผ ์ฐพ์ ์ ์์ต๋๋ค.")
|
| 97 |
+
logger.debug(f"์๋ต ๊ตฌ์กฐ: {list(data.keys())}")
|
| 98 |
+
if isinstance(results, dict):
|
| 99 |
+
logger.debug(f"results ๊ตฌ์กฐ: {list(results.keys())}")
|
| 100 |
+
return "No web search results found or unexpected API response structure."
|
| 101 |
+
|
| 102 |
+
# 결과 수 제한 및 컨텍스트 길이 최적화
|
| 103 |
+
max_results = min(20, len(organic))
|
| 104 |
+
limited_organic = organic[:max_results]
|
| 105 |
+
|
| 106 |
+
# 결과 형식 간소화 - 전체 JSON 대신 중요 필드만 포함
|
| 107 |
summary_lines = []
|
| 108 |
+
for idx, item in enumerate(limited_organic, start=1):
|
| 109 |
+
title = item.get("title", "No title")
|
| 110 |
+
link = item.get("link", "#")
|
| 111 |
+
snippet = item.get("snippet", "No description")
|
| 112 |
+
|
| 113 |
+
# 간소화된 형식
|
| 114 |
+
summary_lines.append(
|
| 115 |
+
f"Result {idx}:\n"
|
| 116 |
+
f"- Title: {title}\n"
|
| 117 |
+
f"- Link: {link}\n"
|
| 118 |
+
f"- Snippet: {snippet}\n"
|
| 119 |
+
)
|
| 120 |
+
|
| 121 |
+
logger.info(f"๊ฒ์ ๊ฒฐ๊ณผ {len(limited_organic)}๊ฐ ์ฒ๋ฆฌ ์๋ฃ")
|
| 122 |
return "\n".join(summary_lines)
|
| 123 |
+
|
| 124 |
except Exception as e:
|
| 125 |
logger.error(f"Web search failed: {e}")
|
| 126 |
return f"Web search failed: {str(e)}"
|