Update Modules/Web_Fetch.py
Browse files
- Modules/Web_Fetch.py +9 -16
Modules/Web_Fetch.py
CHANGED
|
@@ -14,6 +14,13 @@ from app import _fetch_rate_limiter, _log_call_end, _log_call_start, _truncate_f
|
|
| 14 |
from ._docstrings import autodoc
|
| 15 |
|
| 16 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
def _http_get_enhanced(url: str, timeout: int | float = 30, *, skip_rate_limit: bool = False) -> requests.Response:
|
| 18 |
headers = {
|
| 19 |
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
|
|
@@ -167,12 +174,7 @@ def _truncate_markdown(markdown: str, max_chars: int) -> Tuple[str, Dict[str, ob
|
|
| 167 |
return truncated + truncation_notice, metadata
|
| 168 |
|
| 169 |
|
| 170 |
-
@autodoc(
|
| 171 |
-
summary=(
|
| 172 |
-
"Fetch a webpage and return clean Markdown or a list of links, with max length and pagination via offset."
|
| 173 |
-
),
|
| 174 |
-
returns="Markdown content (or links) possibly with a truncation notice when max_chars is exceeded.",
|
| 175 |
-
)
|
| 176 |
def Web_Fetch(
|
| 177 |
url: Annotated[str, "The absolute URL to fetch (must return HTML)."],
|
| 178 |
max_chars: Annotated[int, "Maximum characters to return (0 = no limit, full page content)."] = 3000,
|
|
@@ -265,16 +267,7 @@ def build_interface() -> gr.Interface:
|
|
| 265 |
"<div style=\"text-align:center\">Convert any webpage to clean Markdown format with precision controls, "
|
| 266 |
"or extract all links. Supports custom element removal, length limits, and pagination with offset.</div>"
|
| 267 |
),
|
| 268 |
-
api_description=
|
| 269 |
-
"Fetch a web page and return it converted to Markdown format or extract links with configurable options. "
|
| 270 |
-
"Includes enhanced truncation with detailed metadata and pagination support via offset parameter. "
|
| 271 |
-
"Parameters: url (str - absolute URL), max_chars (int - 0=no limit, default 3000), "
|
| 272 |
-
"strip_selectors (str - CSS selectors to remove, comma-separated), "
|
| 273 |
-
"url_scraper (bool - extract only links instead of content, default False), "
|
| 274 |
-
"offset (int - character offset for pagination, use next_cursor from previous call). "
|
| 275 |
-
"When content is truncated, returns detailed metadata including truncated status, character counts, "
|
| 276 |
-
"and next_cursor for continuation. When url_scraper=True, returns formatted list of all links found on the page."
|
| 277 |
-
),
|
| 278 |
flagging_mode="never",
|
| 279 |
)
|
| 280 |
|
|
|
|
| 14 |
from ._docstrings import autodoc
|
| 15 |
|
| 16 |
|
| 17 |
+
# Single source of truth for the LLM-facing tool description
|
| 18 |
+
TOOL_SUMMARY = (
|
| 19 |
+
"Fetch a webpage and return clean Markdown or a list of links, with max length and pagination via offset; "
|
| 20 |
+
"if truncated, the output includes a notice with next_cursor for exact continuation."
|
| 21 |
+
)
|
| 22 |
+
|
| 23 |
+
|
| 24 |
def _http_get_enhanced(url: str, timeout: int | float = 30, *, skip_rate_limit: bool = False) -> requests.Response:
|
| 25 |
headers = {
|
| 26 |
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
|
|
|
|
| 174 |
return truncated + truncation_notice, metadata
|
| 175 |
|
| 176 |
|
| 177 |
+
@autodoc(summary=TOOL_SUMMARY)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 178 |
def Web_Fetch(
|
| 179 |
url: Annotated[str, "The absolute URL to fetch (must return HTML)."],
|
| 180 |
max_chars: Annotated[int, "Maximum characters to return (0 = no limit, full page content)."] = 3000,
|
|
|
|
| 267 |
"<div style=\"text-align:center\">Convert any webpage to clean Markdown format with precision controls, "
|
| 268 |
"or extract all links. Supports custom element removal, length limits, and pagination with offset.</div>"
|
| 269 |
),
|
| 270 |
+
api_description=TOOL_SUMMARY,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 271 |
flagging_mode="never",
|
| 272 |
)
|
| 273 |
|