Nymbo committed
Commit cbe0212 · verified · 1 Parent(s): 4b97eaa

Update Modules/Web_Fetch.py

Files changed (1): Modules/Web_Fetch.py (+9 -16)
Modules/Web_Fetch.py CHANGED
@@ -14,6 +14,13 @@ from app import _fetch_rate_limiter, _log_call_end, _log_call_start, _truncate_f
 from ._docstrings import autodoc
 
 
+# Single source of truth for the LLM-facing tool description
+TOOL_SUMMARY = (
+    "Fetch a webpage and return clean Markdown or a list of links, with max length and pagination via offset; "
+    "if truncated, the output includes a notice with next_cursor for exact continuation."
+)
+
+
 def _http_get_enhanced(url: str, timeout: int | float = 30, *, skip_rate_limit: bool = False) -> requests.Response:
     headers = {
         "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
@@ -167,12 +174,7 @@ def _truncate_markdown(markdown: str, max_chars: int) -> Tuple[str, Dict[str, ob
     return truncated + truncation_notice, metadata
 
 
-@autodoc(
-    summary=(
-        "Fetch a webpage and return clean Markdown or a list of links, with max length and pagination via offset."
-    ),
-    returns="Markdown content (or links) possibly with a truncation notice when max_chars is exceeded.",
-)
+@autodoc(summary=TOOL_SUMMARY)
 def Web_Fetch(
     url: Annotated[str, "The absolute URL to fetch (must return HTML)."],
     max_chars: Annotated[int, "Maximum characters to return (0 = no limit, full page content)."] = 3000,
@@ -265,16 +267,7 @@ def build_interface() -> gr.Interface:
         "<div style=\"text-align:center\">Convert any webpage to clean Markdown format with precision controls, "
         "or extract all links. Supports custom element removal, length limits, and pagination with offset.</div>"
     ),
-    api_description=(
-        "Fetch a web page and return it converted to Markdown format or extract links with configurable options. "
-        "Includes enhanced truncation with detailed metadata and pagination support via offset parameter. "
-        "Parameters: url (str - absolute URL), max_chars (int - 0=no limit, default 3000), "
-        "strip_selectors (str - CSS selectors to remove, comma-separated), "
-        "url_scraper (bool - extract only links instead of content, default False), "
-        "offset (int - character offset for pagination, use next_cursor from previous call). "
-        "When content is truncated, returns detailed metadata including truncated status, character counts, "
-        "and next_cursor for continuation. When url_scraper=True, returns formatted list of all links found on the page."
-    ),
+    api_description=TOOL_SUMMARY,
     flagging_mode="never",
 )
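The change is a single-source-of-truth refactor: one module-level constant now feeds both the @autodoc docstring and the Gradio api_description, so the two LLM-facing descriptions of the tool can no longer drift apart. A minimal sketch of the pattern, assuming an autodoc decorator that copies its summary into the function's __doc__; the real decorator lives in Modules/_docstrings.py and is not shown in this diff:

    TOOL_SUMMARY = (
        "Fetch a webpage and return clean Markdown or a list of links, "
        "with max length and pagination via offset."
    )

    def autodoc(summary: str):
        # Hypothetical stand-in: attach the shared summary as the docstring.
        def decorate(func):
            func.__doc__ = summary
            return func
        return decorate

    @autodoc(summary=TOOL_SUMMARY)
    def Web_Fetch(url: str) -> str:
        ...  # fetching and Markdown conversion elided

    # The same constant is passed straight through to the API layer:
    #     gr.Interface(..., api_description=TOOL_SUMMARY, flagging_mode="never")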
 
 
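The new summary also pins down the pagination contract: when output is truncated, the notice carries a next_cursor that callers feed back as offset. A hedged usage sketch built only on the parameters documented in the removed api_description (max_chars default 3000, offset taken from next_cursor); the exact notice format, and therefore the _extract_next_cursor regex, is an assumption:

    import re

    from Modules.Web_Fetch import Web_Fetch

    def _extract_next_cursor(markdown: str) -> int | None:
        # Assumed notice shape: the truncation notice embeds "next_cursor: <int>".
        match = re.search(r"next_cursor[:=]\s*(\d+)", markdown)
        return int(match.group(1)) if match else None

    def fetch_all(url: str, max_chars: int = 3000) -> str:
        # Page through Web_Fetch until no truncation notice remains.
        # (Notices are left in place here; a real caller would strip them.)
        offset, chunks = 0, []
        while True:
            page = Web_Fetch(url, max_chars=max_chars, offset=offset)
            chunks.append(page)
            cursor = _extract_next_cursor(page)
            if cursor is None:
                return "".join(chunks)
            offset = cursor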