[\s\S]*?<\\/think>", "", raw, flags=re.IGNORECASE)
no_think = re.sub(r"<\\/?think>", "", no_think, flags=re.IGNORECASE)
except Exception:
no_think = raw
try:
paragraphs = [p for p in re.split(r"\n\s*\n", no_think) if p.strip()]
keep: List[str] = []
removed = 0
planning_re = re.compile(r"\b(let me|now i(?:'ll| will)?|first,|i will now|i will|i'll|let's|now let me|i need to|now i'll|now i will)\b", re.IGNORECASE)
for paragraph in paragraphs:
if planning_re.search(paragraph):
removed += 1
continue
keep.append(paragraph)
report = "\n\n".join(keep).strip()
if not report:
report = no_think.strip()
except Exception:
report = no_think
removed = 0
report = re.sub(r"\n\s*\n\s*\n+", "\n\n", report)
try:
print(f"[POSTPROCESS] removed_planning_paragraphs={removed}, raw_chars={len(raw)}, final_chars={len(report)}", flush=True)
except Exception:
pass
links_text = "\n".join([f"[{i+1}] {url}" for i, url in enumerate(pages.keys())])
if links_text:
sources_section = "\n\n## Sources\n" + "\n".join([f"[{i+1}] {url}" for i, url in enumerate(pages.keys())])
report = report.rstrip() + sources_section
file_path = _write_report_tmp(report)
elapsed = time.time() - start_ts
print(f"[TIMING] Deep_Research elapsed: {elapsed:.2f}s", flush=True)
_log_call_end("Deep_Research", f"urls={len(pages)} file={os.path.basename(file_path)} duration={elapsed:.2f}s")
return report, links_text, file_path
def build_interface() -> gr.Interface:
    """Build the Gradio interface that exposes the ``Deep_Research`` tool.

    The interface accepts a topic summary plus up to five DuckDuckGo search
    queries (each with its own max-results slider) and returns the generated
    Markdown report, the list of fetched links, and a downloadable report file.

    Returns:
        gr.Interface: Configured interface wrapping ``Deep_Research``.
    """
    return gr.Interface(
        fn=Deep_Research,
        inputs=[
            gr.Textbox(label="Summarization of research topic", lines=3, placeholder="Briefly summarize the research topic or user question"),
            gr.Textbox(label="DDG Search Query 1", max_lines=1),
            gr.Slider(1, 50, value=10, step=1, label="Max results (Q1)"),
            gr.Textbox(label="DDG Search Query 2", value="", max_lines=1),
            gr.Slider(1, 50, value=10, step=1, label="Max results (Q2)"),
            gr.Textbox(label="DDG Search Query 3", value="", max_lines=1),
            gr.Slider(1, 50, value=10, step=1, label="Max results (Q3)"),
            gr.Textbox(label="DDG Search Query 4", value="", max_lines=1),
            gr.Slider(1, 50, value=10, step=1, label="Max results (Q4)"),
            gr.Textbox(label="DDG Search Query 5", value="", max_lines=1),
            gr.Slider(1, 50, value=10, step=1, label="Max results (Q5)"),
        ],
        outputs=[
            gr.Markdown(label="Research Report"),
            gr.Textbox(label="Fetched Links", lines=8),
            gr.File(label="Download Research Report", file_count="single"),
        ],
        title="Deep Research",
        # BUGFIX: the original description literal contained a raw newline before
        # its closing quote, which is a SyntaxError in a single-quoted string.
        # The string is now closed on the same line it ends on.
        description=(
            "Perform multi-query web research: search with DuckDuckGo, fetch up to 50 pages in parallel, "
            "and generate a comprehensive report using a large LLM via Hugging Face Inference Providers "
            "(Cerebras). Requires HF_READ_TOKEN."
        ),
        api_description=TOOL_SUMMARY,
        flagging_mode="never",
        # Only expose this tool over the API when a text-generation token is configured.
        show_api=bool(HF_TEXTGEN_TOKEN),
    )
__all__ = ["Deep_Research", "build_interface"]