Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -39,8 +39,8 @@ try: import whisper; WHISPER_AVAILABLE = True
|
|
| 39 |
except ImportError: WHISPER_AVAILABLE = False; print("WARNING: OpenAI Whisper not found, Audio Transcription tool will be disabled.")
|
| 40 |
|
| 41 |
# Google GenAI SDK types
|
| 42 |
-
from google.genai.types import HarmCategory, HarmBlockThreshold
|
| 43 |
-
from google.ai import generativelanguage as glm
|
| 44 |
|
| 45 |
# LangChain
|
| 46 |
from langchain_core.messages import HumanMessage, AIMessage, SystemMessage, ToolMessage
|
|
@@ -99,7 +99,7 @@ try:
|
|
| 99 |
print("Imported ToolInvocation from langgraph.tools")
|
| 100 |
except ImportError as e_ti:
|
| 101 |
print(f"WARNING: Could not import ToolInvocation from langgraph.prebuilt or langgraph.tools: {e_ti}")
|
| 102 |
-
LGToolInvocationActual = None
|
| 103 |
|
| 104 |
if LGToolInvocationActual is not None or type(LG_ToolExecutor_Class).__name__ == 'ToolNode':
|
| 105 |
from langgraph.graph.message import add_messages as lg_add_messages
|
|
@@ -255,7 +255,7 @@ def _download_file(file_identifier: str, task_id_for_file: Optional[str] = None)
|
|
| 255 |
name_without_ext, current_ext = os.path.splitext(effective_save_path)
|
| 256 |
if not current_ext:
|
| 257 |
content_type_header = r.headers.get('content-type', '')
|
| 258 |
-
content_type_val = content_type_header.split(';')
|
| 259 |
if content_type_val:
|
| 260 |
guessed_ext = mimetypes.guess_extension(content_type_val)
|
| 261 |
if guessed_ext: effective_save_path += guessed_ext; logger.info(f"Added guessed ext: {guessed_ext}")
|
|
@@ -327,7 +327,7 @@ def direct_multimodal_gemini_tool(action_input_json_str: str) -> str:
|
|
| 327 |
"""Processes an image file (URL or local path) along with a text prompt using a Gemini multimodal model (gemini-2.0-flash-exp) for tasks like image description, Q&A about the image, or text generation based on the image. Input: JSON '{\"file_identifier\": \"IMAGE_FILENAME_OR_URL\", \"text_prompt\": \"Your question or instruction related to the image.\", \"task_id\": \"TASK_ID_IF_GAIA_FILENAME_ONLY\" (optional)}'. Returns the model's text response."""
|
| 328 |
global google_genai_client
|
| 329 |
if not google_genai_client: return "Error: google-genai SDK client not initialized."
|
| 330 |
-
if not PIL_TESSERACT_AVAILABLE : return "Error: Pillow (PIL) library not available for image processing."
|
| 331 |
try:
|
| 332 |
data = json.loads(action_input_json_str)
|
| 333 |
file_identifier = data.get("file_identifier")
|
|
@@ -342,7 +342,7 @@ def direct_multimodal_gemini_tool(action_input_json_str: str) -> str:
|
|
| 342 |
except Exception as e_img_open: return f"Error opening image file {local_image_path}: {str(e_img_open)}"
|
| 343 |
|
| 344 |
model_id_for_client = f"models/{GEMINI_FLASH_MULTIMODAL_MODEL_NAME}" if not GEMINI_FLASH_MULTIMODAL_MODEL_NAME.startswith("models/") else GEMINI_FLASH_MULTIMODAL_MODEL_NAME
|
| 345 |
-
response = google_genai_client.models.generate_content(
|
| 346 |
model=model_id_for_client, contents=[pil_image, text_prompt]
|
| 347 |
)
|
| 348 |
logger.info(f"Direct Multimodal Tool: Response received from {model_id_for_client} received.")
|
|
@@ -396,7 +396,7 @@ def initialize_agent_and_tools(force_reinit=False):
|
|
| 396 |
model=GEMINI_MODEL_NAME,
|
| 397 |
google_api_key=GOOGLE_API_KEY,
|
| 398 |
temperature=0.0,
|
| 399 |
-
# safety_settings is removed to use model
|
| 400 |
timeout=120,
|
| 401 |
convert_system_message_to_human=False # Explicitly set to False
|
| 402 |
)
|
|
@@ -417,85 +417,71 @@ def initialize_agent_and_tools(force_reinit=False):
|
|
| 417 |
except Exception as e: logger.warning(f"PythonREPLTool init failed: {e}")
|
| 418 |
logger.info(f"Final tools list for agent: {[t.name for t in TOOLS]}")
|
| 419 |
|
| 420 |
-
if LANGGRAPH_FLAVOR_AVAILABLE and all([LG_StateGraph, LG_ToolExecutor_Class, LG_END, LLM_INSTANCE, add_messages]): # LG_ToolInvocation removed
|
| 421 |
if not LANGGRAPH_MEMORY_SAVER and MemorySaver_Class: LANGGRAPH_MEMORY_SAVER = MemorySaver_Class(); logger.info("LangGraph MemorySaver initialized.")
|
| 422 |
try:
|
| 423 |
logger.info(f"Attempting LangGraph init (Tool Executor type: {LG_ToolExecutor_Class.__name__ if LG_ToolExecutor_Class else 'None'})")
|
| 424 |
_TypedDict = getattr(__import__('typing_extensions'), 'TypedDict', dict)
|
| 425 |
class AgentState(_TypedDict): input: str; messages: Annotated[List[Any], add_messages]
|
| 426 |
|
| 427 |
-
|
| 428 |
-
|
| 429 |
-
|
| 430 |
-
|
| 431 |
|
| 432 |
def agent_node(state: AgentState):
|
| 433 |
-
|
| 434 |
-
|
|
|
|
|
|
|
|
|
|
| 435 |
|
| 436 |
messages_for_llm = [SystemMessage(content=system_message_content)]
|
| 437 |
-
|
| 438 |
-
#
|
| 439 |
-
# The 'messages' in state are previous AIMessages/ToolMessages.
|
| 440 |
-
messages_for_llm.extend(state.get('messages', []))
|
| 441 |
|
| 442 |
logger.debug(f"LangGraph agent_node - messages_for_llm: {messages_for_llm}")
|
| 443 |
-
|
| 444 |
-
|
| 445 |
-
|
| 446 |
-
return {"messages": [AIMessage(content="[ERROR] Agent node: Initial SystemMessage content is invalid or empty.")]}
|
| 447 |
|
| 448 |
bound_llm = LLM_INSTANCE.bind_tools(TOOLS)
|
| 449 |
response = bound_llm.invoke(messages_for_llm)
|
| 450 |
return {"messages": [response]}
|
| 451 |
|
| 452 |
-
if not LG_ToolExecutor_Class: raise ValueError("LG_ToolExecutor_Class
|
| 453 |
tool_executor_instance_lg = LG_ToolExecutor_Class(tools=TOOLS)
|
| 454 |
|
| 455 |
-
|
| 456 |
-
# If we keep a custom tool_node, it needs to correctly use the tool_executor_instance_lg.
|
| 457 |
-
def tool_node(state: AgentState):
|
| 458 |
last_msg = state['messages'][-1] if state.get('messages') and isinstance(state['messages'][-1], AIMessage) else None
|
| 459 |
if not last_msg or not last_msg.tool_calls: return {"messages": []}
|
| 460 |
-
|
| 461 |
-
# ToolNode can often take the AIMessage directly, or a list of tool_calls
|
| 462 |
-
# The `invoke` method of ToolNode typically expects the full previous message or just tool_calls.
|
| 463 |
-
# Depending on the exact version and how ToolNode is implemented.
|
| 464 |
-
# The most straightforward is to let ToolNode handle the AIMessage's tool_calls.
|
| 465 |
-
# This implies tool_executor_instance_lg should be the node itself.
|
| 466 |
-
# However, if we must use a custom function:
|
| 467 |
tool_results = []
|
| 468 |
-
for tc in last_msg.tool_calls:
|
| 469 |
name, args, tc_id = tc.get('name'), tc.get('args'), tc.get('id')
|
| 470 |
if not all([name, isinstance(args, dict), tc_id]):
|
| 471 |
err_msg=f"Invalid tool_call: {tc}"; logger.error(err_msg)
|
| 472 |
tool_results.append(ToolMessage(f"Error: {err_msg}", tool_call_id=tc_id or "error_id", name=name or "error_tool"))
|
| 473 |
continue
|
| 474 |
try:
|
| 475 |
-
logger.info(f"LG Tool Invoking
|
| 476 |
-
#
|
| 477 |
-
|
| 478 |
-
|
| 479 |
-
|
| 480 |
-
|
| 481 |
-
output_lg = tool_executor_instance_lg.invoke(tc) # Pass the tool_call dict
|
| 482 |
-
|
| 483 |
tool_results.append(ToolMessage(content=str(output_lg), tool_call_id=tc_id, name=name))
|
| 484 |
except Exception as e_tool_node_lg:
|
| 485 |
logger.error(f"LG Tool Error ('{name}'): {e_tool_node_lg}", exc_info=True)
|
| 486 |
tool_results.append(ToolMessage(content=f"Error for tool {name}: {str(e_tool_node_lg)}", tool_call_id=tc_id, name=name))
|
| 487 |
return {"messages": tool_results}
|
| 488 |
|
|
|
|
| 489 |
workflow_lg = LG_StateGraph(AgentState) # type: ignore
|
| 490 |
workflow_lg.add_node("agent", agent_node)
|
| 491 |
-
# If LG_ToolExecutor_Class is ToolNode,
|
| 492 |
-
|
| 493 |
-
|
| 494 |
-
|
| 495 |
-
else: # Fallback to custom tool_node (might be needed for older ToolExecutor)
|
| 496 |
-
workflow_lg.add_node("tools", tool_node)
|
| 497 |
-
logger.info("Added custom tool_node function to LangGraph.")
|
| 498 |
-
|
| 499 |
workflow_lg.set_entry_point("agent")
|
| 500 |
def should_continue_lg(state: AgentState): return "tools" if state['messages'][-1].tool_calls else LG_END
|
| 501 |
workflow_lg.add_conditional_edges("agent", should_continue_lg, {"tools": "tools", LG_END: LG_END}) # type: ignore
|
|
@@ -687,7 +673,7 @@ with gr.Blocks(css=".gradio-container {max-width:1280px !important;margin:auto !
|
|
| 687 |
demo.load(update_ui_on_load_fn_within_context, [], [agent_status_display, missing_secrets_display])
|
| 688 |
|
| 689 |
if __name__ == "__main__":
|
| 690 |
-
logger.info(f"Application starting up (v7.
|
| 691 |
if not PYPDF2_AVAILABLE: logger.warning("PyPDF2 (PDF tool) NOT AVAILABLE.")
|
| 692 |
if not PIL_TESSERACT_AVAILABLE: logger.warning("Pillow/Pytesseract (OCR tool) NOT AVAILABLE.")
|
| 693 |
if not WHISPER_AVAILABLE: logger.warning("Whisper (Audio tool) NOT AVAILABLE.")
|
|
|
|
| 39 |
except ImportError: WHISPER_AVAILABLE = False; print("WARNING: OpenAI Whisper not found, Audio Transcription tool will be disabled.")
|
| 40 |
|
| 41 |
# Google GenAI SDK types
|
| 42 |
+
from google.genai.types import HarmCategory, HarmBlockThreshold # CORRECTED IMPORT
|
| 43 |
+
from google.ai import generativelanguage as glm # For FileState enum
|
| 44 |
|
| 45 |
# LangChain
|
| 46 |
from langchain_core.messages import HumanMessage, AIMessage, SystemMessage, ToolMessage
|
|
|
|
| 99 |
print("Imported ToolInvocation from langgraph.tools")
|
| 100 |
except ImportError as e_ti:
|
| 101 |
print(f"WARNING: Could not import ToolInvocation from langgraph.prebuilt or langgraph.tools: {e_ti}")
|
| 102 |
+
LGToolInvocationActual = None # type: ignore
|
| 103 |
|
| 104 |
if LGToolInvocationActual is not None or type(LG_ToolExecutor_Class).__name__ == 'ToolNode':
|
| 105 |
from langgraph.graph.message import add_messages as lg_add_messages
|
|
|
|
| 255 |
# Only try to guess an extension when the save path does not already have one.
name_without_ext, current_ext = os.path.splitext(effective_save_path)
if not current_ext:
    content_type_header = r.headers.get('content-type', '')
    # Keep just the media type, e.g. "text/html; charset=utf-8" -> "text/html".
    # str.split() returns a list, so the first element has to be selected
    # before .strip(); calling .strip() on the list raises AttributeError.
    content_type_val = content_type_header.split(';')[0].strip() if content_type_header else ''
    if content_type_val:
        guessed_ext = mimetypes.guess_extension(content_type_val)
        if guessed_ext:
            effective_save_path += guessed_ext
            logger.info(f"Added guessed ext: {guessed_ext}")
|
|
|
|
| 327 |
"""Processes an image file (URL or local path) along with a text prompt using a Gemini multimodal model (gemini-2.0-flash-exp) for tasks like image description, Q&A about the image, or text generation based on the image. Input: JSON '{\"file_identifier\": \"IMAGE_FILENAME_OR_URL\", \"text_prompt\": \"Your question or instruction related to the image.\", \"task_id\": \"TASK_ID_IF_GAIA_FILENAME_ONLY\" (optional)}'. Returns the model's text response."""
|
| 328 |
global google_genai_client
|
| 329 |
if not google_genai_client: return "Error: google-genai SDK client not initialized."
|
| 330 |
+
if not PIL_TESSERACT_AVAILABLE : return "Error: Pillow (PIL) library not available for image processing." # Relies on PIL_TESSERACT_AVAILABLE for PIL
|
| 331 |
try:
|
| 332 |
data = json.loads(action_input_json_str)
|
| 333 |
file_identifier = data.get("file_identifier")
|
|
|
|
| 342 |
except Exception as e_img_open: return f"Error opening image file {local_image_path}: {str(e_img_open)}"
|
| 343 |
|
| 344 |
model_id_for_client = f"models/{GEMINI_FLASH_MULTIMODAL_MODEL_NAME}" if not GEMINI_FLASH_MULTIMODAL_MODEL_NAME.startswith("models/") else GEMINI_FLASH_MULTIMODAL_MODEL_NAME
|
| 345 |
+
response = google_genai_client.models.generate_content( # Corrected to use google_genai_client.models
|
| 346 |
model=model_id_for_client, contents=[pil_image, text_prompt]
|
| 347 |
)
|
| 348 |
logger.info(f"Direct Multimodal Tool: Response received from {model_id_for_client} received.")
|
|
|
|
| 396 |
model=GEMINI_MODEL_NAME,
|
| 397 |
google_api_key=GOOGLE_API_KEY,
|
| 398 |
temperature=0.0,
|
| 399 |
+
# safety_settings parameter is removed to use model's default settings.
|
| 400 |
timeout=120,
|
| 401 |
convert_system_message_to_human=False # Explicitly set to False
|
| 402 |
)
|
|
|
|
| 417 |
except Exception as e: logger.warning(f"PythonREPLTool init failed: {e}")
|
| 418 |
logger.info(f"Final tools list for agent: {[t.name for t in TOOLS]}")
|
| 419 |
|
| 420 |
+
if LANGGRAPH_FLAVOR_AVAILABLE and all([LG_StateGraph, LG_ToolExecutor_Class, LG_END, LLM_INSTANCE, add_messages]): # LG_ToolInvocation removed
|
| 421 |
if not LANGGRAPH_MEMORY_SAVER and MemorySaver_Class: LANGGRAPH_MEMORY_SAVER = MemorySaver_Class(); logger.info("LangGraph MemorySaver initialized.")
|
| 422 |
try:
|
| 423 |
logger.info(f"Attempting LangGraph init (Tool Executor type: {LG_ToolExecutor_Class.__name__ if LG_ToolExecutor_Class else 'None'})")
|
| 424 |
_TypedDict = getattr(__import__('typing_extensions'), 'TypedDict', dict)
|
| 425 |
class AgentState(_TypedDict): input: str; messages: Annotated[List[Any], add_messages]
|
| 426 |
|
| 427 |
+
# System prompt template - this describes the agent's role and tools.
|
| 428 |
+
# The {input} placeholder for the actual task will be filled by the HumanMessage.
|
| 429 |
+
base_system_prompt_content_lg = LANGGRAPH_PROMPT_TEMPLATE_STR.split("{input}")[0].strip() + "\nTOOLS:\n{tools}\nRESPONSE FORMAT:\nFinal AIMessage should contain ONLY the answer in 'content' and NO 'tool_calls'. If using tools, 'content' can be thought process, with 'tool_calls'.\nBegin!"
|
| 430 |
+
|
| 431 |
|
| 432 |
def agent_node(state: AgentState):
    """Run one LLM step of the graph and return its reply as a state update.

    The prompt is rebuilt on every call as: system message (role description
    plus the rendered tool list), the current task as a HumanMessage, then the
    accumulated history from ``state['messages']``.

    Args:
        state: Graph state; ``input`` holds the task text and ``messages``
            holds the prior messages.

    Returns:
        ``{"messages": [message]}`` where ``message`` is the LLM response, or
        an error AIMessage when the task input is empty.
    """
    # `or ''` also covers an explicit None, which HumanMessage would reject.
    current_task_query = state.get('input') or ''
    if not str(current_task_query).strip():
        logger.error("LLM call would fail in agent_node: Last HumanMessage content is empty or invalid.")
        return {"messages": [AIMessage(content="[ERROR] Agent node: Current task input (HumanMessage) is empty.")]}

    tool_listing = "\n".join(f"- {t.name}: {t.description}" for t in TOOLS)
    system_message_content = base_system_prompt_content_lg.format(tools=tool_listing)

    # The task must come BEFORE the history. The history only contains
    # AIMessage/ToolMessage entries, so appending the task last would make the
    # conversation open with a tool-call turn that has no preceding user turn,
    # and would re-ask the question after every tool result.
    # NOTE(review): the Gemini API is documented to require a function-call
    # turn to follow a user or function-response turn - confirm against the
    # deployed model.
    messages_for_llm = [
        SystemMessage(content=system_message_content),
        HumanMessage(content=current_task_query),
    ]
    messages_for_llm.extend(state.get('messages', []))

    logger.debug(f"LangGraph agent_node - messages_for_llm: {messages_for_llm}")

    bound_llm = LLM_INSTANCE.bind_tools(TOOLS)
    response = bound_llm.invoke(messages_for_llm)
    return {"messages": [response]}
|
| 451 |
|
| 452 |
+
if not LG_ToolExecutor_Class: raise ValueError("LG_ToolExecutor_Class is None for LangGraph.")
|
| 453 |
tool_executor_instance_lg = LG_ToolExecutor_Class(tools=TOOLS)
|
| 454 |
|
| 455 |
+
def tool_node(state: AgentState):
    """Execute every tool call requested by the latest AIMessage.

    Args:
        state: Graph state; only ``state['messages']`` is read.

    Returns:
        ``{"messages": [ToolMessage, ...]}`` with one ToolMessage per tool
        call (an error message for malformed or failing calls), or an empty
        list when the last message is not an AIMessage with tool calls.
    """
    history = state.get('messages')
    last_msg = history[-1] if history and isinstance(history[-1], AIMessage) else None
    if not last_msg or not last_msg.tool_calls:
        return {"messages": []}

    # LG_ToolExecutor_Class is itself a class, so type(...) would yield its
    # metaclass (named 'type') and never match 'ToolNode'; read the class's
    # own __name__ instead. Computed once because it does not vary per call.
    executor_is_tool_node = getattr(LG_ToolExecutor_Class, '__name__', '') == 'ToolNode'
    wrap_in_invocation = bool(LG_ToolInvocation) and not executor_is_tool_node

    tool_results = []
    for tc in last_msg.tool_calls:
        name, args, tc_id = tc.get('name'), tc.get('args'), tc.get('id')
        if not all([name, isinstance(args, dict), tc_id]):
            err_msg = f"Invalid tool_call: {tc}"
            logger.error(err_msg)
            # Placeholder id/name keep the ToolMessage constructible.
            tool_results.append(ToolMessage(f"Error: {err_msg}", tool_call_id=tc_id or "error_id", name=name or "error_tool"))
            continue
        try:
            logger.info(f"LG Tool Invoking: '{name}' with {args} (ID: {tc_id})")
            if wrap_in_invocation:
                # Legacy executor path: wrap the call in a ToolInvocation.
                invocation = LG_ToolInvocation(tool=name, tool_input=args)
                output_lg = tool_executor_instance_lg.invoke(invocation)  # type: ignore
            else:
                # NOTE(review): assumes the executor accepts a raw tool-call
                # dict; verify against the installed langgraph version.
                output_lg = tool_executor_instance_lg.invoke(tc)  # type: ignore
            tool_results.append(ToolMessage(content=str(output_lg), tool_call_id=tc_id, name=name))
        except Exception as e_tool_node_lg:
            # Report the failure back to the LLM instead of aborting the graph.
            logger.error(f"LG Tool Error ('{name}'): {e_tool_node_lg}", exc_info=True)
            tool_results.append(ToolMessage(content=f"Error for tool {name}: {str(e_tool_node_lg)}", tool_call_id=tc_id, name=name))
    return {"messages": tool_results}
|
| 477 |
|
| 478 |
+
|
| 479 |
workflow_lg = LG_StateGraph(AgentState) # type: ignore
|
| 480 |
workflow_lg.add_node("agent", agent_node)
|
| 481 |
+
# If LG_ToolExecutor_Class is ToolNode, it can often be added directly as the node.
|
| 482 |
+
# workflow_lg.add_node("tools", tool_executor_instance_lg)
|
| 483 |
+
# For now, using the custom tool_node which wraps the executor instance.
|
| 484 |
+
workflow_lg.add_node("tools", tool_node)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 485 |
workflow_lg.set_entry_point("agent")
|
| 486 |
def should_continue_lg(state: AgentState):
    """Pick the next graph step: "tools" if the newest message has tool calls, otherwise LG_END."""
    newest_message = state['messages'][-1]
    if newest_message.tool_calls:
        return "tools"
    return LG_END
|
| 487 |
workflow_lg.add_conditional_edges("agent", should_continue_lg, {"tools": "tools", LG_END: LG_END}) # type: ignore
|
|
|
|
| 673 |
demo.load(update_ui_on_load_fn_within_context, [], [agent_status_display, missing_secrets_display])
|
| 674 |
|
| 675 |
if __name__ == "__main__":
|
| 676 |
+
logger.info(f"Application starting up (v7.2 - Agent Node Message & LLM Safety Fix)...")
|
| 677 |
if not PYPDF2_AVAILABLE: logger.warning("PyPDF2 (PDF tool) NOT AVAILABLE.")
|
| 678 |
if not PIL_TESSERACT_AVAILABLE: logger.warning("Pillow/Pytesseract (OCR tool) NOT AVAILABLE.")
|
| 679 |
if not WHISPER_AVAILABLE: logger.warning("Whisper (Audio tool) NOT AVAILABLE.")
|