jesusvilela committed on
Commit
170baad
·
verified ·
1 Parent(s): 05e4e0d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -51
app.py CHANGED
@@ -39,8 +39,8 @@ try: import whisper; WHISPER_AVAILABLE = True
39
  except ImportError: WHISPER_AVAILABLE = False; print("WARNING: OpenAI Whisper not found, Audio Transcription tool will be disabled.")
40
 
41
  # Google GenAI SDK types
42
- from google.genai.types import HarmCategory, HarmBlockThreshold
43
- from google.ai import generativelanguage as glm
44
 
45
  # LangChain
46
  from langchain_core.messages import HumanMessage, AIMessage, SystemMessage, ToolMessage
@@ -99,7 +99,7 @@ try:
99
  print("Imported ToolInvocation from langgraph.tools")
100
  except ImportError as e_ti:
101
  print(f"WARNING: Could not import ToolInvocation from langgraph.prebuilt or langgraph.tools: {e_ti}")
102
- LGToolInvocationActual = None
103
 
104
  if LGToolInvocationActual is not None or type(LG_ToolExecutor_Class).__name__ == 'ToolNode':
105
  from langgraph.graph.message import add_messages as lg_add_messages
@@ -255,7 +255,7 @@ def _download_file(file_identifier: str, task_id_for_file: Optional[str] = None)
255
  name_without_ext, current_ext = os.path.splitext(effective_save_path)
256
  if not current_ext:
257
  content_type_header = r.headers.get('content-type', '')
258
- content_type_val = content_type_header.split(';')[0].strip() if content_type_header else ''
259
  if content_type_val:
260
  guessed_ext = mimetypes.guess_extension(content_type_val)
261
  if guessed_ext: effective_save_path += guessed_ext; logger.info(f"Added guessed ext: {guessed_ext}")
@@ -327,7 +327,7 @@ def direct_multimodal_gemini_tool(action_input_json_str: str) -> str:
327
  """Processes an image file (URL or local path) along with a text prompt using a Gemini multimodal model (gemini-2.0-flash-exp) for tasks like image description, Q&A about the image, or text generation based on the image. Input: JSON '{\"file_identifier\": \"IMAGE_FILENAME_OR_URL\", \"text_prompt\": \"Your question or instruction related to the image.\", \"task_id\": \"TASK_ID_IF_GAIA_FILENAME_ONLY\" (optional)}'. Returns the model's text response."""
328
  global google_genai_client
329
  if not google_genai_client: return "Error: google-genai SDK client not initialized."
330
- if not PIL_TESSERACT_AVAILABLE : return "Error: Pillow (PIL) library not available for image processing."
331
  try:
332
  data = json.loads(action_input_json_str)
333
  file_identifier = data.get("file_identifier")
@@ -342,7 +342,7 @@ def direct_multimodal_gemini_tool(action_input_json_str: str) -> str:
342
  except Exception as e_img_open: return f"Error opening image file {local_image_path}: {str(e_img_open)}"
343
 
344
  model_id_for_client = f"models/{GEMINI_FLASH_MULTIMODAL_MODEL_NAME}" if not GEMINI_FLASH_MULTIMODAL_MODEL_NAME.startswith("models/") else GEMINI_FLASH_MULTIMODAL_MODEL_NAME
345
- response = google_genai_client.models.generate_content(
346
  model=model_id_for_client, contents=[pil_image, text_prompt]
347
  )
348
  logger.info(f"Direct Multimodal Tool: Response received from {model_id_for_client}.")
@@ -396,7 +396,7 @@ def initialize_agent_and_tools(force_reinit=False):
396
  model=GEMINI_MODEL_NAME,
397
  google_api_key=GOOGLE_API_KEY,
398
  temperature=0.0,
399
- # safety_settings is removed to use model defaults to isolate "contents not specified" error
400
  timeout=120,
401
  convert_system_message_to_human=False # Explicitly set to False
402
  )
@@ -417,85 +417,71 @@ def initialize_agent_and_tools(force_reinit=False):
417
  except Exception as e: logger.warning(f"PythonREPLTool init failed: {e}")
418
  logger.info(f"Final tools list for agent: {[t.name for t in TOOLS]}")
419
 
420
- if LANGGRAPH_FLAVOR_AVAILABLE and all([LG_StateGraph, LG_ToolExecutor_Class, LG_END, LLM_INSTANCE, add_messages]): # LG_ToolInvocation removed from check
421
  if not LANGGRAPH_MEMORY_SAVER and MemorySaver_Class: LANGGRAPH_MEMORY_SAVER = MemorySaver_Class(); logger.info("LangGraph MemorySaver initialized.")
422
  try:
423
  logger.info(f"Attempting LangGraph init (Tool Executor type: {LG_ToolExecutor_Class.__name__ if LG_ToolExecutor_Class else 'None'})")
424
  _TypedDict = getattr(__import__('typing_extensions'), 'TypedDict', dict)
425
  class AgentState(_TypedDict): input: str; messages: Annotated[List[Any], add_messages]
426
 
427
- base_system_prompt_content_template_lg = LANGGRAPH_PROMPT_TEMPLATE_STR.format(
428
- tools="\n".join([f"- {t.name}: {t.description}" for t in TOOLS]),
429
- input="{current_task_input_placeholder}"
430
- )
431
 
432
  def agent_node(state: AgentState):
433
- current_task_actual_input = state.get('input', '')
434
- system_message_content = base_system_prompt_content_template_lg.replace("{current_task_input_placeholder}", current_task_actual_input)
 
 
 
435
 
436
  messages_for_llm = [SystemMessage(content=system_message_content)]
437
- # If there's history, append it after the SystemMessage.
438
- # The current_task_actual_input is now part of the SystemMessage.
439
- # The 'messages' in state are previous AIMessages/ToolMessages.
440
- messages_for_llm.extend(state.get('messages', []))
441
 
442
  logger.debug(f"LangGraph agent_node - messages_for_llm: {messages_for_llm}")
443
- # Check the first message (SystemMessage)
444
- if not messages_for_llm or not (isinstance(messages_for_llm[0], SystemMessage) and messages_for_llm[0].content and str(messages_for_llm[0].content).strip()):
445
- logger.error("LLM call would fail in agent_node: First message (SystemMessage) is missing, has no content, is not a string, or content is empty/whitespace.")
446
- return {"messages": [AIMessage(content="[ERROR] Agent node: Initial SystemMessage content is invalid or empty.")]}
447
 
448
  bound_llm = LLM_INSTANCE.bind_tools(TOOLS)
449
  response = bound_llm.invoke(messages_for_llm)
450
  return {"messages": [response]}
451
 
452
- if not LG_ToolExecutor_Class: raise ValueError("LG_ToolExecutor_Class (ToolNode or ToolExecutor) is None for LangGraph.")
453
  tool_executor_instance_lg = LG_ToolExecutor_Class(tools=TOOLS)
454
 
455
- # If LG_ToolExecutor_Class is ToolNode, it's often added directly to the graph.
456
- # If we keep a custom tool_node, it needs to correctly use the tool_executor_instance_lg.
457
- def tool_node(state: AgentState):
458
  last_msg = state['messages'][-1] if state.get('messages') and isinstance(state['messages'][-1], AIMessage) else None
459
  if not last_msg or not last_msg.tool_calls: return {"messages": []}
460
-
461
- # ToolNode can often take the AIMessage directly, or a list of tool_calls
462
- # The `invoke` method of ToolNode typically expects the full previous message or just tool_calls.
463
- # Depending on the exact version and how ToolNode is implemented.
464
- # The most straightforward is to let ToolNode handle the AIMessage's tool_calls.
465
- # This implies tool_executor_instance_lg should be the node itself.
466
- # However, if we must use a custom function:
467
  tool_results = []
468
- for tc in last_msg.tool_calls: # tc is a dict from AIMessage.tool_calls
469
  name, args, tc_id = tc.get('name'), tc.get('args'), tc.get('id')
470
  if not all([name, isinstance(args, dict), tc_id]):
471
  err_msg=f"Invalid tool_call: {tc}"; logger.error(err_msg)
472
  tool_results.append(ToolMessage(f"Error: {err_msg}", tool_call_id=tc_id or "error_id", name=name or "error_tool"))
473
  continue
474
  try:
475
- logger.info(f"LG Tool Invoking via custom tool_node: '{name}' with {args} (ID: {tc_id})")
476
- # Construct ToolInvocation if LG_ToolInvocation is available and needed by the executor_instance
477
- if LG_ToolInvocation and not isinstance(tool_executor_instance_lg, ToolNode): # ToolNode might not need this
478
- invocation = LG_ToolInvocation(tool=name, tool_input=args)
479
- output_lg = tool_executor_instance_lg.invoke(invocation) # type: ignore
480
- else: # Assume ToolNode or compatible executor can take the dict directly
481
- output_lg = tool_executor_instance_lg.invoke(tc) # Pass the tool_call dict
482
-
483
  tool_results.append(ToolMessage(content=str(output_lg), tool_call_id=tc_id, name=name))
484
  except Exception as e_tool_node_lg:
485
  logger.error(f"LG Tool Error ('{name}'): {e_tool_node_lg}", exc_info=True)
486
  tool_results.append(ToolMessage(content=f"Error for tool {name}: {str(e_tool_node_lg)}", tool_call_id=tc_id, name=name))
487
  return {"messages": tool_results}
488
 
 
489
  workflow_lg = LG_StateGraph(AgentState) # type: ignore
490
  workflow_lg.add_node("agent", agent_node)
491
- # If LG_ToolExecutor_Class is ToolNode, use the instance directly
492
- if type(LG_ToolExecutor_Class).__name__ == 'ToolNode':
493
- workflow_lg.add_node("tools", tool_executor_instance_lg)
494
- logger.info("Added ToolNode instance directly to LangGraph.")
495
- else: # Fallback to custom tool_node (might be needed for older ToolExecutor)
496
- workflow_lg.add_node("tools", tool_node)
497
- logger.info("Added custom tool_node function to LangGraph.")
498
-
499
  workflow_lg.set_entry_point("agent")
500
  def should_continue_lg(state: AgentState): return "tools" if state['messages'][-1].tool_calls else LG_END
501
  workflow_lg.add_conditional_edges("agent", should_continue_lg, {"tools": "tools", LG_END: LG_END}) # type: ignore
@@ -687,7 +673,7 @@ with gr.Blocks(css=".gradio-container {max-width:1280px !important;margin:auto !
687
  demo.load(update_ui_on_load_fn_within_context, [], [agent_status_display, missing_secrets_display])
688
 
689
  if __name__ == "__main__":
690
- logger.info(f"Application starting up (v7.1 - Agent Node Message Structure Change)...")
691
  if not PYPDF2_AVAILABLE: logger.warning("PyPDF2 (PDF tool) NOT AVAILABLE.")
692
  if not PIL_TESSERACT_AVAILABLE: logger.warning("Pillow/Pytesseract (OCR tool) NOT AVAILABLE.")
693
  if not WHISPER_AVAILABLE: logger.warning("Whisper (Audio tool) NOT AVAILABLE.")
 
39
  except ImportError: WHISPER_AVAILABLE = False; print("WARNING: OpenAI Whisper not found, Audio Transcription tool will be disabled.")
40
 
41
  # Google GenAI SDK types
42
+ from google.genai.types import HarmCategory, HarmBlockThreshold # CORRECTED IMPORT
43
+ from google.ai import generativelanguage as glm # For FileState enum
44
 
45
  # LangChain
46
  from langchain_core.messages import HumanMessage, AIMessage, SystemMessage, ToolMessage
 
99
  print("Imported ToolInvocation from langgraph.tools")
100
  except ImportError as e_ti:
101
  print(f"WARNING: Could not import ToolInvocation from langgraph.prebuilt or langgraph.tools: {e_ti}")
102
+ LGToolInvocationActual = None # type: ignore
103
 
104
  if LGToolInvocationActual is not None or type(LG_ToolExecutor_Class).__name__ == 'ToolNode':
105
  from langgraph.graph.message import add_messages as lg_add_messages
 
255
  name_without_ext, current_ext = os.path.splitext(effective_save_path)
256
  if not current_ext:
257
  content_type_header = r.headers.get('content-type', '')
258
+ content_type_val = content_type_header.split(';')[0].strip() if content_type_header else ''
259
  if content_type_val:
260
  guessed_ext = mimetypes.guess_extension(content_type_val)
261
  if guessed_ext: effective_save_path += guessed_ext; logger.info(f"Added guessed ext: {guessed_ext}")
 
327
  """Processes an image file (URL or local path) along with a text prompt using a Gemini multimodal model (gemini-2.0-flash-exp) for tasks like image description, Q&A about the image, or text generation based on the image. Input: JSON '{\"file_identifier\": \"IMAGE_FILENAME_OR_URL\", \"text_prompt\": \"Your question or instruction related to the image.\", \"task_id\": \"TASK_ID_IF_GAIA_FILENAME_ONLY\" (optional)}'. Returns the model's text response."""
328
  global google_genai_client
329
  if not google_genai_client: return "Error: google-genai SDK client not initialized."
330
+ if not PIL_TESSERACT_AVAILABLE : return "Error: Pillow (PIL) library not available for image processing." # Relies on PIL_TESSERACT_AVAILABLE for PIL
331
  try:
332
  data = json.loads(action_input_json_str)
333
  file_identifier = data.get("file_identifier")
 
342
  except Exception as e_img_open: return f"Error opening image file {local_image_path}: {str(e_img_open)}"
343
 
344
  model_id_for_client = f"models/{GEMINI_FLASH_MULTIMODAL_MODEL_NAME}" if not GEMINI_FLASH_MULTIMODAL_MODEL_NAME.startswith("models/") else GEMINI_FLASH_MULTIMODAL_MODEL_NAME
345
+ response = google_genai_client.models.generate_content( # Corrected to use google_genai_client.models
346
  model=model_id_for_client, contents=[pil_image, text_prompt]
347
  )
348
  logger.info(f"Direct Multimodal Tool: Response received from {model_id_for_client}.")
 
396
  model=GEMINI_MODEL_NAME,
397
  google_api_key=GOOGLE_API_KEY,
398
  temperature=0.0,
399
+ # safety_settings parameter is removed to use model's default settings.
400
  timeout=120,
401
  convert_system_message_to_human=False # Explicitly set to False
402
  )
 
417
  except Exception as e: logger.warning(f"PythonREPLTool init failed: {e}")
418
  logger.info(f"Final tools list for agent: {[t.name for t in TOOLS]}")
419
 
420
+ if LANGGRAPH_FLAVOR_AVAILABLE and all([LG_StateGraph, LG_ToolExecutor_Class, LG_END, LLM_INSTANCE, add_messages]): # LG_ToolInvocation removed
421
  if not LANGGRAPH_MEMORY_SAVER and MemorySaver_Class: LANGGRAPH_MEMORY_SAVER = MemorySaver_Class(); logger.info("LangGraph MemorySaver initialized.")
422
  try:
423
  logger.info(f"Attempting LangGraph init (Tool Executor type: {LG_ToolExecutor_Class.__name__ if LG_ToolExecutor_Class else 'None'})")
424
  _TypedDict = getattr(__import__('typing_extensions'), 'TypedDict', dict)
425
  class AgentState(_TypedDict): input: str; messages: Annotated[List[Any], add_messages]
426
 
427
+ # System prompt template - this describes the agent's role and tools.
428
+ # The {input} placeholder for the actual task will be filled by the HumanMessage.
429
+ base_system_prompt_content_lg = LANGGRAPH_PROMPT_TEMPLATE_STR.split("{input}")[0].strip() + "\nTOOLS:\n{tools}\nRESPONSE FORMAT:\nFinal AIMessage should contain ONLY the answer in 'content' and NO 'tool_calls'. If using tools, 'content' can be thought process, with 'tool_calls'.\nBegin!"
430
+
431
 
432
  def agent_node(state: AgentState):
433
+ current_task_query = state.get('input', '') # The specific question/task for this turn
434
+
435
+ system_message_content = base_system_prompt_content_lg.format(
436
+ tools="\n".join([f"- {t.name}: {t.description}" for t in TOOLS])
437
+ )
438
 
439
  messages_for_llm = [SystemMessage(content=system_message_content)]
440
+ messages_for_llm.extend(state.get('messages', [])) # Add history
441
+ messages_for_llm.append(HumanMessage(content=current_task_query)) # Add current task as HumanMessage
 
 
442
 
443
  logger.debug(f"LangGraph agent_node - messages_for_llm: {messages_for_llm}")
444
+ if not messages_for_llm[-1].content or not str(messages_for_llm[-1].content).strip():
445
+ logger.error("LLM call would fail in agent_node: Last HumanMessage content is empty or invalid.")
446
+ return {"messages": [AIMessage(content="[ERROR] Agent node: Current task input (HumanMessage) is empty.")]}
 
447
 
448
  bound_llm = LLM_INSTANCE.bind_tools(TOOLS)
449
  response = bound_llm.invoke(messages_for_llm)
450
  return {"messages": [response]}
451
 
452
+ if not LG_ToolExecutor_Class: raise ValueError("LG_ToolExecutor_Class is None for LangGraph.")
453
  tool_executor_instance_lg = LG_ToolExecutor_Class(tools=TOOLS)
454
 
455
+ def tool_node(state: AgentState): # Custom tool node that expects ToolInvocation if available
 
 
456
  last_msg = state['messages'][-1] if state.get('messages') and isinstance(state['messages'][-1], AIMessage) else None
457
  if not last_msg or not last_msg.tool_calls: return {"messages": []}
 
 
 
 
 
 
 
458
  tool_results = []
459
+ for tc in last_msg.tool_calls:
460
  name, args, tc_id = tc.get('name'), tc.get('args'), tc.get('id')
461
  if not all([name, isinstance(args, dict), tc_id]):
462
  err_msg=f"Invalid tool_call: {tc}"; logger.error(err_msg)
463
  tool_results.append(ToolMessage(f"Error: {err_msg}", tool_call_id=tc_id or "error_id", name=name or "error_tool"))
464
  continue
465
  try:
466
+ logger.info(f"LG Tool Invoking: '{name}' with {args} (ID: {tc_id})")
467
+ if LG_ToolInvocation and type(LG_ToolExecutor_Class).__name__ != 'ToolNode': # Check if ToolInvocation exists and we're not using ToolNode directly
468
+ invocation = LG_ToolInvocation(tool=name, tool_input=args)
469
+ output_lg = tool_executor_instance_lg.invoke(invocation) # type: ignore
470
+ else: # Assume ToolNode or compatible executor can take the dict directly if LG_ToolInvocation is None
471
+ output_lg = tool_executor_instance_lg.invoke(tc) # type: ignore
 
 
472
  tool_results.append(ToolMessage(content=str(output_lg), tool_call_id=tc_id, name=name))
473
  except Exception as e_tool_node_lg:
474
  logger.error(f"LG Tool Error ('{name}'): {e_tool_node_lg}", exc_info=True)
475
  tool_results.append(ToolMessage(content=f"Error for tool {name}: {str(e_tool_node_lg)}", tool_call_id=tc_id, name=name))
476
  return {"messages": tool_results}
477
 
478
+
479
  workflow_lg = LG_StateGraph(AgentState) # type: ignore
480
  workflow_lg.add_node("agent", agent_node)
481
+ # If LG_ToolExecutor_Class is ToolNode, it can often be added directly as the node.
482
+ # workflow_lg.add_node("tools", tool_executor_instance_lg)
483
+ # For now, using the custom tool_node which wraps the executor instance.
484
+ workflow_lg.add_node("tools", tool_node)
 
 
 
 
485
  workflow_lg.set_entry_point("agent")
486
  def should_continue_lg(state: AgentState): return "tools" if state['messages'][-1].tool_calls else LG_END
487
  workflow_lg.add_conditional_edges("agent", should_continue_lg, {"tools": "tools", LG_END: LG_END}) # type: ignore
 
673
  demo.load(update_ui_on_load_fn_within_context, [], [agent_status_display, missing_secrets_display])
674
 
675
  if __name__ == "__main__":
676
+ logger.info(f"Application starting up (v7.2 - Agent Node Message & LLM Safety Fix)...")
677
  if not PYPDF2_AVAILABLE: logger.warning("PyPDF2 (PDF tool) NOT AVAILABLE.")
678
  if not PIL_TESSERACT_AVAILABLE: logger.warning("Pillow/Pytesseract (OCR tool) NOT AVAILABLE.")
679
  if not WHISPER_AVAILABLE: logger.warning("Whisper (Audio tool) NOT AVAILABLE.")