jesusvilela committed on
Commit
170baad
·
verified ·
1 Parent(s): 05e4e0d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -51
app.py CHANGED
@@ -39,8 +39,8 @@ try: import whisper; WHISPER_AVAILABLE = True
39
  except ImportError: WHISPER_AVAILABLE = False; print("WARNING: OpenAI Whisper not found, Audio Transcription tool will be disabled.")
40
 
41
  # Google GenAI SDK types
42
- from google.genai.types import HarmCategory, HarmBlockThreshold
43
- from google.ai import generativelanguage as glm
44
 
45
  # LangChain
46
  from langchain_core.messages import HumanMessage, AIMessage, SystemMessage, ToolMessage
@@ -99,7 +99,7 @@ try:
99
  print("Imported ToolInvocation from langgraph.tools")
100
  except ImportError as e_ti:
101
  print(f"WARNING: Could not import ToolInvocation from langgraph.prebuilt or langgraph.tools: {e_ti}")
102
- LGToolInvocationActual = None
103
 
104
  if LGToolInvocationActual is not None or type(LG_ToolExecutor_Class).__name__ == 'ToolNode':
105
  from langgraph.graph.message import add_messages as lg_add_messages
@@ -255,7 +255,7 @@ def _download_file(file_identifier: str, task_id_for_file: Optional[str] = None)
255
  name_without_ext, current_ext = os.path.splitext(effective_save_path)
256
  if not current_ext:
257
  content_type_header = r.headers.get('content-type', '')
258
- content_type_val = content_type_header.split(';')[0].strip() if content_type_header else ''
259
  if content_type_val:
260
  guessed_ext = mimetypes.guess_extension(content_type_val)
261
  if guessed_ext: effective_save_path += guessed_ext; logger.info(f"Added guessed ext: {guessed_ext}")
@@ -327,7 +327,7 @@ def direct_multimodal_gemini_tool(action_input_json_str: str) -> str:
327
  """Processes an image file (URL or local path) along with a text prompt using a Gemini multimodal model (gemini-2.0-flash-exp) for tasks like image description, Q&A about the image, or text generation based on the image. Input: JSON '{\"file_identifier\": \"IMAGE_FILENAME_OR_URL\", \"text_prompt\": \"Your question or instruction related to the image.\", \"task_id\": \"TASK_ID_IF_GAIA_FILENAME_ONLY\" (optional)}'. Returns the model's text response."""
328
  global google_genai_client
329
  if not google_genai_client: return "Error: google-genai SDK client not initialized."
330
- if not PIL_TESSERACT_AVAILABLE : return "Error: Pillow (PIL) library not available for image processing."
331
  try:
332
  data = json.loads(action_input_json_str)
333
  file_identifier = data.get("file_identifier")
@@ -342,7 +342,7 @@ def direct_multimodal_gemini_tool(action_input_json_str: str) -> str:
342
  except Exception as e_img_open: return f"Error opening image file {local_image_path}: {str(e_img_open)}"
343
 
344
  model_id_for_client = f"models/{GEMINI_FLASH_MULTIMODAL_MODEL_NAME}" if not GEMINI_FLASH_MULTIMODAL_MODEL_NAME.startswith("models/") else GEMINI_FLASH_MULTIMODAL_MODEL_NAME
345
- response = google_genai_client.models.generate_content(
346
  model=model_id_for_client, contents=[pil_image, text_prompt]
347
  )
348
  logger.info(f"Direct Multimodal Tool: Response received from {model_id_for_client}.")
@@ -396,7 +396,7 @@ def initialize_agent_and_tools(force_reinit=False):
396
  model=GEMINI_MODEL_NAME,
397
  google_api_key=GOOGLE_API_KEY,
398
  temperature=0.0,
399
- # safety_settings is removed to use model defaults to isolate "contents not specified" error
400
  timeout=120,
401
  convert_system_message_to_human=False # Explicitly set to False
402
  )
@@ -417,85 +417,71 @@ def initialize_agent_and_tools(force_reinit=False):
417
  except Exception as e: logger.warning(f"PythonREPLTool init failed: {e}")
418
  logger.info(f"Final tools list for agent: {[t.name for t in TOOLS]}")
419
 
420
- if LANGGRAPH_FLAVOR_AVAILABLE and all([LG_StateGraph, LG_ToolExecutor_Class, LG_END, LLM_INSTANCE, add_messages]): # LG_ToolInvocation removed from check
421
  if not LANGGRAPH_MEMORY_SAVER and MemorySaver_Class: LANGGRAPH_MEMORY_SAVER = MemorySaver_Class(); logger.info("LangGraph MemorySaver initialized.")
422
  try:
423
  logger.info(f"Attempting LangGraph init (Tool Executor type: {LG_ToolExecutor_Class.__name__ if LG_ToolExecutor_Class else 'None'})")
424
  _TypedDict = getattr(__import__('typing_extensions'), 'TypedDict', dict)
425
  class AgentState(_TypedDict): input: str; messages: Annotated[List[Any], add_messages]
426
 
427
- base_system_prompt_content_template_lg = LANGGRAPH_PROMPT_TEMPLATE_STR.format(
428
- tools="\n".join([f"- {t.name}: {t.description}" for t in TOOLS]),
429
- input="{current_task_input_placeholder}"
430
- )
431
 
432
  def agent_node(state: AgentState):
433
- current_task_actual_input = state.get('input', '')
434
- system_message_content = base_system_prompt_content_template_lg.replace("{current_task_input_placeholder}", current_task_actual_input)
 
 
 
435
 
436
  messages_for_llm = [SystemMessage(content=system_message_content)]
437
- # If there's history, append it after the SystemMessage.
438
- # The current_task_actual_input is now part of the SystemMessage.
439
- # The 'messages' in state are previous AIMessages/ToolMessages.
440
- messages_for_llm.extend(state.get('messages', []))
441
 
442
  logger.debug(f"LangGraph agent_node - messages_for_llm: {messages_for_llm}")
443
- # Check the first message (SystemMessage)
444
- if not messages_for_llm or not (isinstance(messages_for_llm[0], SystemMessage) and messages_for_llm[0].content and str(messages_for_llm[0].content).strip()):
445
- logger.error("LLM call would fail in agent_node: First message (SystemMessage) is missing, has no content, is not a string, or content is empty/whitespace.")
446
- return {"messages": [AIMessage(content="[ERROR] Agent node: Initial SystemMessage content is invalid or empty.")]}
447
 
448
  bound_llm = LLM_INSTANCE.bind_tools(TOOLS)
449
  response = bound_llm.invoke(messages_for_llm)
450
  return {"messages": [response]}
451
 
452
- if not LG_ToolExecutor_Class: raise ValueError("LG_ToolExecutor_Class (ToolNode or ToolExecutor) is None for LangGraph.")
453
  tool_executor_instance_lg = LG_ToolExecutor_Class(tools=TOOLS)
454
 
455
- # If LG_ToolExecutor_Class is ToolNode, it's often added directly to the graph.
456
- # If we keep a custom tool_node, it needs to correctly use the tool_executor_instance_lg.
457
- def tool_node(state: AgentState):
458
  last_msg = state['messages'][-1] if state.get('messages') and isinstance(state['messages'][-1], AIMessage) else None
459
  if not last_msg or not last_msg.tool_calls: return {"messages": []}
460
-
461
- # ToolNode can often take the AIMessage directly, or a list of tool_calls
462
- # The `invoke` method of ToolNode typically expects the full previous message or just tool_calls.
463
- # Depending on the exact version and how ToolNode is implemented.
464
- # The most straightforward is to let ToolNode handle the AIMessage's tool_calls.
465
- # This implies tool_executor_instance_lg should be the node itself.
466
- # However, if we must use a custom function:
467
  tool_results = []
468
- for tc in last_msg.tool_calls: # tc is a dict from AIMessage.tool_calls
469
  name, args, tc_id = tc.get('name'), tc.get('args'), tc.get('id')
470
  if not all([name, isinstance(args, dict), tc_id]):
471
  err_msg=f"Invalid tool_call: {tc}"; logger.error(err_msg)
472
  tool_results.append(ToolMessage(f"Error: {err_msg}", tool_call_id=tc_id or "error_id", name=name or "error_tool"))
473
  continue
474
  try:
475
- logger.info(f"LG Tool Invoking via custom tool_node: '{name}' with {args} (ID: {tc_id})")
476
- # Construct ToolInvocation if LG_ToolInvocation is available and needed by the executor_instance
477
- if LG_ToolInvocation and not isinstance(tool_executor_instance_lg, ToolNode): # ToolNode might not need this
478
- invocation = LG_ToolInvocation(tool=name, tool_input=args)
479
- output_lg = tool_executor_instance_lg.invoke(invocation) # type: ignore
480
- else: # Assume ToolNode or compatible executor can take the dict directly
481
- output_lg = tool_executor_instance_lg.invoke(tc) # Pass the tool_call dict
482
-
483
  tool_results.append(ToolMessage(content=str(output_lg), tool_call_id=tc_id, name=name))
484
  except Exception as e_tool_node_lg:
485
  logger.error(f"LG Tool Error ('{name}'): {e_tool_node_lg}", exc_info=True)
486
  tool_results.append(ToolMessage(content=f"Error for tool {name}: {str(e_tool_node_lg)}", tool_call_id=tc_id, name=name))
487
  return {"messages": tool_results}
488
 
 
489
  workflow_lg = LG_StateGraph(AgentState) # type: ignore
490
  workflow_lg.add_node("agent", agent_node)
491
- # If LG_ToolExecutor_Class is ToolNode, use the instance directly
492
- if type(LG_ToolExecutor_Class).__name__ == 'ToolNode':
493
- workflow_lg.add_node("tools", tool_executor_instance_lg)
494
- logger.info("Added ToolNode instance directly to LangGraph.")
495
- else: # Fallback to custom tool_node (might be needed for older ToolExecutor)
496
- workflow_lg.add_node("tools", tool_node)
497
- logger.info("Added custom tool_node function to LangGraph.")
498
-
499
  workflow_lg.set_entry_point("agent")
500
  def should_continue_lg(state: AgentState): return "tools" if state['messages'][-1].tool_calls else LG_END
501
  workflow_lg.add_conditional_edges("agent", should_continue_lg, {"tools": "tools", LG_END: LG_END}) # type: ignore
@@ -687,7 +673,7 @@ with gr.Blocks(css=".gradio-container {max-width:1280px !important;margin:auto !
687
  demo.load(update_ui_on_load_fn_within_context, [], [agent_status_display, missing_secrets_display])
688
 
689
  if __name__ == "__main__":
690
- logger.info(f"Application starting up (v7.1 - Agent Node Message Structure Change)...")
691
  if not PYPDF2_AVAILABLE: logger.warning("PyPDF2 (PDF tool) NOT AVAILABLE.")
692
  if not PIL_TESSERACT_AVAILABLE: logger.warning("Pillow/Pytesseract (OCR tool) NOT AVAILABLE.")
693
  if not WHISPER_AVAILABLE: logger.warning("Whisper (Audio tool) NOT AVAILABLE.")
 
39
  except ImportError: WHISPER_AVAILABLE = False; print("WARNING: OpenAI Whisper not found, Audio Transcription tool will be disabled.")
40
 
41
  # Google GenAI SDK types
42
+ from google.genai.types import HarmCategory, HarmBlockThreshold # CORRECTED IMPORT
43
+ from google.ai import generativelanguage as glm # For FileState enum
44
 
45
  # LangChain
46
  from langchain_core.messages import HumanMessage, AIMessage, SystemMessage, ToolMessage
 
99
  print("Imported ToolInvocation from langgraph.tools")
100
  except ImportError as e_ti:
101
  print(f"WARNING: Could not import ToolInvocation from langgraph.prebuilt or langgraph.tools: {e_ti}")
102
+ LGToolInvocationActual = None # type: ignore
103
 
104
  if LGToolInvocationActual is not None or type(LG_ToolExecutor_Class).__name__ == 'ToolNode':
105
  from langgraph.graph.message import add_messages as lg_add_messages
 
255
  name_without_ext, current_ext = os.path.splitext(effective_save_path)
256
  if not current_ext:
257
  content_type_header = r.headers.get('content-type', '')
258
+ content_type_val = content_type_header.split(';')[0].strip() if content_type_header else ''
259
  if content_type_val:
260
  guessed_ext = mimetypes.guess_extension(content_type_val)
261
  if guessed_ext: effective_save_path += guessed_ext; logger.info(f"Added guessed ext: {guessed_ext}")
 
327
  """Processes an image file (URL or local path) along with a text prompt using a Gemini multimodal model (gemini-2.0-flash-exp) for tasks like image description, Q&A about the image, or text generation based on the image. Input: JSON '{\"file_identifier\": \"IMAGE_FILENAME_OR_URL\", \"text_prompt\": \"Your question or instruction related to the image.\", \"task_id\": \"TASK_ID_IF_GAIA_FILENAME_ONLY\" (optional)}'. Returns the model's text response."""
328
  global google_genai_client
329
  if not google_genai_client: return "Error: google-genai SDK client not initialized."
330
+ if not PIL_TESSERACT_AVAILABLE : return "Error: Pillow (PIL) library not available for image processing." # Relies on PIL_TESSERACT_AVAILABLE for PIL
331
  try:
332
  data = json.loads(action_input_json_str)
333
  file_identifier = data.get("file_identifier")
 
342
  except Exception as e_img_open: return f"Error opening image file {local_image_path}: {str(e_img_open)}"
343
 
344
  model_id_for_client = f"models/{GEMINI_FLASH_MULTIMODAL_MODEL_NAME}" if not GEMINI_FLASH_MULTIMODAL_MODEL_NAME.startswith("models/") else GEMINI_FLASH_MULTIMODAL_MODEL_NAME
345
+ response = google_genai_client.models.generate_content( # Corrected to use google_genai_client.models
346
  model=model_id_for_client, contents=[pil_image, text_prompt]
347
  )
348
  logger.info(f"Direct Multimodal Tool: Response received from {model_id_for_client}.")
 
396
  model=GEMINI_MODEL_NAME,
397
  google_api_key=GOOGLE_API_KEY,
398
  temperature=0.0,
399
+ # safety_settings parameter is removed to use model's default settings.
400
  timeout=120,
401
  convert_system_message_to_human=False # Explicitly set to False
402
  )
 
417
  except Exception as e: logger.warning(f"PythonREPLTool init failed: {e}")
418
  logger.info(f"Final tools list for agent: {[t.name for t in TOOLS]}")
419
 
420
+ if LANGGRAPH_FLAVOR_AVAILABLE and all([LG_StateGraph, LG_ToolExecutor_Class, LG_END, LLM_INSTANCE, add_messages]): # LG_ToolInvocation removed
421
  if not LANGGRAPH_MEMORY_SAVER and MemorySaver_Class: LANGGRAPH_MEMORY_SAVER = MemorySaver_Class(); logger.info("LangGraph MemorySaver initialized.")
422
  try:
423
  logger.info(f"Attempting LangGraph init (Tool Executor type: {LG_ToolExecutor_Class.__name__ if LG_ToolExecutor_Class else 'None'})")
424
  _TypedDict = getattr(__import__('typing_extensions'), 'TypedDict', dict)
425
  class AgentState(_TypedDict): input: str; messages: Annotated[List[Any], add_messages]
426
 
427
+ # System prompt template - this describes the agent's role and tools.
428
+ # The {input} placeholder for the actual task will be filled by the HumanMessage.
429
+ base_system_prompt_content_lg = LANGGRAPH_PROMPT_TEMPLATE_STR.split("{input}")[0].strip() + "\nTOOLS:\n{tools}\nRESPONSE FORMAT:\nFinal AIMessage should contain ONLY the answer in 'content' and NO 'tool_calls'. If using tools, 'content' can be thought process, with 'tool_calls'.\nBegin!"
430
+
431
 
432
  def agent_node(state: AgentState):
433
+ current_task_query = state.get('input', '') # The specific question/task for this turn
434
+
435
+ system_message_content = base_system_prompt_content_lg.format(
436
+ tools="\n".join([f"- {t.name}: {t.description}" for t in TOOLS])
437
+ )
438
 
439
  messages_for_llm = [SystemMessage(content=system_message_content)]
440
+ messages_for_llm.extend(state.get('messages', [])) # Add history
441
+ messages_for_llm.append(HumanMessage(content=current_task_query)) # Add current task as HumanMessage
 
 
442
 
443
  logger.debug(f"LangGraph agent_node - messages_for_llm: {messages_for_llm}")
444
+ if not messages_for_llm[-1].content or not str(messages_for_llm[-1].content).strip():
445
+ logger.error("LLM call would fail in agent_node: Last HumanMessage content is empty or invalid.")
446
+ return {"messages": [AIMessage(content="[ERROR] Agent node: Current task input (HumanMessage) is empty.")]}
 
447
 
448
  bound_llm = LLM_INSTANCE.bind_tools(TOOLS)
449
  response = bound_llm.invoke(messages_for_llm)
450
  return {"messages": [response]}
451
 
452
+ if not LG_ToolExecutor_Class: raise ValueError("LG_ToolExecutor_Class is None for LangGraph.")
453
  tool_executor_instance_lg = LG_ToolExecutor_Class(tools=TOOLS)
454
 
455
+ def tool_node(state: AgentState): # Custom tool node that expects ToolInvocation if available
 
 
456
  last_msg = state['messages'][-1] if state.get('messages') and isinstance(state['messages'][-1], AIMessage) else None
457
  if not last_msg or not last_msg.tool_calls: return {"messages": []}
 
 
 
 
 
 
 
458
  tool_results = []
459
+ for tc in last_msg.tool_calls:
460
  name, args, tc_id = tc.get('name'), tc.get('args'), tc.get('id')
461
  if not all([name, isinstance(args, dict), tc_id]):
462
  err_msg=f"Invalid tool_call: {tc}"; logger.error(err_msg)
463
  tool_results.append(ToolMessage(f"Error: {err_msg}", tool_call_id=tc_id or "error_id", name=name or "error_tool"))
464
  continue
465
  try:
466
+ logger.info(f"LG Tool Invoking: '{name}' with {args} (ID: {tc_id})")
467
+ if LG_ToolInvocation and type(LG_ToolExecutor_Class).__name__ != 'ToolNode': # Check if ToolInvocation exists and we're not using ToolNode directly
468
+ invocation = LG_ToolInvocation(tool=name, tool_input=args)
469
+ output_lg = tool_executor_instance_lg.invoke(invocation) # type: ignore
470
+ else: # Assume ToolNode or compatible executor can take the dict directly if LG_ToolInvocation is None
471
+ output_lg = tool_executor_instance_lg.invoke(tc) # type: ignore
 
 
472
  tool_results.append(ToolMessage(content=str(output_lg), tool_call_id=tc_id, name=name))
473
  except Exception as e_tool_node_lg:
474
  logger.error(f"LG Tool Error ('{name}'): {e_tool_node_lg}", exc_info=True)
475
  tool_results.append(ToolMessage(content=f"Error for tool {name}: {str(e_tool_node_lg)}", tool_call_id=tc_id, name=name))
476
  return {"messages": tool_results}
477
 
478
+
479
  workflow_lg = LG_StateGraph(AgentState) # type: ignore
480
  workflow_lg.add_node("agent", agent_node)
481
+ # If LG_ToolExecutor_Class is ToolNode, it can often be added directly as the node.
482
+ # workflow_lg.add_node("tools", tool_executor_instance_lg)
483
+ # For now, using the custom tool_node which wraps the executor instance.
484
+ workflow_lg.add_node("tools", tool_node)
 
 
 
 
485
  workflow_lg.set_entry_point("agent")
486
  def should_continue_lg(state: AgentState): return "tools" if state['messages'][-1].tool_calls else LG_END
487
  workflow_lg.add_conditional_edges("agent", should_continue_lg, {"tools": "tools", LG_END: LG_END}) # type: ignore
 
673
  demo.load(update_ui_on_load_fn_within_context, [], [agent_status_display, missing_secrets_display])
674
 
675
  if __name__ == "__main__":
676
+ logger.info(f"Application starting up (v7.2 - Agent Node Message & LLM Safety Fix)...")
677
  if not PYPDF2_AVAILABLE: logger.warning("PyPDF2 (PDF tool) NOT AVAILABLE.")
678
  if not PIL_TESSERACT_AVAILABLE: logger.warning("Pillow/Pytesseract (OCR tool) NOT AVAILABLE.")
679
  if not WHISPER_AVAILABLE: logger.warning("Whisper (Audio tool) NOT AVAILABLE.")