Spaces:

jesusvilela
/

DearDreadyUnit4

Sleeping

App Files Community

jesusvilela committed on Jun 2

Commit

862dde4

verified ·

1 Parent(s): 3d6f41a

Update app.py

Browse files

Files changed (1) hide show

app.py +22 -15

app.py CHANGED Viewed

@@ -39,13 +39,13 @@ try: import whisper; WHISPER_AVAILABLE = True
 except ImportError: WHISPER_AVAILABLE = False; print("WARNING: OpenAI Whisper not found, Audio Transcription tool will be disabled.")
 # Google GenAI (Used by LangChain integration AND direct client)
-from google.genai.types import HarmCategory, HarmBlockThreshold # CORRECTED IMPORT
 from google.ai import generativelanguage as glm # For FileState enum
 # LangChain
 from langchain_core.messages import HumanMessage, AIMessage, SystemMessage, ToolMessage
 from langchain.prompts import PromptTemplate
-from langchain.tools import BaseTool, tool as lc_tool_decorator # Use langchain.tools.tool
 from langchain_google_genai import ChatGoogleGenerativeAI
 from langchain.agents import AgentExecutor, create_react_agent
 from langchain_community.tools import DuckDuckGoSearchRun
@@ -261,7 +261,6 @@ def _download_file(file_identifier: str, task_id_for_file: Optional[str] = None)
         logger.error(f"Download error for {file_url_to_try}: {e}", exc_info=True); return f"Error: {str(e)[:100]}"
 # --- Tool Function Definitions ---
-# Corrected: Removed 'description' from @lc_tool_decorator, use docstring
 @lc_tool_decorator
 def read_pdf_tool(action_input_json_str: str) -> str:
     """Reads text content from a PDF file. Input: JSON '{\"file_identifier\": \"FILENAME_OR_URL\", \"task_id\": \"TASK_ID_IF_GAIA_FILENAME_ONLY\"}'. Returns extracted text."""
@@ -326,7 +325,7 @@ def direct_multimodal_gemini_tool(action_input_json_str: str) -> str:
         if not file_identifier: return "Error: 'file_identifier' for image missing."
         logger.info(f"Direct Multimodal Tool: Processing image '{file_identifier}' with prompt '{text_prompt}'")
         local_image_path = _download_file(file_identifier, task_id)
-        if local_image_path.startswith("Error:"): return f"Error downloading image for Direct Multimodal Tool: {local_image_path}"
         try:
             pil_image = Image.open(local_image_path)
         except Exception as e_img_open: return f"Error opening image file {local_image_path}: {str(e_img_open)}"
@@ -337,7 +336,7 @@ def direct_multimodal_gemini_tool(action_input_json_str: str) -> str:
         )
         logger.info(f"Direct Multimodal Tool: Response received from {model_id_for_client} received.")
         return response.text[:40000]
-    except json.JSONDecodeError as e_json_mm: return f"Error parsing JSON input for Direct Multimodal Tool: {str(e_json_mm)}. Input: {action_input_json_str}"
     except Exception as e_tool_mm:
         logger.error(f"Error in direct_multimodal_gemini_tool: {e_tool_mm}", exc_info=True)
         return f"Error executing Direct Multimodal Tool: {str(e_tool_mm)}"
@@ -381,8 +380,7 @@ def initialize_agent_and_tools(force_reinit=False):
     logger.info("Initializing agent and tools...")
     if not GOOGLE_API_KEY: raise ValueError("GOOGLE_API_KEY not set for LangChain LLM.")
-    # Corrected safety_settings format for ChatGoogleGenerativeAI
-    # Using INTEGER VALUES for HarmCategory keys and HarmBlockThreshold enum members for values.
     llm_safety_settings_corrected_final = {
         HarmCategory.HARM_CATEGORY_HARASSMENT.value: HarmBlockThreshold.BLOCK_NONE.value,
         HarmCategory.HARM_CATEGORY_HATE_SPEECH.value: HarmBlockThreshold.BLOCK_NONE.value,
@@ -395,9 +393,9 @@ def initialize_agent_and_tools(force_reinit=False):
             model=GEMINI_MODEL_NAME,
             google_api_key=GOOGLE_API_KEY,
             temperature=0.0,
-            #safety_settings=llm_safety_settings_corrected_final,
             timeout=120,
-            convert_system_message_to_human=True
         )
         logger.info(f"LangChain LLM (Planner) initialized: {GEMINI_MODEL_NAME}")
     except Exception as e:
@@ -429,8 +427,17 @@ def initialize_agent_and_tools(force_reinit=False):
             )
             def agent_node(state: AgentState):
                 current_input = state.get('input', '')
-                formatted_system_prompt = prompt_content_lg_init.replace("{input}", current_input)
-                messages_for_llm = [SystemMessage(content=formatted_system_prompt)] + state.get('messages', [])
                 bound_llm = LLM_INSTANCE.bind_tools(TOOLS) # type: ignore
                 response = bound_llm.invoke(messages_for_llm)
                 return {"messages": [response]}
@@ -506,8 +513,8 @@ def get_agent_response(prompt: str, task_id: Optional[str]=None, thread_id: Opti
     try:
         if is_langgraph_agent_get:
             logger.debug(f"Using LangGraph agent (Memory: {LANGGRAPH_MEMORY_SAVER is not None}) for thread: {thread_id_to_use}")
-            initial_messages_lg_get = []
-            input_for_lg_get = {"input": prompt, "messages": initial_messages_lg_get}
             final_state_lg_get = AGENT_INSTANCE.invoke(input_for_lg_get, {"configurable": {"thread_id": thread_id_to_use}}) # type: ignore
             if not final_state_lg_get or 'messages' not in final_state_lg_get or not final_state_lg_get['messages']:
                 logger.error("LangGraph: No final state/messages."); return "[ERROR] LangGraph: No final state/messages."
@@ -600,7 +607,7 @@ with gr.Blocks(css=".gradio-container {max-width:1280px !important;margin:auto !
     gr.LoginButton()
     run_button = gr.Button("Run Evaluation & Submit All Answers")
     status_output = gr.Textbox(label="Run Status / Submission Result", lines=7, interactive=False)
-    results_table = gr.DataFrame(label="Q&A Log", headers=["Task ID","Question","Prompt","Raw","Submitted"], wrap=True) # Removed height
     run_button.click(fn=run_and_submit_all, outputs=[status_output,results_table], api_name="run_evaluation")
@@ -626,7 +633,7 @@ with gr.Blocks(css=".gradio-container {max-width:1280px !important;margin:auto !
     demo.load(update_ui_on_load_fn_within_context, [], [agent_status_display, missing_secrets_display])
 if __name__ == "__main__":
-    logger.info(f"Application starting up (v7 - Final SafetySettings Fix)...")
     if not PYPDF2_AVAILABLE: logger.warning("PyPDF2 (PDF tool) NOT AVAILABLE.")
     if not PIL_TESSERACT_AVAILABLE: logger.warning("Pillow/Pytesseract (OCR tool) NOT AVAILABLE.")
     if not WHISPER_AVAILABLE: logger.warning("Whisper (Audio tool) NOT AVAILABLE.")

 except ImportError: WHISPER_AVAILABLE = False; print("WARNING: OpenAI Whisper not found, Audio Transcription tool will be disabled.")
 # Google GenAI (Used by LangChain integration AND direct client)
+from google.genai.types import HarmCategory, HarmBlockThreshold
 from google.ai import generativelanguage as glm # For FileState enum
 # LangChain
 from langchain_core.messages import HumanMessage, AIMessage, SystemMessage, ToolMessage
 from langchain.prompts import PromptTemplate
+from langchain.tools import BaseTool, tool as lc_tool_decorator
 from langchain_google_genai import ChatGoogleGenerativeAI
 from langchain.agents import AgentExecutor, create_react_agent
 from langchain_community.tools import DuckDuckGoSearchRun
         logger.error(f"Download error for {file_url_to_try}: {e}", exc_info=True); return f"Error: {str(e)[:100]}"
 # --- Tool Function Definitions ---
 @lc_tool_decorator
 def read_pdf_tool(action_input_json_str: str) -> str:
     """Reads text content from a PDF file. Input: JSON '{\"file_identifier\": \"FILENAME_OR_URL\", \"task_id\": \"TASK_ID_IF_GAIA_FILENAME_ONLY\"}'. Returns extracted text."""
         if not file_identifier: return "Error: 'file_identifier' for image missing."
         logger.info(f"Direct Multimodal Tool: Processing image '{file_identifier}' with prompt '{text_prompt}'")
         local_image_path = _download_file(file_identifier, task_id)
+        if local_image_path.startswith("Error:"): return f"Error downloading image for Direct MM Tool: {local_image_path}"
         try:
             pil_image = Image.open(local_image_path)
         except Exception as e_img_open: return f"Error opening image file {local_image_path}: {str(e_img_open)}"
         )
         logger.info(f"Direct Multimodal Tool: Response received from {model_id_for_client} received.")
         return response.text[:40000]
+    except json.JSONDecodeError as e_json_mm: return f"Error parsing JSON input for Direct MM Tool: {str(e_json_mm)}. Input: {action_input_json_str}"
     except Exception as e_tool_mm:
         logger.error(f"Error in direct_multimodal_gemini_tool: {e_tool_mm}", exc_info=True)
         return f"Error executing Direct Multimodal Tool: {str(e_tool_mm)}"
     logger.info("Initializing agent and tools...")
     if not GOOGLE_API_KEY: raise ValueError("GOOGLE_API_KEY not set for LangChain LLM.")
+    # Using INTEGER VALUES for HarmCategory keys and HarmBlockThreshold enum .value for values.
     llm_safety_settings_corrected_final = {
         HarmCategory.HARM_CATEGORY_HARASSMENT.value: HarmBlockThreshold.BLOCK_NONE.value,
         HarmCategory.HARM_CATEGORY_HATE_SPEECH.value: HarmBlockThreshold.BLOCK_NONE.value,
             model=GEMINI_MODEL_NAME,
             google_api_key=GOOGLE_API_KEY,
             temperature=0.0,
+            safety_settings=llm_safety_settings_corrected_final,
             timeout=120,
+            convert_system_message_to_human=True # This flag might be interacting with how system prompts are handled
         )
         logger.info(f"LangChain LLM (Planner) initialized: {GEMINI_MODEL_NAME}")
     except Exception as e:
             )
             def agent_node(state: AgentState):
                 current_input = state.get('input', '')
+                # The LANGGRAPH_PROMPT_TEMPLATE_STR serves as the system message, with the current task input.
+                system_message_content = prompt_content_lg_init.replace("{input}", current_input)
+                # Messages for LLM: System prompt followed by history
+                messages_for_llm = [SystemMessage(content=system_message_content)] + state.get('messages', [])
+                logger.debug(f"LangGraph agent_node - messages_for_llm: {messages_for_llm}")
+                if not messages_for_llm or (isinstance(messages_for_llm[0], SystemMessage) and not messages_for_llm[0].content.strip()):
+                    logger.error("LLM call would fail: first message is SystemMessage with no/empty content or messages_for_llm is empty.")
+                    return {"messages": [AIMessage(content="[ERROR] Agent node: System message content is empty.")]}
                 bound_llm = LLM_INSTANCE.bind_tools(TOOLS) # type: ignore
                 response = bound_llm.invoke(messages_for_llm)
                 return {"messages": [response]}
     try:
         if is_langgraph_agent_get:
             logger.debug(f"Using LangGraph agent (Memory: {LANGGRAPH_MEMORY_SAVER is not None}) for thread: {thread_id_to_use}")
+            # The 'input' for LangGraph state is the fully constructed prompt for the task
+            input_for_lg_get = {"input": prompt, "messages": []}
             final_state_lg_get = AGENT_INSTANCE.invoke(input_for_lg_get, {"configurable": {"thread_id": thread_id_to_use}}) # type: ignore
             if not final_state_lg_get or 'messages' not in final_state_lg_get or not final_state_lg_get['messages']:
                 logger.error("LangGraph: No final state/messages."); return "[ERROR] LangGraph: No final state/messages."
     gr.LoginButton()
     run_button = gr.Button("Run Evaluation & Submit All Answers")
     status_output = gr.Textbox(label="Run Status / Submission Result", lines=7, interactive=False)
+    results_table = gr.DataFrame(label="Q&A Log", headers=["Task ID","Question","Prompt","Raw","Submitted"], wrap=True)
     run_button.click(fn=run_and_submit_all, outputs=[status_output,results_table], api_name="run_evaluation")
     demo.load(update_ui_on_load_fn_within_context, [], [agent_status_display, missing_secrets_display])
 if __name__ == "__main__":
+    logger.info(f"Application starting up (v7 - Corrected HarmCategory/BlockThreshold Import & SafetySettings format)...")
     if not PYPDF2_AVAILABLE: logger.warning("PyPDF2 (PDF tool) NOT AVAILABLE.")
     if not PIL_TESSERACT_AVAILABLE: logger.warning("Pillow/Pytesseract (OCR tool) NOT AVAILABLE.")
     if not WHISPER_AVAILABLE: logger.warning("Whisper (Audio tool) NOT AVAILABLE.")