Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -39,13 +39,13 @@ try: import whisper; WHISPER_AVAILABLE = True
|
|
| 39 |
except ImportError: WHISPER_AVAILABLE = False; print("WARNING: OpenAI Whisper not found, Audio Transcription tool will be disabled.")
|
| 40 |
|
| 41 |
# Google GenAI (Used by LangChain integration AND direct client)
|
| 42 |
-
from google.genai.types import HarmCategory, HarmBlockThreshold
|
| 43 |
from google.ai import generativelanguage as glm # For FileState enum
|
| 44 |
|
| 45 |
# LangChain
|
| 46 |
from langchain_core.messages import HumanMessage, AIMessage, SystemMessage, ToolMessage
|
| 47 |
from langchain.prompts import PromptTemplate
|
| 48 |
-
from langchain.tools import BaseTool, tool as lc_tool_decorator
|
| 49 |
from langchain_google_genai import ChatGoogleGenerativeAI
|
| 50 |
from langchain.agents import AgentExecutor, create_react_agent
|
| 51 |
from langchain_community.tools import DuckDuckGoSearchRun
|
|
@@ -261,7 +261,6 @@ def _download_file(file_identifier: str, task_id_for_file: Optional[str] = None)
|
|
| 261 |
logger.error(f"Download error for {file_url_to_try}: {e}", exc_info=True); return f"Error: {str(e)[:100]}"
|
| 262 |
|
| 263 |
# --- Tool Function Definitions ---
|
| 264 |
-
# Corrected: Removed 'description' from @lc_tool_decorator, use docstring
|
| 265 |
@lc_tool_decorator
|
| 266 |
def read_pdf_tool(action_input_json_str: str) -> str:
|
| 267 |
"""Reads text content from a PDF file. Input: JSON '{\"file_identifier\": \"FILENAME_OR_URL\", \"task_id\": \"TASK_ID_IF_GAIA_FILENAME_ONLY\"}'. Returns extracted text."""
|
|
@@ -326,7 +325,7 @@ def direct_multimodal_gemini_tool(action_input_json_str: str) -> str:
|
|
| 326 |
if not file_identifier: return "Error: 'file_identifier' for image missing."
|
| 327 |
logger.info(f"Direct Multimodal Tool: Processing image '{file_identifier}' with prompt '{text_prompt}'")
|
| 328 |
local_image_path = _download_file(file_identifier, task_id)
|
| 329 |
-
if local_image_path.startswith("Error:"): return f"Error downloading image for Direct
|
| 330 |
try:
|
| 331 |
pil_image = Image.open(local_image_path)
|
| 332 |
except Exception as e_img_open: return f"Error opening image file {local_image_path}: {str(e_img_open)}"
|
|
@@ -337,7 +336,7 @@ def direct_multimodal_gemini_tool(action_input_json_str: str) -> str:
|
|
| 337 |
)
|
| 338 |
logger.info(f"Direct Multimodal Tool: Response received from {model_id_for_client} received.")
|
| 339 |
return response.text[:40000]
|
| 340 |
-
except json.JSONDecodeError as e_json_mm: return f"Error parsing JSON input for Direct
|
| 341 |
except Exception as e_tool_mm:
|
| 342 |
logger.error(f"Error in direct_multimodal_gemini_tool: {e_tool_mm}", exc_info=True)
|
| 343 |
return f"Error executing Direct Multimodal Tool: {str(e_tool_mm)}"
|
|
@@ -381,8 +380,7 @@ def initialize_agent_and_tools(force_reinit=False):
|
|
| 381 |
logger.info("Initializing agent and tools...")
|
| 382 |
if not GOOGLE_API_KEY: raise ValueError("GOOGLE_API_KEY not set for LangChain LLM.")
|
| 383 |
|
| 384 |
-
#
|
| 385 |
-
# Using INTEGER VALUES for HarmCategory keys and HarmBlockThreshold enum members for values.
|
| 386 |
llm_safety_settings_corrected_final = {
|
| 387 |
HarmCategory.HARM_CATEGORY_HARASSMENT.value: HarmBlockThreshold.BLOCK_NONE.value,
|
| 388 |
HarmCategory.HARM_CATEGORY_HATE_SPEECH.value: HarmBlockThreshold.BLOCK_NONE.value,
|
|
@@ -395,9 +393,9 @@ def initialize_agent_and_tools(force_reinit=False):
|
|
| 395 |
model=GEMINI_MODEL_NAME,
|
| 396 |
google_api_key=GOOGLE_API_KEY,
|
| 397 |
temperature=0.0,
|
| 398 |
-
|
| 399 |
timeout=120,
|
| 400 |
-
convert_system_message_to_human=True
|
| 401 |
)
|
| 402 |
logger.info(f"LangChain LLM (Planner) initialized: {GEMINI_MODEL_NAME}")
|
| 403 |
except Exception as e:
|
|
@@ -429,8 +427,17 @@ def initialize_agent_and_tools(force_reinit=False):
|
|
| 429 |
)
|
| 430 |
def agent_node(state: AgentState):
|
| 431 |
current_input = state.get('input', '')
|
| 432 |
-
|
| 433 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 434 |
bound_llm = LLM_INSTANCE.bind_tools(TOOLS) # type: ignore
|
| 435 |
response = bound_llm.invoke(messages_for_llm)
|
| 436 |
return {"messages": [response]}
|
|
@@ -506,8 +513,8 @@ def get_agent_response(prompt: str, task_id: Optional[str]=None, thread_id: Opti
|
|
| 506 |
try:
|
| 507 |
if is_langgraph_agent_get:
|
| 508 |
logger.debug(f"Using LangGraph agent (Memory: {LANGGRAPH_MEMORY_SAVER is not None}) for thread: {thread_id_to_use}")
|
| 509 |
-
|
| 510 |
-
input_for_lg_get = {"input": prompt, "messages":
|
| 511 |
final_state_lg_get = AGENT_INSTANCE.invoke(input_for_lg_get, {"configurable": {"thread_id": thread_id_to_use}}) # type: ignore
|
| 512 |
if not final_state_lg_get or 'messages' not in final_state_lg_get or not final_state_lg_get['messages']:
|
| 513 |
logger.error("LangGraph: No final state/messages."); return "[ERROR] LangGraph: No final state/messages."
|
|
@@ -600,7 +607,7 @@ with gr.Blocks(css=".gradio-container {max-width:1280px !important;margin:auto !
|
|
| 600 |
gr.LoginButton()
|
| 601 |
run_button = gr.Button("Run Evaluation & Submit All Answers")
|
| 602 |
status_output = gr.Textbox(label="Run Status / Submission Result", lines=7, interactive=False)
|
| 603 |
-
results_table = gr.DataFrame(label="Q&A Log", headers=["Task ID","Question","Prompt","Raw","Submitted"], wrap=True)
|
| 604 |
|
| 605 |
run_button.click(fn=run_and_submit_all, outputs=[status_output,results_table], api_name="run_evaluation")
|
| 606 |
|
|
@@ -626,7 +633,7 @@ with gr.Blocks(css=".gradio-container {max-width:1280px !important;margin:auto !
|
|
| 626 |
demo.load(update_ui_on_load_fn_within_context, [], [agent_status_display, missing_secrets_display])
|
| 627 |
|
| 628 |
if __name__ == "__main__":
|
| 629 |
-
logger.info(f"Application starting up (v7 -
|
| 630 |
if not PYPDF2_AVAILABLE: logger.warning("PyPDF2 (PDF tool) NOT AVAILABLE.")
|
| 631 |
if not PIL_TESSERACT_AVAILABLE: logger.warning("Pillow/Pytesseract (OCR tool) NOT AVAILABLE.")
|
| 632 |
if not WHISPER_AVAILABLE: logger.warning("Whisper (Audio tool) NOT AVAILABLE.")
|
|
|
|
| 39 |
except ImportError: WHISPER_AVAILABLE = False; print("WARNING: OpenAI Whisper not found, Audio Transcription tool will be disabled.")
|
| 40 |
|
| 41 |
# Google GenAI (Used by LangChain integration AND direct client)
|
| 42 |
+
from google.genai.types import HarmCategory, HarmBlockThreshold
|
| 43 |
from google.ai import generativelanguage as glm # For FileState enum
|
| 44 |
|
| 45 |
# LangChain
|
| 46 |
from langchain_core.messages import HumanMessage, AIMessage, SystemMessage, ToolMessage
|
| 47 |
from langchain.prompts import PromptTemplate
|
| 48 |
+
from langchain.tools import BaseTool, tool as lc_tool_decorator
|
| 49 |
from langchain_google_genai import ChatGoogleGenerativeAI
|
| 50 |
from langchain.agents import AgentExecutor, create_react_agent
|
| 51 |
from langchain_community.tools import DuckDuckGoSearchRun
|
|
|
|
| 261 |
logger.error(f"Download error for {file_url_to_try}: {e}", exc_info=True); return f"Error: {str(e)[:100]}"
|
| 262 |
|
| 263 |
# --- Tool Function Definitions ---
|
|
|
|
| 264 |
@lc_tool_decorator
|
| 265 |
def read_pdf_tool(action_input_json_str: str) -> str:
|
| 266 |
"""Reads text content from a PDF file. Input: JSON '{\"file_identifier\": \"FILENAME_OR_URL\", \"task_id\": \"TASK_ID_IF_GAIA_FILENAME_ONLY\"}'. Returns extracted text."""
|
|
|
|
| 325 |
if not file_identifier: return "Error: 'file_identifier' for image missing."
|
| 326 |
logger.info(f"Direct Multimodal Tool: Processing image '{file_identifier}' with prompt '{text_prompt}'")
|
| 327 |
local_image_path = _download_file(file_identifier, task_id)
|
| 328 |
+
if local_image_path.startswith("Error:"): return f"Error downloading image for Direct MM Tool: {local_image_path}"
|
| 329 |
try:
|
| 330 |
pil_image = Image.open(local_image_path)
|
| 331 |
except Exception as e_img_open: return f"Error opening image file {local_image_path}: {str(e_img_open)}"
|
|
|
|
| 336 |
)
|
| 337 |
logger.info(f"Direct Multimodal Tool: Response received from {model_id_for_client} received.")
|
| 338 |
return response.text[:40000]
|
| 339 |
+
except json.JSONDecodeError as e_json_mm: return f"Error parsing JSON input for Direct MM Tool: {str(e_json_mm)}. Input: {action_input_json_str}"
|
| 340 |
except Exception as e_tool_mm:
|
| 341 |
logger.error(f"Error in direct_multimodal_gemini_tool: {e_tool_mm}", exc_info=True)
|
| 342 |
return f"Error executing Direct Multimodal Tool: {str(e_tool_mm)}"
|
|
|
|
| 380 |
logger.info("Initializing agent and tools...")
|
| 381 |
if not GOOGLE_API_KEY: raise ValueError("GOOGLE_API_KEY not set for LangChain LLM.")
|
| 382 |
|
| 383 |
+
# Safety settings map each HarmCategory's raw .value (key) to a HarmBlockThreshold raw .value (value), not to the enum members themselves.
|
|
|
|
| 384 |
llm_safety_settings_corrected_final = {
|
| 385 |
HarmCategory.HARM_CATEGORY_HARASSMENT.value: HarmBlockThreshold.BLOCK_NONE.value,
|
| 386 |
HarmCategory.HARM_CATEGORY_HATE_SPEECH.value: HarmBlockThreshold.BLOCK_NONE.value,
|
|
|
|
| 393 |
model=GEMINI_MODEL_NAME,
|
| 394 |
google_api_key=GOOGLE_API_KEY,
|
| 395 |
temperature=0.0,
|
| 396 |
+
safety_settings=llm_safety_settings_corrected_final,
|
| 397 |
timeout=120,
|
| 398 |
+
convert_system_message_to_human=True # This flag might be interacting with how system prompts are handled
|
| 399 |
)
|
| 400 |
logger.info(f"LangChain LLM (Planner) initialized: {GEMINI_MODEL_NAME}")
|
| 401 |
except Exception as e:
|
|
|
|
| 427 |
)
|
| 428 |
def agent_node(state: AgentState):
    """Run one planner step: prepend the system prompt and call the tool-bound LLM.

    The system prompt is ``prompt_content_lg_init`` with its ``{input}``
    placeholder replaced by the state's ``input`` value (empty string when the
    key is absent). The state's ``messages`` follow it as conversation history.

    Returns:
        A dict with a single-element ``messages`` list: the LLM response, or
        an ``AIMessage`` carrying an error marker when the system prompt
        content is blank.
    """
    task_text = state.get('input', '')
    system_prompt = SystemMessage(content=prompt_content_lg_init.replace("{input}", task_text))
    messages_for_llm = [system_prompt] + state.get('messages', [])
    logger.debug(f"LangGraph agent_node - messages_for_llm: {messages_for_llm}")

    # The list always begins with the system prompt built above, so blank
    # content is the only condition left to guard before calling the LLM.
    if not system_prompt.content.strip():
        logger.error("LLM call would fail: first message is SystemMessage with no/empty content or messages_for_llm is empty.")
        return {"messages": [AIMessage(content="[ERROR] Agent node: System message content is empty.")]}

    llm_with_tools = LLM_INSTANCE.bind_tools(TOOLS)  # type: ignore
    return {"messages": [llm_with_tools.invoke(messages_for_llm)]}
|
|
|
|
| 513 |
try:
|
| 514 |
if is_langgraph_agent_get:
|
| 515 |
logger.debug(f"Using LangGraph agent (Memory: {LANGGRAPH_MEMORY_SAVER is not None}) for thread: {thread_id_to_use}")
|
| 516 |
+
# The 'input' for LangGraph state is the fully constructed prompt for the task
|
| 517 |
+
input_for_lg_get = {"input": prompt, "messages": []}
|
| 518 |
final_state_lg_get = AGENT_INSTANCE.invoke(input_for_lg_get, {"configurable": {"thread_id": thread_id_to_use}}) # type: ignore
|
| 519 |
if not final_state_lg_get or 'messages' not in final_state_lg_get or not final_state_lg_get['messages']:
|
| 520 |
logger.error("LangGraph: No final state/messages."); return "[ERROR] LangGraph: No final state/messages."
|
|
|
|
| 607 |
gr.LoginButton()
|
| 608 |
run_button = gr.Button("Run Evaluation & Submit All Answers")
|
| 609 |
status_output = gr.Textbox(label="Run Status / Submission Result", lines=7, interactive=False)
|
| 610 |
+
results_table = gr.DataFrame(label="Q&A Log", headers=["Task ID","Question","Prompt","Raw","Submitted"], wrap=True)
|
| 611 |
|
| 612 |
run_button.click(fn=run_and_submit_all, outputs=[status_output,results_table], api_name="run_evaluation")
|
| 613 |
|
|
|
|
| 633 |
demo.load(update_ui_on_load_fn_within_context, [], [agent_status_display, missing_secrets_display])
|
| 634 |
|
| 635 |
if __name__ == "__main__":
|
| 636 |
+
logger.info(f"Application starting up (v7 - Corrected HarmCategory/BlockThreshold Import & SafetySettings format)...")
|
| 637 |
if not PYPDF2_AVAILABLE: logger.warning("PyPDF2 (PDF tool) NOT AVAILABLE.")
|
| 638 |
if not PIL_TESSERACT_AVAILABLE: logger.warning("Pillow/Pytesseract (OCR tool) NOT AVAILABLE.")
|
| 639 |
if not WHISPER_AVAILABLE: logger.warning("Whisper (Audio tool) NOT AVAILABLE.")
|