jesusgj committed
Commit 4350623 · 1 Parent(s): ca36f6a

Modified files

Files changed (2)
  1. agent.py +54 -43
  2. requirements.txt +2 -7
agent.py CHANGED
@@ -1,20 +1,16 @@
import os
import time
import logging
- import urllib.parse as urlparse
- import io
- import contextlib
import re
from functools import lru_cache, wraps
- from typing import Optional, Dict, Any
+ from typing import Optional, Dict

from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
from dotenv import load_dotenv
from requests.exceptions import RequestException
- import serpapi
import wikipedia
- from llama_index.core import VectorStoreIndex, download_loader
from llama_index.core.schema import Document
+ from llama_index.readers.web import BeautifulSoupWebReader

from smolagents import (
    CodeAgent,
@@ -77,6 +73,7 @@ def normalize_answer_format(answer: str) -> str:
    answer = answer.strip().rstrip('.')
    is_list = ',' in answer and len(answer.split(',')) > 1
    try:
+         # Check if it can be a number, ignoring commas for list check
        is_numeric = not is_list and float(answer.replace(',', '')) is not None
    except ValueError:
        is_numeric = False
@@ -118,47 +115,50 @@ def initialize_agent():

    @tool
    @retry
-     def query_webpage(url: str, query: str) -> str:
+     @lru_cache(maxsize=128)
+     def get_webpage_content(url: str) -> str:
        """
-         Extracts specific information from a webpage by asking a targeted question.
+         Extracts the text content from a single webpage.

        Args:
-             url (str): The full URL of the webpage to query.
-             query (str): The specific question to ask about the webpage's content.
+             url (str): The full URL of the webpage to read.
        """
-         logging.info(f"📄 Querying webpage: {url}")
-         loader = download_loader("BeautifulSoupWebReader")()
+         logging.info(f"📄 Reading webpage content from: {url}")
+         loader = BeautifulSoupWebReader()
        docs = loader.load_data(urls=[url])
-         if not docs: raise ValueError(f"No content could be extracted from {url}")
-         index = VectorStoreIndex.from_documents(docs)
-         query_engine = index.as_query_engine(response_mode="tree_summarize")
-         response = query_engine.query(query)
-         return str(response)
+         if not docs or not docs[0].text:
+             raise ValueError(f"No content could be extracted from {url}")
+         # Return up to the first 15,000 characters to avoid overwhelming the context window.
+         return docs[0].text[:15000]

    @tool
    @retry
-     def query_youtube_video(video_url: str, query: str) -> str:
+     @lru_cache(maxsize=128)
+     def get_youtube_transcript(video_url: str) -> str:
        """
-         Extracts specific information from a YouTube video transcript.
+         Fetches the full transcript of a YouTube video as a single string.

        Args:
            video_url (str): The full URL of the YouTube video.
-             query (str): The specific question to ask about the video's content.
        """
-         logging.info(f"🎬 Querying YouTube video: {video_url}")
+         logging.info(f"🎬 Fetching YouTube transcript for: {video_url}")
        video_id_match = re.search(r'(?:v=|\/)([a-zA-Z0-9_-]{11}).*', video_url)
-         if not video_id_match: return "Error: Invalid YouTube URL."
+         if not video_id_match:
+             return "Error: Invalid YouTube URL provided."
        video_id = video_id_match.group(1)

-         transcript = YouTubeTranscriptApi.get_transcript(video_id)
-         doc = Document(text=' '.join([t['text'] for t in transcript]))
-         index = VectorStoreIndex.from_documents([doc])
-         query_engine = index.as_query_engine()
-         response = query_engine.query(query)
-         return str(response)
+         try:
+             transcript_list = YouTubeTranscriptApi.get_transcript(video_id)
+             transcript_text = ' '.join([t['text'] for t in transcript_list])
+             # Return up to the first 15,000 characters
+             return transcript_text[:15000]
+         except (TranscriptsDisabled, NoTranscriptFound) as e:
+             logging.error(f"Could not retrieve transcript for {video_url}: {e}")
+             raise YouTubeTranscriptApiError(f"Transcript not available for video {video_id}.") from e

    @tool
    @retry
+     @lru_cache(maxsize=32)
    def wikipedia_search(query: str) -> str:
        """
        Searches Wikipedia for a given query and returns a summary.
@@ -171,7 +171,7 @@ def initialize_agent():
        except wikipedia.exceptions.PageError:
            return f"No Wikipedia page found for '{query}'."
        except wikipedia.exceptions.DisambiguationError as e:
-             return f"Ambiguous query '{query}'. Options: {e.options[:3]}"
+             return f"Ambiguous query '{query}'. Options: {', '.join(e.options[:3])}"
        except Exception as e:
            return f"An error occurred during Wikipedia search: {e}"

@@ -187,7 +187,14 @@ def initialize_agent():

    google_search_tool = GoogleSearchTool(provider='serpapi', serpapi_api_key=api_keys['serpapi']) if api_keys['serpapi'] else None

-     tools_list = [tool for tool in [google_search_tool, query_webpage, query_youtube_video, wikipedia_search] if tool]
+     tools_list = [
+         tool for tool in [
+             google_search_tool,
+             get_webpage_content,
+             get_youtube_transcript,
+             wikipedia_search
+         ] if tool
+     ]

    agent = CodeAgent(
        model=model,
@@ -195,18 +202,19 @@ def initialize_agent():
        instructions="""You are a master AI assistant for the GAIA benchmark. Your goal is to provide a single, precise, and final answer by writing and executing Python code.

**STRATEGY:**
- You have a powerful toolkit. You can write and execute any Python code you need. You also have access to pre-defined tools that you can call from within your code.
+ You have a powerful toolkit. You can write and execute any Python code you need. You also have access to pre-defined tools that you can call from within your code to gather information.

1. **Analyze**: Break down the user's question into logical steps.
- 2. **Plan**: Decide if you need to search the web, query a webpage, or perform a calculation.
+ 2. **Plan**: Decide if you need to search the web, read a webpage, get a video transcript, or perform a calculation.
3. **Execute**: Write a Python script to perform the steps.
-     * For web searches, use `GoogleSearchTool()`.
+     * For general web searches, use `GoogleSearchTool()`.
    * For Wikipedia lookups, use `wikipedia_search()`.
-     * For complex calculations or data manipulation, write the Python code directly.
-     * To query a specific webpage, use `query_webpage()`.
+     * To read the text content of a specific webpage, use `get_webpage_content()`.
+     * To get the transcript of a YouTube video, use `get_youtube_transcript()`.
+     * For complex calculations or data manipulation, write the Python code directly using libraries like `math`.

**HOW TO USE TOOLS IN YOUR CODE:**
- To solve a problem, you will write a Python code block that calls the necessary tools.
+ To solve a problem, you will write a Python code block that calls the necessary tools. You then reason over the results of these tools to produce your final answer.

*Example 1: Simple Calculation*
```python
@@ -216,14 +224,17 @@ def initialize_agent():
print(int(result))
```

- *Example 2: Multi-step question involving web search*
+ *Example 2: Multi-step question involving web search and reading a page*
```python
- # Find the birth date of the author of 'Pride and Prejudice'
- author_name_info = GoogleSearchTool(query="author of Pride and Prejudice")
- # Let's assume the tool returns "Jane Austen"
- # Now get the birth date from Wikipedia
- birth_date_info = wikipedia_search(query="Jane Austen birth date")
- print(birth_date_info)
+ # Find the name of the journal that published the article "A Rapid and Sensitive Method for the Quantitation of Microgram Quantities of Protein Utilizing the Principle of Protein-Dye Binding"
+ # First, find the URL of the paper.
+ search_results = GoogleSearchTool(query="A Rapid and Sensitive Method for the Quantitation of Microgram Quantities of Protein Utilizing the Principle of Protein-Dye Binding")
+ # Let's assume the first result has a good URL, like "https://www.sciencedirect.com/science/article/pii/0003269776905271"
+ # Now, read the content of that page to find the journal name.
+ page_content = get_webpage_content(url="https://www.sciencedirect.com/science/article/pii/0003269776905271")
+ # Now I will analyze the text `page_content` in my head to find the journal name.
+ # After reading the text, I found the journal is "Analytical Biochemistry".
+ print("Analytical Biochemistry")
```

**CRITICAL INSTRUCTION:** You MUST end your entire response with the line `FINAL ANSWER: [Your Final Answer]`. This is the only part of your response that will be graded. Adhere to strict formatting: no extra words, no currency symbols, no commas in numbers.
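For context on the caching pattern this commit introduces: the rewritten tools stack `@lru_cache` beneath `@tool`/`@retry`, so a repeated request for the same URL or video ID is answered from memory instead of being re-fetched. Below is a minimal sketch of that behaviour, using a hypothetical `fetch_page_text` helper and plain `requests` rather than the repo's own decorators:

```python
import functools

import requests  # already listed in requirements.txt


@functools.lru_cache(maxsize=128)
def fetch_page_text(url: str) -> str:
    """Hypothetical stand-in for get_webpage_content: fetch once, then
    serve identical calls from the in-memory LRU cache."""
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    # Mirror the commit's 15,000-character cap so results stay small.
    return response.text[:15000]


if __name__ == "__main__":
    first = fetch_page_text("https://example.com")   # network request
    second = fetch_page_text("https://example.com")  # cache hit, no request
    print(len(first) == len(second), fetch_page_text.cache_info())
```

Because `lru_cache` sits closest to the function in the commit's decorator stack, a cache hit returns immediately and the `@retry` wrapper never has to re-run the network call; the arguments are plain strings, so they hash cleanly.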
requirements.txt CHANGED
@@ -4,16 +4,11 @@ python-dotenv
huggingface_hub
gradio
markdownify
- duckduckgo-search
wikipedia
serpapi
- llama-index
youtube-transcript-api
together
- python-chess
- transformers
- torch
requests
- llama-index
+ llama-index-readers-web
beautifulsoup4
- lxml
+ lxml
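The dependency swap lines up with the import change in agent.py: `download_loader` from the monolithic `llama-index` package is gone, and the web reader now comes from the standalone `llama-index-readers-web` distribution. A minimal sketch of the reader call the new requirement supports (the example URL is illustrative):

```python
# pip install llama-index-readers-web beautifulsoup4 lxml
from llama_index.readers.web import BeautifulSoupWebReader

loader = BeautifulSoupWebReader()
# load_data returns a list of Document objects, one per URL.
docs = loader.load_data(urls=["https://example.com"])

if docs and docs[0].text:
    # The commit truncates to 15,000 characters before handing text to the agent.
    print(docs[0].text[:500])
```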