hetline committed on
Commit
9a96a6f
1 Parent(s): 020a586

feat: add agent body

.gitignore ADDED
@@ -0,0 +1,2 @@
+ /venv
+ .env
agent/__pycache__/custom_state.cpython-312.pyc ADDED
Binary file (696 Bytes).
agent/__pycache__/graph.cpython-312.pyc ADDED
Binary file (1.16 kB).
agent/__pycache__/models.cpython-312.pyc ADDED
Binary file (1 kB).
agent/__pycache__/nodes.cpython-312.pyc ADDED
Binary file (2.48 kB).
agent/custom_state.py ADDED
@@ -0,0 +1,8 @@
+ from typing import TypedDict, Annotated
+ from langgraph.graph.message import add_messages
+ from langchain_core.messages import AnyMessage
+
+ class AssistantState(TypedDict):
+     messages: Annotated[list[AnyMessage], add_messages]
+     filename: str
+     file_extension: str
agent/data.py ADDED
@@ -0,0 +1,11 @@
+ import base64
+
+ class AudioFileLoader:
+
+     def __init__(self, filename: str):
+         self.filename = filename
+
+     def load(self):
+         """Return the file contents as base64-encoded bytes."""
+         with open(self.filename, "rb") as file:
+             return base64.b64encode(file.read())
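For the mp3 path, a hedged sketch of how this base64 payload could be wrapped for a multimodal message; the input_audio block shape follows OpenAI's audio-input format (which needs an audio-capable model), and the helper name is an assumption, not part of the commit:

from data import AudioFileLoader

def audio_content_block(path: str) -> dict:
    # Hypothetical helper: wrap the base64 mp3 payload from AudioFileLoader
    # in an OpenAI-style input_audio content block
    b64 = AudioFileLoader(path).load().decode("utf-8")
    return {
        "type": "input_audio",
        "input_audio": {"data": b64, "format": "mp3"},
    }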
agent/files/cca530fc-4052-43b2-b130-b30968d8aa44.png ADDED
agent/graph.png ADDED
agent/graph.py ADDED
@@ -0,0 +1,25 @@
+ from custom_state import AssistantState
+ from langgraph.graph import START, StateGraph
+ from nodes import assistant, tools, get_file
+ from langgraph.prebuilt import tools_condition, ToolNode
+
+ builder = StateGraph(AssistantState)
+
+ builder.add_node("get_file", get_file)
+ builder.add_node("assistant", assistant)
+ builder.add_node("tools", ToolNode(tools))
+
+ builder.add_edge(START, "get_file")
+ builder.add_edge("get_file", "assistant")
+ builder.add_conditional_edges(
+     "assistant",
+     tools_condition
+ )
+ builder.add_edge("tools", "assistant")
+
+ graph = builder.compile()
+
+ png_bytes = graph.get_graph().draw_mermaid_png()
+
+ with open("graph.png", "wb") as f:
+     f.write(png_bytes)
agent/main.py ADDED
@@ -0,0 +1,25 @@
+ from graph import graph
+ from langchain_core.messages import SystemMessage, HumanMessage
+ from langfuse.langchain import CallbackHandler
+
+ langfuse_handler = CallbackHandler()
+
+ messages = [
+     SystemMessage("You are a general AI assistant. I will ask you a question. "
+                   "First, think step by step about the best approach to answer the question, reporting your thoughts. "
+                   "After you have reported your thoughts, if you need to use a tool, call it. "
+                   "Finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER]. "
+                   "YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma-separated list of numbers and/or strings. "
+                   "If you are asked for a number, don't write it with commas or units such as $ or percent signs unless specified otherwise. "
+                   "If you are asked for a string, don't use articles or abbreviations (e.g. for cities), "
+                   "and write digits in plain text unless specified otherwise. If you are asked for a comma-separated list, "
+                   "apply the above rules depending on whether each element of the list is a number or a string."),
+     HumanMessage("How many studio albums were published by Mercedes Sosa between 2000 and 2009 (included)? "
+                  "You can use the latest 2022 version of English Wikipedia.")
+ ]
+
+ result = graph.invoke(
+     input={'messages': messages, 'filename': ''},
+     config={'callbacks': [langfuse_handler], 'recursion_limit': 10}
+ )
+ print(result["messages"][-1].content)
agent/models.py ADDED
@@ -0,0 +1,32 @@
+ # Model clients for the LLM nodes and for the tools that use them
+ from dotenv import load_dotenv
+ from openai import OpenAI
+ from langchain_openai import ChatOpenAI
+ from langchain_google_genai import ChatGoogleGenerativeAI
+ import os
+
+ load_dotenv()
+
+ openai_api_key = os.getenv("OPENAI_API_KEY")
+ gemini_api_key = os.getenv("GOOGLE_API_KEY")
+ llama4_api_key = os.getenv("HUGGINGFACE_API_TOKEN")
+
+ llama_client = OpenAI(
+     base_url="https://router.huggingface.co/v1",
+     api_key=llama4_api_key,
+ )
+
+ openai_model = ChatOpenAI(
+     model="gpt-4o",
+     # reasoning_effort="minimal",
+     temperature=0,
+     max_tokens=None,
+     api_key=openai_api_key
+ )
+
+ gemini_model = ChatGoogleGenerativeAI(
+     model="gemini-2.5-pro",
+     temperature=0,
+     max_tokens=None,
+     google_api_key=gemini_api_key
+ )
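Note: llama_client is created above but never invoked elsewhere in this commit. A minimal sketch of how it could be called through the Hugging Face router's OpenAI-compatible API (the model id below is an assumption):

response = llama_client.chat.completions.create(
    model="meta-llama/Llama-4-Scout-17B-16E-Instruct",  # hypothetical model id
    messages=[{"role": "user", "content": "Hello"}],
)
print(response.choices[0].message.content)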
agent/nodes.py ADDED
@@ -0,0 +1,139 @@
+ from langchain.tools import tool
+ from langchain_community.document_loaders import CSVLoader, PyPDFLoader, UnstructuredExcelLoader, PythonLoader
+ from data import AudioFileLoader
+ from langchain_yt_dlp.youtube_loader import YoutubeLoaderDL
+ from youtube_transcript_api import YouTubeTranscriptApi
+ from youtube_transcript_api._errors import TranscriptsDisabled
+ from langchain_core.messages import SystemMessage, HumanMessage
+ from tavily import TavilyClient
+ from custom_state import AssistantState
+ import os
+ from models import openai_model, gemini_model, llama_client
+ from dotenv import load_dotenv
+ from e2b_code_interpreter import Sandbox
+
+ import requests
+
+ load_dotenv()
+
+ tavily_client = TavilyClient(
+     api_key=os.getenv("TAVILY_API_KEY")
+ )
+
+ @tool
+ def youtube_search(link: str) -> str:
+     """Fetch the transcript of a YouTube video. Input must be only the video URL."""
+     transcript = ""
+     loader = YoutubeLoaderDL.from_youtube_url(
+         link, add_video_info=True
+     )
+     documents = loader.load()
+     video_metadata = documents[0].metadata
+     video_id = video_metadata["source"]
+     ytt_api = YouTubeTranscriptApi()
+     try:
+         fetched_transcript = ytt_api.fetch(video_id)
+         for snippet in fetched_transcript:
+             transcript = transcript + " " + snippet.text
+     except TranscriptsDisabled:
+         transcript = "No description of video content available"
+     return transcript
+
+ @tool
+ def web_search(query: str) -> str:
+     """Web search to retrieve information outside of the LLM's knowledge."""
+     response = tavily_client.search(query, include_answer=True)
+     return response["answer"]
+
+ @tool
+ def code_interpreter(query: str) -> str:
+     """Lets the LLM write and execute code when a task needs it."""
+     messages = [
+         SystemMessage("You are a really good programmer that writes code to solve questions. Respond with only the code, nothing more."),
+         HumanMessage(query)
+     ]
+     response = openai_model.invoke(messages)
+     code = response.content
+     try:
+         if code:
+             with Sandbox() as sandbox:
+                 execution = sandbox.run_code(code)
+                 return execution.logs.stdout[0]
+         return "The model produced no code to execute"
+     except Exception:
+         return "There was an error while trying to execute code"
+
+ #@tool
+ #def get_file():
+ # I need ways to process mp3 (multimodal)
+ # pdf (can be multimodal)
+ # png (multimodal)
+ # py
+ # xlsx
+ # pass
+
+
+ #result = web_search("https://www.youtube.com/watch?v=1htKBjuUWec.")
+ #result = code_interpreter("How many 'r' are in the word strawberry?")
+ #print(result)
+ #print(type(result))
+
+ tools = [web_search, code_interpreter, youtube_search]
+ llm_with_tools = openai_model.bind_tools(tools)
+
+ def load_data(filename: str, file_extension: str):
+     path = f"files/{filename}"
+     if file_extension == "csv":
+         loader = CSVLoader(file_path=path)
+     elif file_extension == "py":
+         loader = PythonLoader(path)
+     elif file_extension == "pdf":
+         loader = PyPDFLoader(path)
+     elif file_extension == "xlsx":
+         loader = UnstructuredExcelLoader(path)
+     elif file_extension == "mp3":
+         loader = AudioFileLoader(path)
+     # Build the multimodal content block here
+     return loader.load()
+
+ def get_file(state: AssistantState) -> dict:
+     """Gets a file from an API given its name"""
+     headers = {
+         'accept': 'application/json'
+     }
+     if state.get("filename"):
+         file_id, file_extension = state["filename"].rsplit(".", 1)
+         response = requests.get(f"https://agents-course-unit4-scoring.hf.space/files/{file_id}", headers=headers)
+         with open(f"files/{file_id}.{file_extension}", "wb") as file:
+             file.write(response.content)
+         return {
+             "file_extension": file_extension
+         }
+     return {}
+
+ def assistant(state: AssistantState) -> AssistantState:
+     # If the state has a file, load it with a document loader, base64-encode it,
+     # and build the multimodal call; otherwise do a plain invoke
+     messages = state["messages"]
+     if state.get("filename"):
+         content_block = load_data(state["filename"], state["file_extension"])
+         message = HumanMessage(
+             content=[
+                 content_block
+             ]
+         )
+         messages.append(message)
+     response = llm_with_tools.invoke(messages)
+     return {
+         'messages' : [response]
+     }
+
+
+ #loader = YoutubeLoader.from_youtube_url(
+ #    "https://www.youtube.com/watch?v=QsYGlZkevEg", add_video_info=False
+ #)
+ #a = loader.load()
+ # Leftover test call, commented out so importing this module has no side effects:
+ #a = get_file({"messages": [], "filename": "cca530fc-4052-43b2-b130-b30968d8aa44.png", "file_extension": ""})
+ #print(a)
+ # Basic transcript loading
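The "build the multimodal content block" TODO in load_data is left open; a minimal sketch of what an image block could look like for the HumanMessage above (the block shape follows the OpenAI-style image_url format that LangChain accepts; the helper name is an assumption, not part of the commit):

import base64

def image_content_block(path: str) -> dict:
    # Hypothetical helper: base64-encode a PNG and wrap it in an
    # OpenAI-style image_url content block for a multimodal message
    with open(path, "rb") as f:
        b64 = base64.b64encode(f.read()).decode("utf-8")
    return {
        "type": "image_url",
        "image_url": {"url": f"data:image/png;base64,{b64}"},
    }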
agent/prompts.py ADDED
@@ -0,0 +1,2 @@
+ # Make the evaluation prompt more verbose in its reasoning path,
+ # and the production one terse
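A possible shape for those two prompts, sketched under the assumption that both the names and the wording are placeholders rather than part of the commit:

# Hypothetical sketch; names and wording are placeholders
EVAL_SYSTEM_PROMPT = (
    "You are a general AI assistant. Think step by step and report your "
    "full reasoning before giving FINAL ANSWER: [YOUR FINAL ANSWER]."
)
PROD_SYSTEM_PROMPT = (
    "You are a general AI assistant. Answer concisely with "
    "FINAL ANSWER: [YOUR FINAL ANSWER]; do not report your reasoning."
)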
requirements.txt CHANGED
@@ -1 +1,11 @@
- huggingface_hub==0.25.2
+ huggingface_hub==0.25.2
+ langchain_openai==0.3.28
+ langchain-google-genai==2.1.9
+ youtube-search==2.1.2
+ youtube-transcript-api==1.2.2
+ langchain_community==0.3.27
+ langgraph==0.6.2
+ langfuse==3.2.1
+ python-dotenv==1.1.1
+ tavily-python==0.7.10
+ e2b-code-interpreter==1.5.2