hetline committed on
Commit
9a96a6f
1 Parent(s): 020a586

feat: add agent body

.gitignore ADDED
@@ -0,0 +1,2 @@
+ /venv
+ .env
agent/__pycache__/custom_state.cpython-312.pyc ADDED
Binary file (696 Bytes).
agent/__pycache__/graph.cpython-312.pyc ADDED
Binary file (1.16 kB).
agent/__pycache__/models.cpython-312.pyc ADDED
Binary file (1 kB).
agent/__pycache__/nodes.cpython-312.pyc ADDED
Binary file (2.48 kB).
agent/custom_state.py ADDED
@@ -0,0 +1,8 @@
+ from typing import TypedDict, Annotated
+ from langgraph.graph.message import add_messages
+ from langchain_core.messages import AnyMessage
+
+ class AssistantState(TypedDict):
+     messages: Annotated[list[AnyMessage], add_messages]
+     filename: str
+     file_extension: str
agent/data.py ADDED
@@ -0,0 +1,11 @@
+ import base64
+
+ class AudioFileLoader:
+
+     def __init__(self, filename: str):
+         self.filename = filename
+
+     def load(self):
+         """Return the file contents as base64-encoded bytes."""
+         with open(self.filename, "rb") as file:
+             return base64.b64encode(file.read())
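For the mp3 path, a hedged sketch of how this base64 payload could be wrapped for a multimodal message; the input_audio block shape follows OpenAI's audio-input format (which needs an audio-capable model), and the helper name is an assumption, not part of the commit:

from data import AudioFileLoader

def audio_content_block(path: str) -> dict:
    # Hypothetical helper: wrap the base64 mp3 payload from AudioFileLoader
    # in an OpenAI-style input_audio content block
    b64 = AudioFileLoader(path).load().decode("utf-8")
    return {
        "type": "input_audio",
        "input_audio": {"data": b64, "format": "mp3"},
    }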
agent/files/cca530fc-4052-43b2-b130-b30968d8aa44.png ADDED
agent/graph.png ADDED
agent/graph.py ADDED
@@ -0,0 +1,25 @@
+ from custom_state import AssistantState
+ from langgraph.graph import START, StateGraph
+ from nodes import assistant, tools, get_file
+ from langgraph.prebuilt import tools_condition, ToolNode
+
+ builder = StateGraph(AssistantState)
+
+ builder.add_node("get_file", get_file)
+ builder.add_node("assistant", assistant)
+ builder.add_node("tools", ToolNode(tools))
+
+ builder.add_edge(START, "get_file")
+ builder.add_edge("get_file", "assistant")
+ builder.add_conditional_edges(
+     "assistant",
+     tools_condition
+ )
+ builder.add_edge("tools", "assistant")
+
+ graph = builder.compile()
+
+ png_bytes = graph.get_graph().draw_mermaid_png()
+
+ with open("graph.png", "wb") as f:
+     f.write(png_bytes)
agent/main.py ADDED
@@ -0,0 +1,25 @@
+ from graph import graph
+ from langchain_core.messages import SystemMessage, HumanMessage
+ from langfuse.langchain import CallbackHandler
+
+ langfuse_handler = CallbackHandler()
+
+ messages = [
+     SystemMessage("You are a general AI assistant. I will ask you a question. "
+                   "First, think step by step about the best approach to answer the question, reporting your thoughts. "
+                   "After you have reported your thoughts, if you need to use a tool, call it. "
+                   "Finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER]. "
+                   "YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma-separated list of numbers and/or strings. "
+                   "If you are asked for a number, don't write it with commas or units such as $ or percent signs unless specified otherwise. "
+                   "If you are asked for a string, don't use articles or abbreviations (e.g. for cities), "
+                   "and write digits in plain text unless specified otherwise. If you are asked for a comma-separated list, "
+                   "apply the above rules depending on whether each element of the list is a number or a string."),
+     HumanMessage("How many studio albums were published by Mercedes Sosa between 2000 and 2009 (included)? "
+                  "You can use the latest 2022 version of English Wikipedia.")
+ ]
+
+ result = graph.invoke(
+     input={'messages': messages, 'filename': ''},
+     config={'callbacks': [langfuse_handler], 'recursion_limit': 10}
+ )
+ print(result["messages"][-1].content)
agent/models.py ADDED
@@ -0,0 +1,32 @@
+ # Model clients for the LLM nodes and for the tools that use them
+ from dotenv import load_dotenv
+ from openai import OpenAI
+ from langchain_openai import ChatOpenAI
+ from langchain_google_genai import ChatGoogleGenerativeAI
+ import os
+
+ load_dotenv()
+
+ openai_api_key = os.getenv("OPENAI_API_KEY")
+ gemini_api_key = os.getenv("GOOGLE_API_KEY")
+ llama4_api_key = os.getenv("HUGGINGFACE_API_TOKEN")
+
+ llama_client = OpenAI(
+     base_url="https://router.huggingface.co/v1",
+     api_key=llama4_api_key,
+ )
+
+ openai_model = ChatOpenAI(
+     model="gpt-4o",
+     # reasoning_effort="minimal",
+     temperature=0,
+     max_tokens=None,
+     api_key=openai_api_key
+ )
+
+ gemini_model = ChatGoogleGenerativeAI(
+     model="gemini-2.5-pro",
+     temperature=0,
+     max_tokens=None,
+     google_api_key=gemini_api_key
+ )
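Note: llama_client is created above but never invoked elsewhere in this commit. A minimal sketch of how it could be called through the Hugging Face router's OpenAI-compatible API (the model id below is an assumption):

response = llama_client.chat.completions.create(
    model="meta-llama/Llama-4-Scout-17B-16E-Instruct",  # hypothetical model id
    messages=[{"role": "user", "content": "Hello"}],
)
print(response.choices[0].message.content)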
agent/nodes.py ADDED
@@ -0,0 +1,139 @@
+ from langchain.tools import tool
+ from langchain_community.document_loaders import CSVLoader, PyPDFLoader, UnstructuredExcelLoader, PythonLoader
+ from data import AudioFileLoader
+ from langchain_yt_dlp.youtube_loader import YoutubeLoaderDL
+ from youtube_transcript_api import YouTubeTranscriptApi
+ from youtube_transcript_api._errors import TranscriptsDisabled
+ from langchain_core.messages import SystemMessage, HumanMessage
+ from tavily import TavilyClient
+ from custom_state import AssistantState
+ import os
+ from models import openai_model, gemini_model, llama_client
+ from dotenv import load_dotenv
+ from e2b_code_interpreter import Sandbox
+
+ import requests
+
+ load_dotenv()
+
+ tavily_client = TavilyClient(
+     api_key=os.getenv("TAVILY_API_KEY")
+ )
+
+ @tool
+ def youtube_search(link: str) -> str:
+     """Fetch the transcript of a YouTube video. Input must be only the video URL."""
+     transcript = ""
+     loader = YoutubeLoaderDL.from_youtube_url(
+         link, add_video_info=True
+     )
+     documents = loader.load()
+     video_metadata = documents[0].metadata
+     video_id = video_metadata["source"]
+     ytt_api = YouTubeTranscriptApi()
+     try:
+         fetched_transcript = ytt_api.fetch(video_id)
+         for snippet in fetched_transcript:
+             transcript = transcript + " " + snippet.text
+     except TranscriptsDisabled:
+         transcript = "No description of video content available"
+     return transcript
+
+ @tool
+ def web_search(query: str) -> str:
+     """Web search to retrieve information outside of the LLM's knowledge."""
+     response = tavily_client.search(query, include_answer=True)
+     return response["answer"]
+
+ @tool
+ def code_interpreter(query: str) -> str:
+     """Lets the LLM write and execute code when a task needs it."""
+     messages = [
+         SystemMessage("You are a really good programmer that writes code to solve questions. Respond with only the code, nothing more."),
+         HumanMessage(query)
+     ]
+     response = openai_model.invoke(messages)
+     code = response.content
+     try:
+         if code:
+             with Sandbox() as sandbox:
+                 execution = sandbox.run_code(code)
+                 return execution.logs.stdout[0]
+         return "The model produced no code to execute"
+     except Exception:
+         return "There was an error while trying to execute code"
+
+ #@tool
+ #def get_file():
+ # I need ways to process mp3 (multimodal)
+ # pdf (can be multimodal)
+ # png (multimodal)
+ # py
+ # xlsx
+ # pass
+
+
+ #result = web_search("https://www.youtube.com/watch?v=1htKBjuUWec.")
+ #result = code_interpreter("How many 'r' are in the word strawberry?")
+ #print(result)
+ #print(type(result))
+
+ tools = [web_search, code_interpreter, youtube_search]
+ llm_with_tools = openai_model.bind_tools(tools)
+
+ def load_data(filename: str, file_extension: str):
+     path = f"files/{filename}"
+     if file_extension == "csv":
+         loader = CSVLoader(file_path=path)
+     elif file_extension == "py":
+         loader = PythonLoader(path)
+     elif file_extension == "pdf":
+         loader = PyPDFLoader(path)
+     elif file_extension == "xlsx":
+         loader = UnstructuredExcelLoader(path)
+     elif file_extension == "mp3":
+         loader = AudioFileLoader(path)
+     # Build the multimodal content block here
+     return loader.load()
+
+ def get_file(state: AssistantState) -> dict:
+     """Gets a file from an API given its name"""
+     headers = {
+         'accept': 'application/json'
+     }
+     if state.get("filename"):
+         file_id, file_extension = state["filename"].rsplit(".", 1)
+         response = requests.get(f"https://agents-course-unit4-scoring.hf.space/files/{file_id}", headers=headers)
+         with open(f"files/{file_id}.{file_extension}", "wb") as file:
+             file.write(response.content)
+         return {
+             "file_extension": file_extension
+         }
+     return {}
+
+ def assistant(state: AssistantState) -> AssistantState:
+     # If the state has a file, load it with a document loader, base64-encode it,
+     # and build the multimodal call; otherwise do a plain invoke
+     messages = state["messages"]
+     if state.get("filename"):
+         content_block = load_data(state["filename"], state["file_extension"])
+         message = HumanMessage(
+             content=[
+                 content_block
+             ]
+         )
+         messages.append(message)
+     response = llm_with_tools.invoke(messages)
+     return {
+         'messages' : [response]
+     }
+
+
+ #loader = YoutubeLoader.from_youtube_url(
+ #    "https://www.youtube.com/watch?v=QsYGlZkevEg", add_video_info=False
+ #)
+ #a = loader.load()
+ # Leftover test call, commented out so importing this module has no side effects:
+ #a = get_file({"messages": [], "filename": "cca530fc-4052-43b2-b130-b30968d8aa44.png", "file_extension": ""})
+ #print(a)
+ # Basic transcript loading
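The "build the multimodal content block" TODO in load_data is left open; a minimal sketch of what an image block could look like for the HumanMessage above (the block shape follows the OpenAI-style image_url format that LangChain accepts; the helper name is an assumption, not part of the commit):

import base64

def image_content_block(path: str) -> dict:
    # Hypothetical helper: base64-encode a PNG and wrap it in an
    # OpenAI-style image_url content block for a multimodal message
    with open(path, "rb") as f:
        b64 = base64.b64encode(f.read()).decode("utf-8")
    return {
        "type": "image_url",
        "image_url": {"url": f"data:image/png;base64,{b64}"},
    }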
agent/prompts.py ADDED
@@ -0,0 +1,2 @@
+ # Make the evaluation prompt more verbose in its reasoning path,
+ # and the production one terse
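A possible shape for those two prompts, sketched under the assumption that both the names and the wording are placeholders rather than part of the commit:

# Hypothetical sketch; names and wording are placeholders
EVAL_SYSTEM_PROMPT = (
    "You are a general AI assistant. Think step by step and report your "
    "full reasoning before giving FINAL ANSWER: [YOUR FINAL ANSWER]."
)
PROD_SYSTEM_PROMPT = (
    "You are a general AI assistant. Answer concisely with "
    "FINAL ANSWER: [YOUR FINAL ANSWER]; do not report your reasoning."
)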
requirements.txt CHANGED
@@ -1 +1,11 @@
- huggingface_hub==0.25.2
+ huggingface_hub==0.25.2
+ langchain_openai==0.3.28
+ langchain-google-genai==2.1.9
+ youtube-search==2.1.2
+ youtube-transcript-api==1.2.2
+ langchain_community==0.3.27
+ langgraph==0.6.2
+ langfuse==3.2.1
+ python-dotenv==1.1.1
+ tavily-python==0.7.10
+ e2b-code-interpreter==1.5.2