# gaia-agent/agent/utils.py
# Last commit: "feat: change imports" (0fa23cd)
from langchain_community.document_loaders.csv_loader import CSVLoader
from langchain_community.document_loaders import PyPDFLoader, UnstructuredExcelLoader
from langchain_community.document_loaders.python import PythonLoader
from agent.data import FileLoader
import base64
import re, textwrap
# Matches the first fenced code block, with an optional "python"/"py" tag.
# Only trailing spaces/tabs and a single newline are consumed after the
# opening fence, so the indentation of the first code line is preserved and
# textwrap.dedent() can later remove the common margin from every line.
FENCE_RE = re.compile(
    r"(?s)```(?:python|py)?[ \t]*\r?\n?(.*?)```"  # ```python ... ``` block
    r"|~~~(?:python|py)?[ \t]*\r?\n?(.*?)~~~"  # ~~~python ... ~~~ block
)


def extract_python(src: str) -> str:
    """Extract Python source code from a (possibly fenced) text snippet.

    If ``src`` contains a fenced block delimited by triple backticks or triple
    tildes (optionally tagged ``python`` or ``py``), the contents of the first
    such block are used; otherwise the whole of ``src`` is treated as code.

    Args:
        src: Raw text, e.g. an LLM reply that may wrap code in a fence.

    Returns:
        The code with common leading indentation removed, surrounding
        whitespace stripped, and any leftover fence markers deleted.
    """
    match = FENCE_RE.search(src)
    if match is None:
        code = src
    else:
        # Exactly one of the two alternatives captured something.
        code = next(group for group in match.groups() if group is not None)

    # Remove accidental uniform indentation added by the LLM.
    code = textwrap.dedent(code).strip()

    # Drop stray fence markers that may remain (e.g. an unclosed fence).
    return code.replace("```", "").replace("~~~", "").strip()
def _read_file_base64(file_path: str) -> str:
    """Return the raw bytes of ``file_path`` as a base64-encoded ASCII string."""
    with open(file_path, "rb") as fh:
        return base64.b64encode(fh.read()).decode("utf-8")


def load_data(file_path: str, file_extension: str) -> "dict | str":
    """Load a file and return it in a form suitable for a chat message.

    Args:
        file_path: Path of the file to load.
        file_extension: Extension selecting the loading strategy. Matching is
            case-insensitive and a leading dot is ignored. Supported values:
            ``csv``, ``py``, ``pdf``, ``xlsx``, ``mp3``, ``png``.

    Returns:
        For ``py`` and ``xlsx``: the ``page_content`` string of the first
        document produced by the corresponding loader.
        For every other supported extension: a dict with the keys ``type``,
        ``source_type`` (always ``"base64"``), ``data`` (base64 text) and
        ``mime_type``.

    Raises:
        ValueError: If ``file_extension`` is not one of the supported values.
    """
    extension = (file_extension or "").lower().lstrip(".")

    # These two formats are returned as plain extracted text, not as a block.
    if extension == "py":
        return PythonLoader(file_path).load()[0].page_content
    if extension == "xlsx":
        return UnstructuredExcelLoader(file_path).load()[0].page_content

    if extension == "csv":
        # The document loaders return a list of Document objects, which
        # base64.b64encode() cannot accept; encode the raw file bytes so the
        # payload matches the declared MIME type.
        # NOTE(review): confirm consumers expect the raw CSV rather than the
        # per-row text produced by CSVLoader.
        type_d, mime_type = "text", "text/csv"
        data = _read_file_base64(file_path)
    elif extension == "pdf":
        # Same reasoning as for CSV: the payload is the PDF file itself.
        type_d, mime_type = "file", "application/pdf"
        data = _read_file_base64(file_path)
    elif extension == "mp3":
        # Assumes FileLoader.load() returns raw bytes — TODO confirm.
        type_d, mime_type = "audio", "audio/mp3"
        data = base64.b64encode(FileLoader(file_path).load()).decode("utf-8")
    elif extension == "png":
        # Assumes FileLoader.load() returns raw bytes — TODO confirm.
        type_d, mime_type = "image", "image/png"
        data = base64.b64encode(FileLoader(file_path).load()).decode("utf-8")
    else:
        # Previously this fell through to an UnboundLocalError on type_d.
        raise ValueError(f"Unsupported file extension: {file_extension!r}")

    content_block = {
        "type": type_d,
        "source_type": "base64",
        "data": data,
        "mime_type": mime_type,
    }
    # Kept from the original: echoes the path of every file turned into a block.
    print(file_path)
    return content_block