Spaces:
Sleeping
Sleeping
| from langchain_community.document_loaders.csv_loader import CSVLoader | |
| from langchain_community.document_loaders import PyPDFLoader, UnstructuredExcelLoader | |
| from langchain_community.document_loaders.python import PythonLoader | |
| from agent.data import FileLoader | |
| import base64 | |
| import re, textwrap | |
# Matches the first fenced code block, written either as ```python ... ``` or as
# ~~~python ... ~~~ (the language tag is optional).  After the tag only the rest
# of the opening fence line is consumed -- NOT arbitrary whitespace -- so the
# indentation of the first code line is still present when the capture is
# handed to textwrap.dedent.
FENCE_RE = re.compile(
    r"(?s)```(?:python|py)?[^\S\n]*\n?(.*?)```"  # ```python ... ``` block
    r"|~~~(?:python|py)?[^\S\n]*\n?(.*?)~~~"  # ~~~python ... ~~~ block
)


def extract_python(src: str) -> str:
    """Return the Python code contained in *src*, without Markdown fences.

    The first fenced block (backtick or tilde fence, optionally tagged
    ``python``/``py``) is extracted; when *src* contains no complete fenced
    block the whole string is treated as code.

    Args:
        src: Text that may wrap code in a Markdown code fence.

    Returns:
        The code with common leading indentation removed, surrounding
        whitespace stripped and any leftover fence markers deleted.
    """
    match = FENCE_RE.search(src)
    if match:
        # Exactly one of the two alternatives captured something.
        code = next(group for group in match.groups() if group is not None)
    else:
        code = src

    # Remove indentation that was accidentally applied to the whole snippet.
    code = textwrap.dedent(code).strip()

    # Drop stray fence markers that may have been left behind.
    return code.replace("```", "").replace("~~~", "").strip()
# Extensions whose raw file bytes are embedded as base64:
# extension -> (content-block "type", MIME type).
_RAW_FILE_BLOCKS = {
    "csv": ("text", "text/csv"),
    "pdf": ("file", "application/pdf"),
}

# Extensions read through the project's FileLoader before base64 encoding:
# extension -> (content-block "type", MIME type).
_FILE_LOADER_BLOCKS = {
    "mp3": ("audio", "audio/mp3"),
    "png": ("image", "image/png"),
}


def load_data(file_path: str, file_extension: str) -> "dict | str":
    """Load a file and return it in the form used for its extension.

    Args:
        file_path: Path of the file to load.
        file_extension: Extension selecting the loader, e.g. ``"csv"``.
            Matching is case-insensitive and a leading dot is ignored.

    Returns:
        For ``py`` and ``xlsx``: the ``page_content`` string of the first
        document produced by the corresponding loader.
        For ``csv``, ``pdf``, ``mp3`` and ``png``: a dict with the keys
        ``type``, ``source_type`` (always ``"base64"``), ``data`` (the
        base64-encoded payload as text) and ``mime_type``.

    Raises:
        ValueError: If *file_extension* is not one of the supported ones.
    """
    extension = file_extension.lower().lstrip(".")

    # Text-like sources are handed back as plain extracted text.
    if extension == "py":
        return PythonLoader(file_path).load()[0].page_content
    if extension == "xlsx":
        return UnstructuredExcelLoader(file_path).load()[0].page_content

    if extension in _RAW_FILE_BLOCKS:
        type_d, mime_type = _RAW_FILE_BLOCKS[extension]
        # The document loaders return a list of Document objects, which
        # base64.b64encode cannot handle; encode the file's own bytes so the
        # payload actually matches the declared MIME type.
        with open(file_path, "rb") as handle:
            payload = handle.read()
    elif extension in _FILE_LOADER_BLOCKS:
        type_d, mime_type = _FILE_LOADER_BLOCKS[extension]
        # NOTE(review): assumes FileLoader.load() returns a bytes-like
        # object -- confirm against agent.data.
        payload = FileLoader(file_path).load()
    else:
        raise ValueError(f"Unsupported file extension: {file_extension!r}")

    content_block = {
        "type": type_d,
        "source_type": "base64",
        "data": base64.b64encode(payload).decode("utf-8"),
        "mime_type": mime_type,
    }
    print(file_path)
    return content_block