Update app.py
app.py CHANGED

@@ -1,8 +1,9 @@
+import os
+import subprocess
 import uvicorn
 from fastapi import FastAPI
 from pydantic import BaseModel
-import
-import os
+import sysconfig
 
 # Initialize the FastAPI application
 app = FastAPI()
@@ -14,27 +15,63 @@ class PromptRequest(BaseModel):
 # Main route to run the model
 @app.post("/generate/")
 async def generate_text(request: PromptRequest):
+    # Specify the model and tokenizer paths
     model_path = "/content/executorch/Llama-3.2-1B-Instruct-SpinQuant_INT4_EO8.pte"
     tokenizer_path = "/content/executorch/tokenizer.model"
+
+    # Download the files if they do not exist
+    download_files(model_path, tokenizer_path)
+
     prompt = request.prompt
-
-    # Run the model by calling the function that executes the command
     result = run_llama_model(model_path, tokenizer_path, prompt)
 
     return {"generated_text": result}
 
 def run_command(command):
-    """Run a command in the shell."""
+    """Run a command in the shell and return its output."""
     result = subprocess.run(command, shell=True, text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
     if result.returncode != 0:
         return f"Error running the command: {result.stderr}"
     return result.stdout
 
+def download_files(model_path, tokenizer_path):
+    """Download the model and tokenizer if they are not already present."""
+    if not os.path.exists(model_path):
+        print(f"Downloading the model from Hugging Face: {model_path}")
+        run_command(f"wget https://huggingface.co/executorch-community/Llama-3.2-1B-Instruct-SpinQuant_INT4_EO8-ET/resolve/main/Llama-3.2-1B-Instruct-SpinQuant_INT4_EO8.pte -O {model_path}")
+    else:
+        print(f"The model is already present at: {model_path}")
+
+    if not os.path.exists(tokenizer_path):
+        print(f"Downloading the tokenizer from Hugging Face: {tokenizer_path}")
+        run_command(f"wget https://huggingface.co/executorch-community/Llama-3.2-1B-Instruct-SpinQuant_INT4_EO8-ET/resolve/main/tokenizer.model -O {tokenizer_path}")
+    else:
+        print(f"The tokenizer is already present at: {tokenizer_path}")
+
 def run_llama_model(model_path, tokenizer_path, prompt):
-    """Run the Llama model."""
+    """Run the Llama model and generate text."""
     cmd = f"cd /content/executorch/cmake-out/examples/models/llama && ./llama_main --model_path={model_path} --tokenizer_path={tokenizer_path} --prompt='{prompt}'"
     return run_command(cmd)
 
 # Start the Uvicorn server directly from the Python code
 if __name__ == "__main__":
-
+    # Get the Python environment's library path
+    lib_path = sysconfig.get_paths()["purelib"]
+    print(f"Using the Python library path: {lib_path}")
+
+    # Set up the environment and dependencies if needed
+    install_requirements(lib_path)
+
+    # Run the Uvicorn server
+    uvicorn.run(app, host="0.0.0.0", port=8000, reload=True)
+
+def install_requirements(lib_path):
+    """Install Executorch dependencies."""
+    print("Installing Executorch dependencies...")
+    run_command(f"cd /content/executorch && CMAKE_PREFIX_PATH={lib_path} EXECUTORCH_BUILD_XNNPACK=ON bash ./install_requirements.sh --pybind")
+    print("Executorch dependencies installed.")
+
+    # Install additional requirements for the Llama model
+    print("Installing dependencies for the Llama model...")
+    run_command("cd /content/executorch/examples/models/llama && ./install_requirements.sh")
+    print("Llama requirements installed.")