Siavassh committed on
Commit
706ac60
·
verified ·
1 Parent(s): d92e6d2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +21 -5
app.py CHANGED
@@ -1,16 +1,32 @@
 
 
1
  from transformers import AutoModelForCausalLM, AutoTokenizer
2
  import torch
3
  import os
4
 
5
- model_name = "meta-llama/Llama-4-11B-Instruct"
6
 
7
- # use the name of your secret, here "test"
8
- token = os.getenv("test")
 
9
 
10
- tokenizer = AutoTokenizer.from_pretrained(model_name, token=token)
 
11
  model = AutoModelForCausalLM.from_pretrained(
12
  model_name,
13
  torch_dtype=torch.float16,
14
  device_map="auto",
15
- token=token
16
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
from fastapi import FastAPI
from pydantic import BaseModel
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
import os

app = FastAPI()

# Model and token.
# NOTE(review): verify this repo id exists on the Hub — Llama 4 checkpoints
# are published under names like Scout/Maverick, not "11B-Instruct".
model_name = "meta-llama/Llama-4-11B-Instruct"
token = os.getenv("test")  # HF access token read from the Space secret named "test"

# Load tokenizer + model once at startup so every request reuses them.
# `token=` replaces the deprecated `use_auth_token=` kwarg (transformers >= 4.32),
# matching the keyword this file already used before this commit.
tokenizer = AutoTokenizer.from_pretrained(model_name, token=token)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,  # half precision to reduce GPU memory
    device_map="auto",          # place/shard layers automatically across devices
    token=token,
)
21
# Input schema
class InputText(BaseModel):
    """Request body for the /predict endpoint."""

    # text: the raw prompt string forwarded to the language model
    text: str
25
# API endpoint
@app.post("/predict")
def predict(item: InputText):
    """Generate a continuation of ``item.text`` with the loaded model.

    Returns:
        dict: ``{"result": <decoded text>}``. The decode covers the whole
        output sequence, so the result includes the original prompt.
    """
    inputs = tokenizer(item.text, return_tensors="pt").to(model.device)
    # inference_mode disables autograd bookkeeping during generation,
    # cutting per-request memory; generated text is unchanged.
    with torch.inference_mode():
        outputs = model.generate(**inputs, max_new_tokens=200)
    result = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return {"result": result}