from fastapi import FastAPI
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

app = FastAPI()

# Hugging Face hub identifier of the model served by this app.
MODEL_NAME = "arnir0/Tiny-LLM"

# Module-level cache for the tokenizer/model; both stay None until the
# first request triggers a lazy load (see greet_json).
tokenizer = None
model = None
def greet_json():
    """Return a static greeting payload, lazily loading the model first.

    On the first invocation the Tiny-LLM tokenizer and causal-LM weights
    are fetched via ``from_pretrained`` and cached in the module-level
    globals; later calls reuse them untouched.

    NOTE(review): no route decorator is visible here — confirm this
    handler is registered with ``app`` elsewhere.
    """
    global tokenizer, model
    if tokenizer is None or model is None:
        # First call: populate the module-level cache from the hub.
        tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
        model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
        # Inference-only usage; eval() disables train-time behavior
        # such as dropout.
        model.eval()
    return {"Hello": "World!", "model_status": "Loaded and hibernated!"}