Spaces:
Runtime error
Runtime error
| from dataclasses import dataclass | |
| from typing import Optional | |
| import os | |
| import torch | |
| from dotenv import load_dotenv | |
| from cpufeature import CPUFeature | |
| from petals.constants import PUBLIC_INITIAL_PEERS | |
| from huggingface_hub import login | |
| class ModelInfo: | |
| repo: str | |
| adapter: Optional[str] = None | |
| load_dotenv() | |
| hugging_face_token = os.getenv("HUGGINGFACE_TOKEN") | |
| login(token=hugging_face_token) | |
| MODELS = [ | |
| ModelInfo(repo="meta-llama/Llama-2-70b-hf"), | |
| ModelInfo(repo="meta-llama/Llama-2-70b-chat-hf"), | |
| #ModelInfo(repo="enoch/llama-65b-hf"), | |
| #ModelInfo(repo="enoch/llama-65b-hf", adapter="timdettmers/guanaco-65b"), | |
| # ModelInfo(repo="bigscience/bloom"), | |
| #ModelInfo(repo="bigscience/bloomz"), | |
| ] | |
| DEFAULT_MODEL_NAME = "meta-llama/Llama-2-70b-chat-hf" | |
| INITIAL_PEERS = PUBLIC_INITIAL_PEERS | |
| # Set this to a list of multiaddrs to connect to a private swarm instead of the public one, for example: | |
| # INITIAL_PEERS = ['/ip4/10.1.2.3/tcp/31234/p2p/QmcXhze98AcgGQDDYna23s4Jho96n8wkwLJv78vxtFNq44'] | |
| DEVICE = "cpu" | |
| if DEVICE == "cuda": | |
| TORCH_DTYPE = "auto" | |
| elif CPUFeature["AVX512f"] and CPUFeature["OS_AVX512"]: | |
| TORCH_DTYPE = torch.bfloat16 | |
| else: | |
| TORCH_DTYPE = torch.float32 # You can use bfloat16 in this case too, but it will be slow | |
| STEP_TIMEOUT = 5 * 60 | |
| MAX_SESSIONS = 50 # Has effect only for API v1 (HTTP-based) | |