Spaces:
Sleeping
Sleeping
Commit
·
961fcf4
1
Parent(s):
21e5eec
Set the USE_FLASH_ATTENTION environment variable to "0"
Browse files
app.py
CHANGED
|
@@ -8,7 +8,8 @@ from huggingface_hub import login
|
|
| 8 |
import torch
|
| 9 |
import json
|
| 10 |
import bs4
|
| 11 |
-
|
|
|
|
| 12 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 13 |
|
| 14 |
product_strings = []
|
|
@@ -105,7 +106,6 @@ def load_model():
|
|
| 105 |
truncation=True,
|
| 106 |
padding=True, )
|
| 107 |
ankerbot_model.to("cuda")
|
| 108 |
-
prompt_format = "<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n"
|
| 109 |
generator = pipeline(task="text-generation", model=ankerbot_model, tokenizer=ankerbot_tokenizer, torch_dtype=torch.float16, attn_implementation="eager", use_flash_attention_2=False) # True for flash-attn2 else False
|
| 110 |
generator_mini = pipeline(task="text-generation", model=ankerbot_model, tokenizer=ankerbot_tokenizer, torch_dtype=torch.float16, attn_implementation="eager", use_flash_attention_2=False) # True for flash-attn2 else False
|
| 111 |
load_model()
|
|
|
|
| 8 |
import torch
|
| 9 |
import json
|
| 10 |
import bs4
|
| 11 |
+
import os
|
| 12 |
+
os.environ["USE_FLASH_ATTENTION"] = "0"
|
| 13 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 14 |
|
| 15 |
product_strings = []
|
|
|
|
| 106 |
truncation=True,
|
| 107 |
padding=True, )
|
| 108 |
ankerbot_model.to("cuda")
|
|
|
|
| 109 |
generator = pipeline(task="text-generation", model=ankerbot_model, tokenizer=ankerbot_tokenizer, torch_dtype=torch.float16, attn_implementation="eager", use_flash_attention_2=False) # True for flash-attn2 else False
|
| 110 |
generator_mini = pipeline(task="text-generation", model=ankerbot_model, tokenizer=ankerbot_tokenizer, torch_dtype=torch.float16, attn_implementation="eager", use_flash_attention_2=False) # True for flash-attn2 else False
|
| 111 |
load_model()
|