Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
|
@@ -4,7 +4,7 @@ import gradio as gr
|
|
| 4 |
import spaces
|
| 5 |
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
|
| 6 |
import subprocess
|
| 7 |
-
subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
|
| 8 |
|
| 9 |
BANNER_HTML = """
|
| 10 |
<p align="center">
|
|
@@ -37,7 +37,7 @@ def load_model(version):
|
|
| 37 |
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16, device_map="auto", attn_implementation="flash_attention_2")
|
| 38 |
return f"Model {model_name} loaded."
|
| 39 |
|
| 40 |
-
@spaces.GPU(duration=
|
| 41 |
def stream_chat(message: str, history: list, system_prompt: str, model_version: str, temperature: float, max_new_tokens: int):
|
| 42 |
conversation = [{"role": "system", "content": system_prompt or DEFAULT_SYSTEM_PROMPT}]
|
| 43 |
for prompt, answer in history:
|
|
|
|
| 4 |
import spaces
|
| 5 |
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
|
| 6 |
import subprocess
|
| 7 |
+
#subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
|
| 8 |
|
| 9 |
BANNER_HTML = """
|
| 10 |
<p align="center">
|
|
|
|
| 37 |
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16, device_map="auto", attn_implementation="flash_attention_2")
|
| 38 |
return f"Model {model_name} loaded."
|
| 39 |
|
| 40 |
+
@spaces.GPU(duration=60)
|
| 41 |
def stream_chat(message: str, history: list, system_prompt: str, model_version: str, temperature: float, max_new_tokens: int):
|
| 42 |
conversation = [{"role": "system", "content": system_prompt or DEFAULT_SYSTEM_PROMPT}]
|
| 43 |
for prompt, answer in history:
|