Update app.py
app.py CHANGED
@@ -5,10 +5,14 @@
 
 # %%writefile demo-4bit.py
 
+from textwrap import dedent
+
 from transformers import AutoModel, AutoTokenizer
 import gradio as gr
 import mdtex2html
 
+# from loguru import logger
+
 model_name = "THUDM/chatglm2-6b"
 model_name = "THUDM/chatglm2-6b-int4"
 
@@ -33,7 +37,6 @@ model = model.eval()
 
 _ = """Override Chatbot.postprocess"""
 
-
 def postprocess(self, y):
     if y is None:
         return []
@@ -100,9 +103,22 @@ def reset_state():
     return [], [], None
 
 
-with gr.Blocks() as demo:
+with gr.Blocks(theme=gr.themes.Soft()) as demo:
     gr.HTML("""<h1 align="center">ChatGLM2-6B-int4</h1>""")
+    with gr.Accordion("Info", open=False):
+        _ = """
+        Takes from 30 seconds to a few hundred seconds, depending on the number of words/characters
+        the question and answer contain.
+
+        Low temperature: responses will be more deterministic and focused; high temperature: more creative.
+
+        Suggested temperatures -- translation: up to 0.3; chatting: > 0.4
+
+        Top P controls dynamic vocabulary selection based on context.
 
+        For a table of example values for different scenarios, refer to https://community.openai.com/t/cheat-sheet-mastering-temperature-and-top-p-in-chatgpt-api-a-few-tips-and-tricks-on-controlling-the-creativity-deterministic-output-of-prompt-responses/172683
+        """
+        gr.Markdown(dedent(_))
     chatbot = gr.Chatbot()
     with gr.Row():
         with gr.Column(scale=4):
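
The Accordion text added above documents how temperature and Top P shape the replies. For context, the sketch below shows how those sampling values are typically forwarded to ChatGLM2's stream_chat() call; it is not part of this commit, and the predict() helper, slider defaults, and the assumption that `model`/`tokenizer` are the objects app.py builds from model_name (via AutoModel/AutoTokenizer with trust_remote_code=True) are illustrative assumptions, since that part of app.py falls outside the hunks shown here.

# Sketch only -- not part of this diff. Assumes `model` and `tokenizer` already exist,
# as in the rest of app.py (model = model.eval() appears in the second hunk's header).
def predict(query, history, max_length=8192, top_p=0.8, temperature=0.95):
    """Stream a reply, forwarding the sampling knobs described in the Accordion."""
    # stream_chat() comes from the ChatGLM2 remote code loaded with trust_remote_code=True.
    # Lower temperature -> more deterministic, focused answers; top_p restricts sampling
    # to the smallest set of tokens whose cumulative probability exceeds the threshold.
    for response, new_history in model.stream_chat(
        tokenizer,
        query,
        history,
        max_length=max_length,
        top_p=top_p,
        temperature=temperature,
    ):
        yield response, new_history

Streaming partial responses this way keeps the gr.Chatbot responsive while the int4-quantized model generates, which is presumably why the demo overrides Chatbot.postprocess to render the incrementally updated Markdown/HTML output.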
|