Update app.py

app.py CHANGED

@@ -6,9 +6,11 @@ from transformers import AutoTokenizer
 import torch.nn as nn
 import torch.nn.functional as F
 
-
-
+"""
+For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
+"""
 
+# ================ Step 1: Define the model structure ================
 class GELU(nn.Module):
     def __init__(self):
         super().__init__()
@@ -139,20 +141,64 @@ class GPTModel(nn.Module):
         logits = self.out_head(x)
         return logits
 
-#
-
+# ================ Step 2: Define the text-generation function ================
+
+def generate_text_simple(model, idx, max_new_tokens, context_size, temperature=1.0, top_k=None):
+    """
+    Text generation with top-k sampling and temperature scaling.
+
+    Parameters:
+        model: the language model
+        idx: token IDs of the input sequence
+        max_new_tokens: maximum number of new tokens to generate
+        context_size: size of the context window
+        temperature: temperature; controls sampling randomness (higher is more random)
+        top_k: consider only the top_k most probable tokens; None or 0 means all tokens
+
+    Returns:
+        The extended sequence of token IDs.
+    """
     device = idx.device
-    current_device_type = str(device).split(':')[0]
 
     for _ in range(max_new_tokens):
+        # Take the current context window
         idx_cond = idx[:, -context_size:]
+
         with torch.no_grad():
-            #
+            # Get the model's logits for the next token
            logits = model(idx_cond)
-
-
-
+            # Keep only the prediction at the last position
+            logits = logits[:, -1, :]
+
+            # Apply temperature scaling
+            if temperature > 0:
+                logits = logits / temperature
+
+            # Apply top-k filtering
+            if top_k is not None and top_k > 0:
+                # Take the k largest logits
+                v, _ = torch.topk(logits, min(top_k, logits.size(-1)))
+                # The threshold is the k-th largest value
+                threshold = v[..., [-1]]
+                # Set everything below the threshold to -inf
+                logits = torch.where(logits < threshold,
+                                     torch.full_like(logits, float('-inf')),
+                                     logits)
+
+            # Convert logits to probabilities with softmax
+            probs = torch.softmax(logits, dim=-1)
+
+            # Sample from the probability distribution
+            if temperature > 0:
+                # Random sampling
+                idx_next = torch.multinomial(probs, num_samples=1)
+            else:
+                # With temperature 0, take the most probable token (argmax)
+                idx_next = torch.argmax(probs, dim=-1, keepdim=True)
+
+        # Append the newly generated token to the sequence
         idx = torch.cat((idx, idx_next), dim=1)
+
     return idx
 
 def text_to_token_ids(text, tokenizer):
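
The generate_text_simple added above combines temperature scaling with top-k filtering before sampling. The following is a minimal standalone sketch of that filtering step on toy logits; the tensor values and variable names are illustrative only and are not part of this Space:

import torch

logits = torch.tensor([[2.0, 1.0, 0.5, -1.0, -3.0]])    # toy logits over a 5-token vocabulary
temperature, top_k = 0.7, 2

scaled = logits / temperature                            # temperature scaling
v, _ = torch.topk(scaled, min(top_k, scaled.size(-1)))   # k largest logits
threshold = v[..., [-1]]                                 # k-th largest value
filtered = torch.where(scaled < threshold,
                       torch.full_like(scaled, float('-inf')),
                       scaled)
probs = torch.softmax(filtered, dim=-1)                  # only the top-2 tokens keep non-zero probability
idx_next = torch.multinomial(probs, num_samples=1)       # sample one token ID
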
@@ -164,7 +210,7 @@ def token_ids_to_text(token_ids, tokenizer):
     flat = token_ids.squeeze(0)
     return tokenizer.decode(flat.tolist(), skip_special_tokens=True)
 
-# ================
+# ================ Step 3: Set up model loading and inference ================
 
 # Model ID
 model_id = "xingyu1996/tiger-gpt2"
@@ -190,7 +236,6 @@ def load_model_from_hub():
         config = json.load(f)
 
     # Convert the Hugging Face-format config into our own format
-    # Note: this mapping may need to be adjusted to the actual setup
     my_config = {
         "vocab_size": config.get("vocab_size", 50257),
         "context_length": config.get("n_positions", 512),
@@ -204,7 +249,6 @@ def load_model_from_hub():
     # Create the model
     model = GPTModel(my_config)
 
-    # Load the weights into the model
     # Check whether the state dict keys have an _orig_mod. prefix
     if any(k.startswith('_orig_mod.') for k in state_dict.keys()):
         state_dict = {k.replace('_orig_mod.', ''): v for k, v in state_dict.items()}
@@ -229,9 +273,9 @@ model, config = load_model_from_hub()
 tokenizer = AutoTokenizer.from_pretrained("gpt2")
 print("模型和分词器加载完成!")
 
-# ================
+# ================ Step 4: Set up the Gradio interface ================
 
-def respond(message, history, max_tokens, temperature):
+def respond(message, history, max_tokens, temperature, top_k):
     input_ids = text_to_token_ids(message, tokenizer).to("cpu")  # the Hugging Face Space may not have a GPU
     context_size = config["context_length"]
 
@@ -241,7 +285,9 @@ def respond(message, history, max_tokens, temperature):
         model=model,
         idx=input_ids,
         max_new_tokens=max_tokens,
-        context_size=context_size
+        context_size=context_size,
+        temperature=temperature,
+        top_k=top_k
     )
 
     # Decode the generated text
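
With the extra arguments wired through, the generation call in respond() can also be exercised directly. A rough usage sketch, assuming the model, tokenizer, and config produced earlier in this file are already loaded; the prompt and parameter values are only examples:

input_ids = text_to_token_ids("你好,世界", tokenizer).to("cpu")
output_ids = generate_text_simple(
    model=model,
    idx=input_ids,
    max_new_tokens=30,
    context_size=config["context_length"],
    temperature=0.7,
    top_k=50,
)
print(token_ids_to_text(output_ids, tokenizer))
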
@@ -263,10 +309,17 @@ demo = gr.ChatInterface(
     respond,
     additional_inputs=[
         gr.Slider(minimum=1, maximum=100, value=30, step=1, label="生成长度"),
-        gr.Slider(minimum=0.
+        gr.Slider(minimum=0.0, maximum=2.0, value=0.7, step=0.1, label="温度 (0.0 表示无随机性)"),
+        gr.Slider(minimum=0, maximum=100, value=50, step=1, label="Top-K (0 表示不限制)"),
     ],
     title=f"Tiger-GPT2 推理测试",
-    description="输入中文文本,模型将生成后续内容。此演示直接加载了原始模型权重,与本地推理行为一致。
+    description="""输入中文文本,模型将生成后续内容。此演示直接加载了原始模型权重,与本地推理行为一致。
+
+**参数说明**:
+- **生成长度**: 要生成的最大token数量
+- **温度**: 控制生成随机性,值越高越随机,值为0时始终选择最可能的词
+- **Top-K**: 只从概率最高的K个词中选择下一个词,设为0则考虑所有词
+""",
 )
 
 if __name__ == "__main__":
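
gr.ChatInterface passes the current values of additional_inputs as extra positional arguments after message and history, which is why respond now accepts max_tokens, temperature, and top_k in the same order as the sliders. A self-contained sketch of the same wiring, with a placeholder echo function standing in for the model:

import gradio as gr

def echo(message, history, max_tokens, temperature, top_k):
    # Placeholder: a real handler would run the model with these settings
    return f"{message} (max_tokens={max_tokens}, temperature={temperature}, top_k={top_k})"

demo = gr.ChatInterface(
    echo,
    additional_inputs=[
        gr.Slider(minimum=1, maximum=100, value=30, step=1, label="max_tokens"),
        gr.Slider(minimum=0.0, maximum=2.0, value=0.7, step=0.1, label="temperature"),
        gr.Slider(minimum=0, maximum=100, value=50, step=1, label="top_k"),
    ],
)

if __name__ == "__main__":
    demo.launch()
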