sky-2002
/

deepseek-tinystories-60M

Text Generation

mixture-of-experts

Mixture of Experts

multi-head-latent-attention

Model card Files Files and versions

sky-2002 committed on Oct 5, 2025

Commit

13a7e6f

·

verified ·

1 Parent(s): 162c0d8

Upload utils.py

Files changed (1) hide show

utils.py +45 -0

utils.py ADDED Viewed

	@@ -0,0 +1,45 @@

+import torch
+from typing import Optional
+import torch.nn.functional as F
def load_model(checkpoint_path, model):
    """Restore weights from a checkpoint file into *model* and return it.

    The checkpoint is read onto the CPU and must contain the state dict
    under the ``"model"`` key. The model is switched to evaluation mode
    before it is handed back.

    NOTE: ``torch.load`` unpickles the file, so only load checkpoints that
    come from a trusted source.

    Args:
        checkpoint_path: Location of the saved checkpoint.
        model: Module instance whose parameters are overwritten in place.

    Returns:
        The same ``model`` object, with weights loaded and in eval mode.
    """
    state_dict = torch.load(checkpoint_path, map_location="cpu")["model"]
    model.load_state_dict(state_dict)
    model.eval()
    return model
def generate_text(
    model,
    data_processor,
    prompt: str,
    max_new_tokens: int,
    temperature: float = 1.0,
    top_k: Optional[int] = None,
    device: str = "cpu",
):
    """Autoregressively sample a continuation of *prompt*.

    The prompt is tokenized with ``data_processor.tokenize``, extended one
    token at a time for ``max_new_tokens`` steps, and the whole sequence
    (prompt plus generated tokens) is turned back into text with
    ``data_processor.detokenize``.

    Only the last ``model.config.max_token_len`` tokens are fed to the model
    at each step, but the full sequence is kept for the returned text, so a
    long generation no longer loses the beginning of the output.

    Args:
        model: Callable module returning logits indexed as
            ``[batch, position, vocab]``; must expose
            ``config.max_token_len``.
        data_processor: Object providing ``tokenize(str)`` and
            ``detokenize(list[int])``.
        prompt: Text to continue.
        max_new_tokens: Number of tokens to append.
        temperature: Divisor applied to the logits before sampling. A value
            of ``0`` selects the highest-scoring token at every step
            (greedy decoding) instead of dividing by zero.
        top_k: If given, sampling is restricted to the ``top_k``
            highest-scoring tokens (capped at the vocabulary size).
        device: Device on which the token tensor is created. The model is
            not moved; it is expected to already live on this device.

    Returns:
        The detokenized prompt followed by the generated continuation.
    """
    model.eval()
    tokens = data_processor.tokenize(prompt)
    input_ids = torch.tensor(tokens, dtype=torch.long, device=device).unsqueeze(0)
    with torch.no_grad():
        for _ in range(max_new_tokens):
            # Feed the model a window of at most max_token_len tokens, but
            # leave input_ids intact so the returned text keeps its start.
            context = input_ids[:, -model.config.max_token_len :]
            logits = model(context)[:, -1, :]  # logits for the last position
            if temperature == 0:
                # Limit case of temperature scaling: pick the arg-max token.
                next_token = torch.argmax(logits, dim=-1, keepdim=True)
            else:
                logits = logits / temperature
                if top_k is not None:
                    top_values, _indices = torch.topk(
                        logits, min(top_k, logits.size(-1))
                    )
                    # Anything below the k-th best score gets zero probability.
                    logits = logits.masked_fill(
                        logits < top_values[:, [-1]], float("-inf")
                    )
                probs = F.softmax(logits, dim=-1)
                next_token = torch.multinomial(probs, num_samples=1)
            input_ids = torch.cat((input_ids, next_token), dim=1)
    output_tokens = input_ids[0].tolist()
    return data_processor.detokenize(output_tokens)