w32zhong
/

fearless-grass-116

qwen3

custom_code

Model card Files Files and versions

xet

Community

w32zhong committed on Oct 17, 2025

Commit

1a06636

verified ·

1 Parent(s): 65597fc

Upload folder using huggingface_hub

Browse files

Files changed (2) hide show

config.json +80 -0
modeling_speculative_qwen3.py +51 -0

config.json ADDED Viewed

	@@ -0,0 +1,80 @@

+{
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "auto_map": {
+    "AutoModelForCausalLM": "modeling_speculative_qwen3.SpeculativeQwen3ForCausalLM"
+  },
+  "bos_token_id": 151643,
+  "draft_layers": 1,
+  "dtype": "float32",
+  "eos_token_id": 151645,
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 2560,
+  "initializer_range": 0.02,
+  "intermediate_size": 9728,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 262144,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "num_attention_heads": 32,
+  "num_hidden_layers": 0,
+  "num_hidden_layers_free": 36,
+  "num_key_value_heads": 8,
+  "ploss_w": 0.1,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 5000000,
+  "skip_first_input_layernorm": true,
+  "skip_output_norm": true,
+  "sliding_window": null,
+  "speculative_decoding_algorithm": "EagleV2",
+  "speculative_decoding_base_model_path": "Qwen/Qwen3-4B-Instruct-2507",
+  "speculative_decoding_draft_model": "Qwen3MoeDrafter",
+  "tie_word_embeddings": true,
+  "transformers_version": "4.56.1",
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vloss_w": 1.0,
+  "vocab_size": 151936
+}

modeling_speculative_qwen3.py ADDED Viewed

	@@ -0,0 +1,51 @@

+import torch
+from transformers.models.qwen3.modeling_qwen3 import *
+from specforge_het.specforge_lm import SpecForgeLM
+class Qwen3Drafter(Qwen3Model):
+    def __init__(self, draft_config, base_model):
+        draft_config.num_hidden_layers = base_model.config.draft_layers
+        draft_config.hidden_size = base_model.get_hidden_size()
+        super().__init__(draft_config)
+        if base_model.config.skip_first_input_layernorm:
+            layer = self.layers[0]
+            delattr(layer, 'input_layernorm')
+            layer.input_layernorm = torch.nn.Identity()
+        if base_model.config.skip_output_norm:
+            delattr(self, 'norm')
+            self.norm = torch.nn.Identity()
+        delattr(self, 'embed_tokens')
+    def get_hidden_size(self):
+        return self.config.hidden_size
+class SpeculativeQwen3ForCausalLM(SpecForgeLM, Qwen3ForCausalLM):
+    @property
+    def base_model(self):
+        return self.model
+    def get_hidden_size(self):
+        return self.config.hidden_size
+    def get_base_layers(self):
+        return self.base_model.layers
+    def get_token_embedding(self, input_ids):
+        return self.base_model.embed_tokens(input_ids)
+    def get_positional_embedding(self, t, position_ids):
+        return self.base_model.rotary_emb(t, position_ids)
+    def get_token_logits(self, hidden_states):
+        return self.lm_head(hidden_states)
+    def get_max_ctx_length(self):
+        return self.model.config.max_position_embeddings
+    def save_pretrained(self, path, **kwargs):
+        return self.save_speculative_model(path, **kwargs)