seawolf2357 committed
Commit 7e1dc71 · verified · 1 Parent(s): cce66a2

Update app.py

Files changed (1)
  1. app.py +175 -161
app.py CHANGED
@@ -681,25 +681,17 @@ def replace_attention_with_retention(model, use_hierarchical=True, structure_inf
681   # =====================================================
682
683   def generate_modeling_phoenix_code():
684 -     """
685 -     Generates the PHOENIX custom modeling code, v1.4.3
686 -     ✅ FIX: includes the Retention conversion
687 -     """
688
689 -     modeling_code = '''"""
690 - PHOENIX Retention Model - Custom Implementation v1.4.3
691 - Auto-loaded by HuggingFace transformers with trust_remote_code=True
692 -
693 - ✅ FIX v1.4.3: automatic Retention conversion on load
694 - ✅ FIX v1.4.2: improved embedding tying
695 - ✅ FIX v1.4.1: direct state dict loading
696 -
697 - VIDraft AI Research Lab
698   """
699
700   import torch
701   import torch.nn as nn
702 - from typing import Optional, Tuple, Union
703   from transformers.modeling_utils import PreTrainedModel
704   from transformers.configuration_utils import PretrainedConfig
705   from transformers import AutoConfig, AutoModelForCausalLM
@@ -707,181 +699,203 @@ import os
707
708
709   class PhoenixConfig(PretrainedConfig):
710 -     """PHOENIX Model Configuration"""
711       model_type = "phoenix"
712 -
713 -     def __init__(
714 -         self,
715 -         use_phoenix_retention=True,
716 -         phoenix_version="1.4.3",
717 -         original_architecture=None,
718 -         original_model=None,
719 -         **kwargs
720 -     ):
721           super().__init__(**kwargs)
722           self.use_phoenix_retention = use_phoenix_retention
723           self.phoenix_version = phoenix_version
724 -         self.original_architecture = original_architecture
725           self.original_model = original_model
726
727
728 - # ✅ CRITICAL: the Retention classes must be included here!
729 - # (insert the full MultiScaleRetention and HierarchicalRetention code here)
730
731
732 - def replace_attention_with_retention_for_loading(model, use_hierarchical=True):
733 -     """
734 -     Automatically converts Attention → Retention when loading from the Hub
735 -     """
736 -     print("🔄 Converting Attention → Retention for loaded model...")
737 -
738 -     layers = None
739 -     if hasattr(model, 'model') and hasattr(model.model, 'layers'):
740 -         layers = model.model.layers
741
742 -     if layers is None:
743 -         print("❌ Cannot find layers")
744 -         return model, 0, 0
745
746 -     replaced_count = 0
747 -     for layer_idx, layer in enumerate(layers):
748           if hasattr(layer, 'self_attn'):
749 -             if use_hierarchical:
750 -                 new_retention = HierarchicalRetention(model.config, layer_idx)
751 -             else:
752 -                 new_retention = MultiScaleRetention(model.config, layer_idx)
753 -
754 -             layer.self_attn = new_retention
755 -             replaced_count += 1
756
757 -     print(f"✅ Converted {replaced_count}/{len(layers)} layers")
758 -     return model, replaced_count, len(layers)
759
760
761   class PhoenixModelForCausalLM(PhoenixPreTrainedModel):
762 -     """
763 -     PHOENIX Model for Causal Language Modeling v1.4.3
764 -     ✅ FIX: includes automatic Retention conversion
765 -     """
766 -
767       def __init__(self, config):
768           super().__init__(config)
769 -         self.config = config
770 -         self._original_model = None
771 -         self._initialized = False
772 -
773       @classmethod
774 -     def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs):
775 -         """🔥 PHOENIX auto-loading! v1.4.3"""
776 -         print(f"🔥 Loading PHOENIX model from {pretrained_model_name_or_path}")
777 -
778 -         config = AutoConfig.from_pretrained(pretrained_model_name_or_path, trust_remote_code=True)
779 -
780 -         original_model = getattr(config, 'original_model', 'Qwen/Qwen3-0.6B')
781 -         use_hierarchical = getattr(config, 'use_hierarchical', True)
782 -
783 -         print(f" 📋 Original model: {original_model}")
784 -         print(f" 🔄 Hierarchical: {use_hierarchical}")
785
786           try:
787 -             base_config = AutoConfig.from_pretrained(original_model, trust_remote_code=True)
788           except:
789 -             base_config = config
790 -
791 -         base_model = AutoModelForCausalLM.from_config(base_config)
792 -
793 -         print(f" ✅ Created base structure")
794 -
795 -         # CRITICAL FIX: run the Retention conversion!
796 -         base_model, converted, total = replace_attention_with_retention_for_loading(
797 -             base_model, use_hierarchical
798 -         )
799 -
800 -         # load the state_dict
801 -         state_dict = None
802 -
803 -         if os.path.exists(pretrained_model_name_or_path):
804 -             safetensors_path = os.path.join(pretrained_model_name_or_path, "model.safetensors")
805 -             pytorch_path = os.path.join(pretrained_model_name_or_path, "pytorch_model.bin")
806 -
807 -             if os.path.exists(safetensors_path):
808 -                 try:
809 -                     from safetensors.torch import load_file
810 -                     state_dict = load_file(safetensors_path)
811 -                     print(f" ✅ Loaded from safetensors")
812 -                 except:
813 -                     pass
814 -
815 -             if state_dict is None and os.path.exists(pytorch_path):
816 -                 state_dict = torch.load(pytorch_path, map_location='cpu')
817 -                 print(f" ✅ Loaded from pytorch_model.bin")
818           else:
819 -             try:
820 -                 from huggingface_hub import hf_hub_download
821 -
822                   try:
823 -                     safetensors_path = hf_hub_download(
824 -                         repo_id=pretrained_model_name_or_path,
825 -                         filename="model.safetensors"
826 -                     )
827 -                     from safetensors.torch import load_file
828 -                     state_dict = load_file(safetensors_path)
829 -                     print(f" ✅ Loaded from Hub (safetensors)")
830 -                 except:
831 -                     pytorch_path = hf_hub_download(
832 -                         repo_id=pretrained_model_name_or_path,
833 -                         filename="pytorch_model.bin"
834 -                     )
835 -                     state_dict = torch.load(pytorch_path, map_location='cpu')
836 -                     print(f" ✅ Loaded from Hub (pytorch_model.bin)")
837 -             except Exception as e:
838 -                 print(f" ❌ Failed to load weights: {e}")
839 -
840 -         if state_dict is not None:
841 -             try:
842 -                 missing, unexpected = base_model.load_state_dict(state_dict, strict=False)
843 -
844 -                 print(f" ✅ Weights loaded")
845 -                 print(f" Missing keys: {len(missing)}")
846 -                 print(f" Unexpected keys: {len(unexpected)}")
847 -
848 -                 # ✅ Embedding Tying
849 -                 if 'lm_head.weight' in missing:
850 -                     print(f" ⚠️ lm_head.weight missing - checking tie_word_embeddings...")
851 -
852 -                     tie_embeddings = getattr(config, 'tie_word_embeddings', False)
853 -                     print(f" tie_word_embeddings: {tie_embeddings}")
854 -
855 -                     if tie_embeddings and hasattr(base_model, 'lm_head') and hasattr(base_model, 'model'):
856 -                         if hasattr(base_model.model, 'embed_tokens'):
857 -                             print(f" 🔗 Tying lm_head.weight to embed_tokens.weight...")
858 -                             base_model.lm_head.weight = base_model.model.embed_tokens.weight
859 -                             print(f" ✅ Embedding tying applied!")
860 -
861 -                 retention_keys = [k for k in state_dict.keys() if 'retention' in k.lower()]
862 -                 if retention_keys:
863 -                     print(f" ✅ Found {len(retention_keys)} Retention weight keys")
864 -
865 -             except Exception as e:
866 -                 print(f" ⚠️ Weight loading warning: {e}")
867 -
868 -         phoenix_instance = cls(config)
869 -         phoenix_instance._original_model = base_model
870 -         phoenix_instance._initialized = True
871
872 -         print(f"✅ PHOENIX model ready!")
873
874 -         return phoenix_instance
875
876 -     def forward(self, *args, **kwargs):
877 -         if not self._initialized or self._original_model is None:
878 -             raise ValueError("Model not properly initialized. Use from_pretrained().")
879 -         return self._original_model(*args, **kwargs)
880
881 -     def generate(self, *args, **kwargs):
882 -         if not self._initialized or self._original_model is None:
883 -             raise ValueError("Model not properly initialized. Use from_pretrained().")
884 -         return self._original_model.generate(*args, **kwargs)
885
886
887   AutoConfig.register("phoenix", PhoenixConfig)
 
681   # =====================================================
682
683   def generate_modeling_phoenix_code():
684 +     """PHOENIX Custom Modeling Code v1.4.3 - COMPLETE"""
685
686 +     return '''"""
687 + PHOENIX Retention Model v1.4.3
688 + Includes the PhoenixPreTrainedModel base class
689 + ✅ All Retention classes fully implemented
690   """
691
692   import torch
693   import torch.nn as nn
694 + from typing import Optional, Tuple
695   from transformers.modeling_utils import PreTrainedModel
696   from transformers.configuration_utils import PretrainedConfig
697   from transformers import AutoConfig, AutoModelForCausalLM
699
700
701   class PhoenixConfig(PretrainedConfig):
702       model_type = "phoenix"
703 +     def __init__(self, use_phoenix_retention=True, phoenix_version="1.4.3",
704 +                  original_model=None, use_hierarchical=True, **kwargs):
705           super().__init__(**kwargs)
706           self.use_phoenix_retention = use_phoenix_retention
707           self.phoenix_version = phoenix_version
708           self.original_model = original_model
709 +         self.use_hierarchical = use_hierarchical
710
711
712 + class MultiScaleRetention(nn.Module):
713 +     def __init__(self, config, layer_idx=0):
714 +         super().__init__()
715 +         self.hidden_size = config.hidden_size
716 +         self.num_heads = config.num_attention_heads
717 +         self.head_dim = getattr(config, 'head_dim', self.hidden_size // self.num_heads)
718 +         self.num_key_value_heads = getattr(config, 'num_key_value_heads', self.num_heads)
719 +         self.num_key_value_groups = self.num_heads // self.num_key_value_heads
720 +         self.q_dim = self.num_heads * self.head_dim
721 +         self.kv_dim = self.num_key_value_heads * self.head_dim
722 +
723 +         self.q_proj = nn.Linear(self.hidden_size, self.q_dim, bias=False)
724 +         self.k_proj = nn.Linear(self.hidden_size, self.kv_dim, bias=False)
725 +         self.v_proj = nn.Linear(self.hidden_size, self.kv_dim, bias=False)
726 +         self.o_proj = nn.Linear(self.q_dim, self.hidden_size, bias=False)
727 +         self.decay = nn.Parameter(torch.linspace(0.95, 0.99, self.num_heads))
728 +         self.group_norm = nn.GroupNorm(self.num_heads, self.q_dim)
729 +
730 +     def _repeat_kv(self, x, n):
731 +         b, h, s, d = x.shape
732 +         if n == 1: return x
733 +         return x[:, :, None, :, :].expand(b, h, n, s, d).reshape(b, h*n, s, d)
734 +
735 +     def forward(self, hidden_states, **kwargs):
736 +         b, s, _ = hidden_states.shape
737 +         device, dtype = hidden_states.device, hidden_states.dtype
738 +
739 +         if self.q_proj.weight.device != device:
740 +             self.to(device=device, dtype=dtype)
741 +
742 +         q = self.q_proj(hidden_states).view(b, s, self.num_heads, self.head_dim).transpose(1, 2)
743 +         k = self.k_proj(hidden_states).view(b, s, self.num_key_value_heads, self.head_dim).transpose(1, 2)
744 +         v = self.v_proj(hidden_states).view(b, s, self.num_key_value_heads, self.head_dim).transpose(1, 2)
745 +
746 +         k = self._repeat_kv(k, self.num_key_value_groups)
747 +         v = self._repeat_kv(v, self.num_key_value_groups)
748 +
749 +         out = self._retention(q, k, v)
750 +         out = out.transpose(1, 2).reshape(b, s, self.q_dim)
751 +         out = self.group_norm(out.transpose(1, 2)).transpose(1, 2)
752 +         return (self.o_proj(torch.clamp(out, -10, 10)), None)
753 +
754 +     def _retention(self, q, k, v):
755 +         b, h, s, d = q.shape
756 +         state = torch.zeros(b, h, d, d, dtype=q.dtype, device=q.device) + 1e-6
757 +         decay = torch.sigmoid(self.decay).view(1, -1, 1, 1).to(q)
758 +         outs = []
759 +         for t in range(s):
760 +             state = decay * state + torch.clamp(torch.einsum('bhd,bhe->bhde', k[:,:,t], v[:,:,t]), -5, 5)
761 +             state = torch.clamp(state, -10, 10)
762 +             outs.append(torch.einsum('bhd,bhde->bhe', q[:,:,t], state))
763 +         return torch.stack(outs, dim=2)
764
765
766 + class HierarchicalRetention(nn.Module):
767 +     def __init__(self, config, layer_idx=0):
768 +         super().__init__()
769 +         self.base_retention = MultiScaleRetention(config, layer_idx)
770 +         h = config.hidden_size
771 +         self.d_state = h // 2
772 +         self.short_proj = nn.Linear(h, self.d_state)
773 +         self.medium_proj = nn.Linear(self.d_state, self.d_state)
774 +         self.long_proj = nn.Linear(self.d_state, self.d_state*2)
775 +         self.fusion = nn.Linear(self.d_state*4, h)
776 +         self.norm = nn.LayerNorm(h)
777 +         self.decays = [0.5, 0.8, 0.95]
778
779 +     def forward(self, x, **kwargs):
780 +         b, s, h = x.shape
781 +         device, dtype = x.device, x.dtype
782 +         if next(self.short_proj.parameters()).device != device:
783 +             self.to(device=device, dtype=dtype)
784 +
785 +         ret_out = self.base_retention(x)[0]
786 +         short = torch.zeros(b, self.d_state, dtype=dtype, device=device)
787 +         med = torch.zeros(b, self.d_state, dtype=dtype, device=device)
788 +         long = torch.zeros(b, self.d_state*2, dtype=dtype, device=device)
789 +         outs = []
790 +
791 +         for t in range(s):
792 +             short = self.decays[0]*short + self.short_proj(ret_out[:,t])
793 +             if t % 8 == 0: med = self.decays[1]*med + self.medium_proj(short)
794 +             if t % 64 == 0: long = self.decays[2]*long + self.long_proj(med)
795 +             outs.append(self.fusion(torch.cat([short, med, long], -1)))
796 +
797 +         return (self.norm(torch.stack(outs, 1)), None)
798 +
799 +
800 + def replace_attention_with_retention_for_loading(model, use_hierarchical=True):
801 +     layers = getattr(model, 'model', model)
802 +     layers = getattr(layers, 'layers', getattr(layers, 'h', None))
803 +     if layers is None: return model, 0, 0
804
805 +     cnt = 0
806 +     for i, layer in enumerate(layers):
807           if hasattr(layer, 'self_attn'):
808 +             layer.self_attn = HierarchicalRetention(model.config, i) if use_hierarchical else MultiScaleRetention(model.config, i)
809 +             cnt += 1
810 +     return model, cnt, len(layers)
811 +
812 +
813 + # CRITICAL: PhoenixPreTrainedModel base class
814 + class PhoenixPreTrainedModel(PreTrainedModel):
815 +     config_class = PhoenixConfig
816 +     base_model_prefix = "phoenix"
817 +     supports_gradient_checkpointing = True
818 +     _no_split_modules = ["MultiScaleRetention", "HierarchicalRetention"]
819
820 +     def _init_weights(self, m):
821 +         std = getattr(self.config, 'initializer_range', 0.02)
822 +         if isinstance(m, nn.Linear):
823 +             m.weight.data.normal_(0, std)
824 +             if m.bias is not None: m.bias.data.zero_()
825 +         elif isinstance(m, nn.Embedding):
826 +             m.weight.data.normal_(0, std)
827 +             if m.padding_idx is not None: m.weight.data[m.padding_idx].zero_()
828
829
830   class PhoenixModelForCausalLM(PhoenixPreTrainedModel):
831       def __init__(self, config):
832           super().__init__(config)
833 +         self._model = None
834 +         self._ready = False
835 +
836       @classmethod
837 +     def from_pretrained(cls, path, *args, **kwargs):
838 +         print(f"🔥 PHOENIX v1.4.3 loading from {path}")
839 +         config = AutoConfig.from_pretrained(path, trust_remote_code=True)
840 +         orig = getattr(config, 'original_model', 'Qwen/Qwen3-0.6B')
841 +         hier = getattr(config, 'use_hierarchical', True)
842
843           try:
844 +             base_cfg = AutoConfig.from_pretrained(orig, trust_remote_code=True)
845           except:
846 +             base_cfg = config
847 +
848 +         model = AutoModelForCausalLM.from_config(base_cfg)
849 +         model, conv, tot = replace_attention_with_retention_for_loading(model, hier)
850 +         print(f" ✅ Converted {conv}/{tot} layers")
851 +
852 +         # Load weights
853 +         sd = None
854 +         if os.path.exists(path):
855 +             for fname in ["model.safetensors", "pytorch_model.bin"]:
856 +                 fpath = os.path.join(path, fname)
857 +                 if os.path.exists(fpath):
858 +                     if fname.endswith('.safetensors'):
859 +                         from safetensors.torch import load_file
860 +                         sd = load_file(fpath)
861 +                     else:
862 +                         sd = torch.load(fpath, map_location='cpu')
863 +                     break
864           else:
865 +             from huggingface_hub import hf_hub_download
866 +             for fname in ["model.safetensors", "pytorch_model.bin"]:
867                   try:
868 +                     fpath = hf_hub_download(path, fname)
869 +                     if fname.endswith('.safetensors'):
870 +                         from safetensors.torch import load_file
871 +                         sd = load_file(fpath)
872 +                     else:
873 +                         sd = torch.load(fpath, map_location='cpu')
874 +                     break
875 +                 except: pass
876
877 +         if sd:
878 +             miss, unex = model.load_state_dict(sd, strict=False)
879 +             print(f" 📦 Weights: {len(miss)} missing, {len(unex)} unexpected")
880 +
881 +             if 'lm_head.weight' in miss and getattr(config, 'tie_word_embeddings', False):
882 +                 if hasattr(model, 'lm_head') and hasattr(model.model, 'embed_tokens'):
883 +                     model.lm_head.weight = model.model.embed_tokens.weight
884 +                     print(f" 🔗 Tied embeddings")
885
886 +         inst = cls(config)
887 +         inst._model = model
888 +         inst._ready = True
889 +         print(f"✅ PHOENIX v1.4.3 ready!")
890 +         return inst
891
892 +     def forward(self, *a, **k):
893 +         if not self._ready: raise ValueError("Not initialized")
894 +         return self._model(*a, **k)
895
896 +     def generate(self, *a, **k):
897 +         if not self._ready: raise ValueError("Not initialized")
898 +         return self._model.generate(*a, **k)
899
900
901   AutoConfig.register("phoenix", PhoenixConfig)
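For reference, the generated modeling file is meant to be consumed through the standard trust_remote_code path: PhoenixModelForCausalLM.from_pretrained rebuilds the base architecture from config.original_model, swaps every self_attn module for a Retention module, and then loads the checkpoint with strict=False. A minimal loading sketch follows, assuming the generated modeling_phoenix.py and a config.json with the usual auto_map entries have been pushed to a Hub repo; "your-org/phoenix-model" is a hypothetical placeholder, not a repo named in this commit.

from transformers import AutoModelForCausalLM, AutoTokenizer

repo_id = "your-org/phoenix-model"  # hypothetical placeholder repo id

# trust_remote_code=True lets transformers import the repo's custom
# modeling_phoenix.py, so the PHOENIX from_pretrained path runs
# (rebuild base model -> swap Attention for Retention -> strict=False weight load).
tokenizer = AutoTokenizer.from_pretrained(repo_id)
model = AutoModelForCausalLM.from_pretrained(repo_id, trust_remote_code=True)

inputs = tokenizer("Hello PHOENIX", return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=32)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))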