Spaces:

Heartsync
/

phoenix

Paused

App Files Files Community

seawolf2357 committed on 27 days ago

Commit

238a77b

verified ·

1 Parent(s): caf3990

Update app.py

Browse files

Files changed (1) hide show

app.py +25 -22

app.py CHANGED Viewed

@@ -301,18 +301,16 @@ class HierarchicalRetention(nn.Module):
         target_device = hidden_states.device
         target_dtype = hidden_states.dtype
-        if not next(self.short_proj.parameters()).is_cuda and hidden_states.is_cuda:
-            self.short_proj = self.short_proj.to(target_device, dtype=target_dtype)
-            self.medium_proj = self.medium_proj.to(target_device, dtype=target_dtype)
-            self.long_proj = self.long_proj.to(target_device, dtype=target_dtype)
-            self.fusion = self.fusion.to(target_device, dtype=target_dtype)
-            self.norm = self.norm.to(target_device, dtype=target_dtype)
-        elif next(self.short_proj.parameters()).dtype != target_dtype:
-            self.short_proj = self.short_proj.to(dtype=target_dtype)
-            self.medium_proj = self.medium_proj.to(dtype=target_dtype)
-            self.long_proj = self.long_proj.to(dtype=target_dtype)
-            self.fusion = self.fusion.to(dtype=target_dtype)
-            self.norm = self.norm.to(dtype=target_dtype)
         base_result = self.base_retention(
             hidden_states, attention_mask, position_ids,
@@ -322,9 +320,9 @@ class HierarchicalRetention(nn.Module):
         retention_output = base_result[0]
         # Hierarchical states
-        short_state = torch.zeros(batch_size, self.d_state, dtype=hidden_states.dtype, device=target_device)
-        medium_state = torch.zeros(batch_size, self.d_state, dtype=hidden_states.dtype, device=target_device)
-        long_state = torch.zeros(batch_size, self.d_state * 2, dtype=hidden_states.dtype, device=target_device)
         hierarchical_outputs = []
@@ -686,12 +684,16 @@ class HierarchicalRetention(nn.Module):
         target_device = hidden_states.device
         target_dtype = hidden_states.dtype
-        if not next(self.short_proj.parameters()).is_cuda and hidden_states.is_cuda:
-            self.short_proj = self.short_proj.to(target_device, dtype=target_dtype)
-            self.medium_proj = self.medium_proj.to(target_device, dtype=target_dtype)
-            self.long_proj = self.long_proj.to(target_device, dtype=target_dtype)
-            self.fusion = self.fusion.to(target_device, dtype=target_dtype)
-            self.norm = self.norm.to(target_device, dtype=target_dtype)
         base_result = self.base_retention(
             hidden_states, attention_mask, position_ids,
@@ -871,7 +873,8 @@ class PhoenixModelForCausalLM(PhoenixPreTrainedModel):
 # Auto-registration
 AutoConfig.register("phoenix", PhoenixConfig)
-'''
     return modeling_code

         target_device = hidden_states.device
         target_dtype = hidden_states.dtype
+        # ✅ 개선된 dtype/device 체크
+        current_device = next(self.short_proj.parameters()).device
+        current_dtype = next(self.short_proj.parameters()).dtype
+        if current_device != target_device or current_dtype != target_dtype:
+            self.short_proj = self.short_proj.to(device=target_device, dtype=target_dtype)
+            self.medium_proj = self.medium_proj.to(device=target_device, dtype=target_dtype)
+            self.long_proj = self.long_proj.to(device=target_device, dtype=target_dtype)
+            self.fusion = self.fusion.to(device=target_device, dtype=target_dtype)
+            self.norm = self.norm.to(device=target_device, dtype=target_dtype)
         base_result = self.base_retention(
             hidden_states, attention_mask, position_ids,
         retention_output = base_result[0]
         # Hierarchical states
+        short_state = torch.zeros(batch_size, self.d_state, dtype=target_dtype, device=target_device)
+        medium_state = torch.zeros(batch_size, self.d_state, dtype=target_dtype, device=target_device)
+        long_state = torch.zeros(batch_size, self.d_state * 2, dtype=target_dtype, device=target_device)
         hierarchical_outputs = []
         target_device = hidden_states.device
         target_dtype = hidden_states.dtype
+        # ✅ 개선된 dtype/device 체크
+        current_device = next(self.short_proj.parameters()).device
+        current_dtype = next(self.short_proj.parameters()).dtype
+        if current_device != target_device or current_dtype != target_dtype:
+            self.short_proj = self.short_proj.to(device=target_device, dtype=target_dtype)
+            self.medium_proj = self.medium_proj.to(device=target_device, dtype=target_dtype)
+            self.long_proj = self.long_proj.to(device=target_device, dtype=target_dtype)
+            self.fusion = self.fusion.to(device=target_device, dtype=target_dtype)
+            self.norm = self.norm.to(device=target_device, dtype=target_dtype)
         base_result = self.base_retention(
             hidden_states, attention_mask, position_ids,
 # Auto-registration
 AutoConfig.register("phoenix", PhoenixConfig)
+'''
     return modeling_code