Update app.py
app.py CHANGED

@@ -171,9 +171,11 @@ class MultiScaleRetention(nn.Module):
             batch_size, seq_len, self.hidden_size
         )
 
-        # ✅ Group norm - ensure it's on the correct device
+        # ✅ Group norm - ensure it's on the correct device AND dtype
         if not next(self.group_norm.parameters()).is_cuda and retention_states.is_cuda:
-            self.group_norm = self.group_norm.to(retention_states.device)
+            self.group_norm = self.group_norm.to(retention_states.device, dtype=retention_states.dtype)
+        elif next(self.group_norm.parameters()).dtype != retention_states.dtype:
+            self.group_norm = self.group_norm.to(dtype=retention_states.dtype)
 
         retention_states = self.group_norm(
             retention_states.transpose(1, 2)
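The hunk above applies a lazy-alignment pattern: at forward time, move or cast `group_norm` so its parameters match the incoming activations, since a device or dtype mismatch (common with mixed-precision or device-mapped loading) makes `nn.GroupNorm` raise. Below is a minimal self-contained sketch of that pattern under stated assumptions: the module is hypothetical (not the class in app.py), and it compares devices directly where the diff checks `is_cuda`.

import torch
import torch.nn as nn

class LazyAlignedNorm(nn.Module):
    """Hypothetical stand-in for the group-norm path in MultiScaleRetention."""

    def __init__(self, hidden_size: int, num_groups: int = 8):
        super().__init__()
        self.group_norm = nn.GroupNorm(num_groups, hidden_size)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # Align the norm's parameters with the input before using them:
        # fix a device mismatch (setting dtype in the same call), else
        # fix the dtype alone, mirroring the if/elif in the diff.
        param = next(self.group_norm.parameters())
        if param.device != x.device:
            self.group_norm = self.group_norm.to(x.device, dtype=x.dtype)
        elif param.dtype != x.dtype:
            self.group_norm = self.group_norm.to(dtype=x.dtype)
        # GroupNorm normalizes over dim 1, so swap (batch, seq, hidden)
        # to (batch, hidden, seq) and back, as the diff's code does.
        return self.group_norm(x.transpose(1, 2)).transpose(1, 2)

# Usage: a half-precision input no longer trips a dtype mismatch.
# x = torch.randn(2, 16, 64, dtype=torch.float16)
# y = LazyAlignedNorm(64)(x)

Note that `Module.to` modifies the module in place and returns it, so the reassignment mirrors the diff's style but is not strictly required.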
@@ -277,14 +279,22 @@ class HierarchicalRetention(nn.Module):
         if past_key_values is not None:
             past_key_value = past_key_values
 
-        # ✅ Ensure all submodules are on correct device
+        # ✅ Ensure all submodules are on correct device AND dtype
         target_device = hidden_states.device
+        target_dtype = hidden_states.dtype
+
         if not next(self.short_proj.parameters()).is_cuda and hidden_states.is_cuda:
-            self.short_proj = self.short_proj.to(target_device)
-            self.medium_proj = self.medium_proj.to(target_device)
-            self.long_proj = self.long_proj.to(target_device)
-            self.fusion = self.fusion.to(target_device)
-            self.norm = self.norm.to(target_device)
+            self.short_proj = self.short_proj.to(target_device, dtype=target_dtype)
+            self.medium_proj = self.medium_proj.to(target_device, dtype=target_dtype)
+            self.long_proj = self.long_proj.to(target_device, dtype=target_dtype)
+            self.fusion = self.fusion.to(target_device, dtype=target_dtype)
+            self.norm = self.norm.to(target_device, dtype=target_dtype)
+        elif next(self.short_proj.parameters()).dtype != target_dtype:
+            self.short_proj = self.short_proj.to(dtype=target_dtype)
+            self.medium_proj = self.medium_proj.to(dtype=target_dtype)
+            self.long_proj = self.long_proj.to(dtype=target_dtype)
+            self.fusion = self.fusion.to(dtype=target_dtype)
+            self.norm = self.norm.to(dtype=target_dtype)
 
         # Base Retention
         retention_output, attn_weights, past_kv = self.base_retention(