Aduc-Sdr_Novim

Sleeping

App Files Files Community

CARLEXsX committed on Aug 16

Commit

5faf292

verified ·

1 Parent(s): 0f55e23

Delete teacache_helpers.py

Browse files

Files changed (1) hide show

teacache_helpers.py +0 -153

teacache_helpers.py DELETED Viewed

@@ -1,153 +0,0 @@
-# teacache_helpers.py
-import torch
-import numpy as np
-from diffusers.models.modeling_outputs import Transformer2DModelOutput
-from diffusers.utils import USE_PEFT_BACKEND, is_torch_version, logging, scale_lora_layers, unscale_lora_layers
-from typing import Any, Dict, Optional, Tuple
-logger = logging.get_logger(__name__)
-# Polynomial that rescales the raw relative-L1 change of the modulated input
-# before it is accumulated. Built once at import time instead of on every call.
-# NOTE(review): the coefficients look empirically fitted for this model family;
-# confirm their origin before changing them.
-_TEACACHE_RESCALE = np.poly1d(
-    [2.14700694e+01, -1.28016453e+01, 2.31279151e+00, 7.92487521e-01, 9.69274326e-03]
-)
-
-
-def _mask_to_additive_bias(mask: Optional[torch.Tensor], dtype: torch.dtype) -> Optional[torch.Tensor]:
-    """Turn a 2-D mask into an additive bias with a singleton dim inserted at 1.
-
-    Entries equal to 1 become 0 and entries equal to 0 become -10000.0.
-    ``None`` and masks that are not 2-D are returned unchanged.
-    """
-    if mask is None or mask.ndim != 2:
-        return mask
-    return ((1 - mask.to(dtype)) * -10000.0).unsqueeze(1)
-
-
-def _teacache_should_compute(self, hidden_states: torch.Tensor, temb: torch.Tensor, batch_size: int) -> bool:
-    """Decide whether the transformer blocks must run for this step.
-
-    The input is modulated exactly as the first transformer block would
-    modulate it, and its relative L1 change against the previous step is
-    accumulated (after polynomial rescaling). The blocks are skipped only
-    while the accumulated value stays below ``self.rel_l1_thresh``.
-
-    Reads ``self.cnt``, ``self.num_steps`` and ``self.rel_l1_thresh``; updates
-    ``self.cnt``, ``self.accumulated_rel_l1_distance`` and
-    ``self.previous_modulated_input``.
-    """
-    first_block = self.transformer_blocks[0]
-    normed = first_block.norm1(hidden_states)
-    num_ada_params = first_block.scale_shift_table.shape[0]
-    ada_values = first_block.scale_shift_table[None, None] + temb.reshape(
-        batch_size, temb.size(1), num_ada_params, -1
-    )
-    if first_block.adaptive_norm == "single_scale_shift":
-        shift_msa, scale_msa, _, _, _, _ = ada_values.unbind(dim=2)
-        modulated = normed * (1 + scale_msa) + shift_msa
-    else:  # single_scale
-        scale_msa, _, _, _ = ada_values.unbind(dim=2)
-        modulated = normed * (1 + scale_msa)
-
-    previous = getattr(self, "previous_modulated_input", None)
-    # The first and the last step of a run are always computed in full.
-    boundary_step = self.cnt == 0 or self.cnt == self.num_steps - 1
-    # Without usable cached state (never populated, or left over from an input
-    # of a different shape) reuse is impossible, so fall back to computing.
-    cache_unusable = (
-        previous is None
-        or previous.shape != modulated.shape
-        or getattr(self, "previous_residual", None) is None
-    )
-    if boundary_step or cache_unusable:
-        should_calc = True
-        self.accumulated_rel_l1_distance = 0
-    else:
-        rel_l1 = ((modulated - previous).abs().mean() / previous.abs().mean()).cpu().item()
-        accumulated = getattr(self, "accumulated_rel_l1_distance", 0) + _TEACACHE_RESCALE(rel_l1)
-        if accumulated < self.rel_l1_thresh:
-            should_calc = False
-            self.accumulated_rel_l1_distance = accumulated
-        else:
-            # Also reached when the ratio is NaN/inf (comparison is False).
-            should_calc = True
-            self.accumulated_rel_l1_distance = 0
-
-    self.previous_modulated_input = modulated
-    self.cnt += 1
-    # ">=" so the counter still wraps if num_steps was lowered mid-run.
-    if self.cnt >= self.num_steps:
-        self.cnt = 0
-    return should_calc
-
-
-def teacache_forward(
-    self,
-    hidden_states: torch.Tensor,
-    indices_grid: torch.Tensor,
-    encoder_hidden_states: Optional[torch.Tensor] = None,
-    timestep: Optional[torch.LongTensor] = None,
-    class_labels: Optional[torch.LongTensor] = None,
-    cross_attention_kwargs: Optional[Dict[str, Any]] = None,
-    attention_mask: Optional[torch.Tensor] = None,
-    encoder_attention_mask: Optional[torch.Tensor] = None,
-    skip_layer_mask: Optional[torch.Tensor] = None,
-    skip_layer_strategy: Optional[Any] = None,  # Any is used for compatibility
-    return_dict: bool = True,
-) -> Any:
-    """Transformer forward pass with optional TeaCache block skipping.
-
-    Intended to be bound as the ``forward`` of a transformer that exposes
-    ``patchify_proj``, ``precompute_freqs_cis``, ``adaln_single``,
-    ``caption_projection``, ``transformer_blocks``, ``scale_shift_table``,
-    ``norm_out`` and ``proj_out``.
-
-    When ``self.enable_teacache`` is missing or falsy, every call runs all
-    transformer blocks. When it is truthy, ``self.cnt``, ``self.num_steps``
-    and ``self.rel_l1_thresh`` must be set by the caller; steps whose
-    modulated input changed little since the previous step reuse the cached
-    residual (``self.previous_residual``) instead of running the blocks.
-
-    Both modes share the same preparation (mask conversion, timestep scaling,
-    embeddings) and pass the same arguments to the blocks, so enabling the
-    cache changes only whether the blocks run.
-
-    Args:
-        hidden_states: Input passed to ``patchify_proj``.
-        indices_grid: Passed to ``precompute_freqs_cis``.
-        encoder_hidden_states: Conditioning passed to every block; projected
-            by ``caption_projection`` first when that module is not ``None``.
-        timestep: Required. Multiplied by ``timestep_scale_multiplier`` when
-            that attribute is set and truthy.
-        class_labels: Forwarded to every block.
-        cross_attention_kwargs: Forwarded to every block.
-        attention_mask: 2-D masks are converted to an additive bias.
-        encoder_attention_mask: 2-D masks are converted to an additive bias.
-        skip_layer_mask: Indexed per block (``skip_layer_mask[block_idx]``).
-        skip_layer_strategy: Forwarded to every block.
-        return_dict: Selects the return form.
-
-    Returns:
-        ``Transformer2DModelOutput(sample=...)`` when ``return_dict`` is true,
-        otherwise a 1-tuple holding the output tensor.
-
-    Raises:
-        ValueError: If ``timestep`` is ``None``.
-    """
-    if timestep is None:
-        raise ValueError("teacache_forward requires `timestep`.")
-
-    use_teacache = bool(getattr(self, "enable_teacache", False))
-
-    # Masks are converted with the dtype of the raw input, before projection.
-    mask_dtype = hidden_states.dtype
-    attention_mask = _mask_to_additive_bias(attention_mask, mask_dtype)
-    encoder_attention_mask = _mask_to_additive_bias(encoder_attention_mask, mask_dtype)
-
-    hidden_states = self.patchify_proj(hidden_states)
-
-    # Applied in both modes; skipping it on the cached path would embed a
-    # different timestep than the uncached path does.
-    timestep_scale = getattr(self, "timestep_scale_multiplier", None)
-    if timestep_scale:
-        timestep = timestep_scale * timestep
-
-    freqs_cis = self.precompute_freqs_cis(indices_grid)
-
-    batch_size = hidden_states.shape[0]
-    temb, embedded_timestep = self.adaln_single(
-        timestep.flatten(), {"resolution": None, "aspect_ratio": None},
-        batch_size=batch_size, hidden_dtype=hidden_states.dtype,
-    )
-    temb = temb.view(batch_size, -1, temb.shape[-1])
-    embedded_timestep = embedded_timestep.view(batch_size, -1, embedded_timestep.shape[-1])
-
-    if self.caption_projection is not None:
-        encoder_hidden_states = self.caption_projection(encoder_hidden_states)
-        encoder_hidden_states = encoder_hidden_states.view(batch_size, -1, hidden_states.shape[-1])
-
-    # With TeaCache disabled the blocks always run and no cache state is touched.
-    should_calc = (not use_teacache) or _teacache_should_compute(self, hidden_states, temb, batch_size)
-
-    if should_calc:
-        # Snapshot of the block input, needed only to derive the cached residual.
-        block_input = hidden_states.clone() if use_teacache else None
-        for block_idx, block in enumerate(self.transformer_blocks):
-            hidden_states = block(
-                hidden_states, freqs_cis=freqs_cis, attention_mask=attention_mask,
-                encoder_hidden_states=encoder_hidden_states, encoder_attention_mask=encoder_attention_mask,
-                timestep=temb, cross_attention_kwargs=cross_attention_kwargs, class_labels=class_labels,
-                skip_layer_mask=(skip_layer_mask[block_idx] if skip_layer_mask is not None else None),
-                skip_layer_strategy=skip_layer_strategy,
-            )
-        scale_shift_values = self.scale_shift_table[None, None] + embedded_timestep[:, :, None]
-        shift, scale = scale_shift_values[:, :, 0], scale_shift_values[:, :, 1]
-        hidden_states = self.norm_out(hidden_states)
-        hidden_states = hidden_states * (1 + scale) + shift
-        if use_teacache:
-            # The residual spans the blocks plus the final norm/modulation,
-            # so the reuse branch below can go straight to proj_out.
-            self.previous_residual = hidden_states - block_input
-    else:
-        # Out-of-place add: never mutate the projection output in place.
-        hidden_states = hidden_states + self.previous_residual
-
-    output = self.proj_out(hidden_states)
-    if not return_dict:
-        return (output,)
-    return Transformer2DModelOutput(sample=output)