CARLEXsX committed on
Commit
eef9219
·
verified ·
1 Parent(s): 135cba9

Upload __init__.py

Browse files
Files changed (1) hide show
  1. __init__.py +645 -0
__init__.py ADDED
@@ -0,0 +1,645 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ --- START OF MODIFIED FILE app.py ---
2
+ # Euia-AducSdr: Uma implementação aberta e funcional da arquitetura ADUC-SDR para geração de vídeo coerente.
3
+ # Copyright (C) 4 de Agosto de 2025 Carlos Rodrigues dos Santos
4
+ #
5
+ # Contato:
6
+ # Carlos Rodrigues dos Santos
7
8
+ #
9
+ # Repositórios e Projetos Relacionados:
10
+ # GitHub: https://github.com/carlex22/Aduc-sdr
11
+ # YouTube (Resultados): https://m.youtube.com/channel/UC3EgoJi_Fv7yuDpvfYNtoIQ
12
+ # Hugging Face: https://huggingface.co/spaces/Carlexx/ADUC-Sdr_Gemini_Drem0_Ltx_Video60seconds/
13
+ #
14
+ # Este programa é software livre: você pode redistribuí-lo e/ou modificá-lo
15
+ # sob os termos da Licença Pública Geral Affero da GNU como publicada pela
16
+ # Free Software Foundation, seja a versão 3 da Licença, ou
17
+ # (a seu critério) qualquer versão posterior.
18
+ #
19
+ # Este programa é distribuído na esperança de que seja útil,
20
+ # mas SEM QUALQUER GARANTIA; sem mesmo a garantia implícita de
21
+ # COMERCIALIZAÇÃO ou ADEQUAÇÃO A UM DETERMINADO FIM. Consulte a
22
+ # Licença Pública Geral Affero da GNU para mais detalhes.
23
+ #
24
+ # Você deve ter recebido uma cópia da Licença Pública Geral Affero da GNU
25
+ # junto com este programa. Se não, veja <https://www.gnu.org/licenses/>.
26
+
27
+ # --- app.py (ADUC-SDR-3.0: Diretor de Cena com Upscaling Paralelo) ---
28
+
29
+ import gradio as gr
30
+ import torch
31
+ import os
32
+ import re
33
+ import yaml
34
+ from PIL import Image, ImageOps, ExifTags
35
+ import shutil
36
+ import subprocess
37
+ import google.generativeai as genai
38
+ import numpy as np
39
+ import imageio
40
+ from pathlib import Path
41
+ import json
42
+ import time
43
+ import math
44
+ import threading
45
+
46
+ os.environ["TOKENIZERS_PARALLELISM"] = "false"
47
+
48
+ from flux_kontext_helpers import flux_kontext_singleton
49
+ from ltx_manager_helpers import ltx_manager_singleton
50
+ from ltx_upscaler_manager_helpers import ltx_upscaler_manager_singleton
51
+
52
+ WORKSPACE_DIR = "aduc_workspace"
53
+ GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
54
+
55
+ # ======================================================================================
56
+ # SEÇÃO 1: FUNÇÕES UTILITÁRIAS E DE PROCESSAMENTO DE MÍDIA
57
+ # ======================================================================================
58
+
59
def robust_json_parser(raw_text: str) -> dict:
    """
    Find and decode the first valid JSON object embedded in a raw text string.

    Used to extract structured answers from language-model replies, which
    often wrap the JSON in prose or Markdown code fences.

    Args:
        raw_text (str): The complete string returned by the AI.

    Returns:
        dict: The first JSON object that decodes successfully.

    Raises:
        ValueError: If the text contains no '{' at all, or if no candidate
            object could be decoded.
    """
    clean_text = raw_text.strip()
    decoder = json.JSONDecoder()
    first_error = None

    # Try every '{' as a potential object start. raw_decode stops at the end of
    # the object it parsed, so trailing prose (even prose containing braces)
    # no longer breaks parsing the way a first-'{'-to-last-'}' slice did.
    start_index = clean_text.find('{')
    while start_index != -1:
        try:
            parsed, _ = decoder.raw_decode(clean_text, start_index)
        except json.JSONDecodeError as e:
            if first_error is None:
                first_error = e
        else:
            return parsed
        start_index = clean_text.find('{', start_index + 1)

    if first_error is None:
        raise ValueError("Nenhum objeto JSON válido encontrado na resposta da IA.")
    raise ValueError(f"Falha ao decodificar JSON: {first_error}") from first_error
81
+
82
def process_image_to_square(image_path: str, size: int, output_filename: str = None) -> str:
    """
    Convert an image to a square RGB picture by resizing and center-cropping.

    Args:
        image_path (str): Path to the input image. A falsy value short-circuits.
        size (int): Side length (width and height) of the output image.
        output_filename (str, optional): File name to use inside WORKSPACE_DIR.
            When omitted, a timestamp-based name is generated.

    Returns:
        str: Path of the saved image, or None when `image_path` is falsy.

    Raises:
        gr.Error: If the image cannot be opened, processed or saved.
    """
    if not image_path:
        return None
    try:
        # The context manager releases the source file handle as soon as the
        # pixel data has been converted into a new in-memory image.
        with Image.open(image_path) as source_img:
            img_square = ImageOps.fit(source_img.convert("RGB"), (size, size), Image.Resampling.LANCZOS)

        if output_filename:
            output_path = os.path.join(WORKSPACE_DIR, output_filename)
        else:
            output_path = os.path.join(WORKSPACE_DIR, f"edited_ref_{time.time()}.png")

        # Do not rely on the UI setup code having created the workspace already.
        os.makedirs(WORKSPACE_DIR, exist_ok=True)
        img_square.save(output_path)
        return output_path
    except Exception as e:
        raise gr.Error(f"Falha ao processar a imagem de referência: {e}") from e
103
+
104
def trim_video_to_frames(input_path: str, output_path: str, frames_to_keep: int) -> str:
    """
    Use FFmpeg to trim a video, keeping a given number of frames from the start.

    Args:
        input_path (str): Path to the input video.
        output_path (str): Path where the trimmed video is written.
        frames_to_keep (int): Number of leading frames to keep.

    Returns:
        str: The path of the trimmed video (`output_path`).

    Raises:
        gr.Error: If FFmpeg exits with an error or cannot be launched.
    """
    # An argument list (no shell) means paths containing quotes, spaces or
    # shell metacharacters are passed to FFmpeg verbatim.
    command = [
        "ffmpeg", "-y", "-v", "error",
        "-i", str(input_path),
        "-vf", f"select='lt(n,{frames_to_keep})'",
        "-an", str(output_path),
    ]
    try:
        # capture_output is required for e.stderr to carry FFmpeg's message.
        subprocess.run(command, check=True, text=True, capture_output=True)
        return output_path
    except (subprocess.CalledProcessError, OSError) as e:
        raise gr.Error(f"FFmpeg falhou ao cortar vídeo: {getattr(e, 'stderr', None) or e}") from e
120
+
121
def extract_last_n_frames_as_video(input_path: str, output_path: str, n_frames: int) -> str:
    """
    Use FFmpeg to extract the last N frames of a video (the "Kinetic Echo").

    The total frame count is obtained with ffprobe. When the video has no more
    than `n_frames` frames, the input file is simply copied to `output_path`.

    Args:
        input_path (str): Path to the input video.
        output_path (str): Path where the echo video is written.
        n_frames (int): Number of frames to extract from the end.

    Returns:
        str: The path of the generated echo video (`output_path`).

    Raises:
        gr.Error: If ffprobe/FFmpeg fail, cannot be launched, or the frame
            count reported by ffprobe is not an integer.
    """
    try:
        probe_command = [
            "ffprobe", "-v", "error",
            "-select_streams", "v:0", "-count_frames",
            "-show_entries", "stream=nb_read_frames",
            "-of", "default=nokey=1:noprint_wrappers=1",
            str(input_path),
        ]
        result = subprocess.run(probe_command, check=True, text=True, capture_output=True)
        total_frames = int(result.stdout.strip())

        if n_frames >= total_frames:
            shutil.copyfile(input_path, output_path)
            return output_path

        start_frame = total_frames - n_frames
        # Argument list instead of a shell string: no quoting/injection issues.
        ffmpeg_command = [
            "ffmpeg", "-y", "-v", "error",
            "-i", str(input_path),
            "-vf", f"select='gte(n,{start_frame})'",
            "-vframes", str(n_frames),
            "-an", str(output_path),
        ]
        # capture_output so that a failure carries FFmpeg's stderr.
        subprocess.run(ffmpeg_command, check=True, text=True, capture_output=True)
        return output_path
    except (subprocess.CalledProcessError, ValueError, OSError) as e:
        raise gr.Error(f"FFmpeg falhou ao extrair os últimos {n_frames} frames: {getattr(e, 'stderr', None) or e}") from e
143
+
144
def concatenate_final_video(fragment_paths: list, fragment_duration_frames: int, eco_video_frames: int, progress=gr.Progress()):
    """
    Concatenate the generated video fragments into the final video.

    Fragments whose file name contains "_cut", and the last fragment, are used
    as-is. Every other fragment is first trimmed to
    `fragment_duration_frames - eco_video_frames` frames (at least 1).

    Args:
        fragment_paths (list): Paths of the fragments, in playback order.
        fragment_duration_frames (int): Fragment duration in frames.
        eco_video_frames (int): Number of echo frames subtracted from each
            trimmed fragment.
        progress: Gradio progress tracker.

    Returns:
        str: Absolute path of "masterpiece_final.mp4" inside WORKSPACE_DIR.

    Raises:
        gr.Error: If there are no fragments, or trimming/concatenation fails.
    """
    if not fragment_paths:
        raise gr.Error("Nenhum fragmento de vídeo para concatenar.")

    progress(0.1, desc="Preparando fragmentos para a montagem final...")

    try:
        list_file_path = os.path.abspath(os.path.join(WORKSPACE_DIR, f"concat_list_final_{time.time()}.txt"))
        final_output_path = os.path.abspath(os.path.join(WORKSPACE_DIR, "masterpiece_final.mp4"))

        duration_for_non_cut_fragments = max(1, int(fragment_duration_frames - eco_video_frames))
        last_index = len(fragment_paths) - 1

        temp_files_for_concat = []
        for i, p in enumerate(fragment_paths):
            if "_cut" in os.path.basename(p) or i == last_index:
                temp_files_for_concat.append(os.path.abspath(p))
            else:
                temp_path = os.path.join(WORKSPACE_DIR, f"final_temp_concat_{i}.mp4")
                trim_video_to_frames(p, temp_path, duration_for_non_cut_fragments)
                temp_files_for_concat.append(os.path.abspath(temp_path))

        progress(0.8, desc="Concatenando clipe final...")

        with open(list_file_path, "w", encoding="utf-8") as f:
            for p_temp in temp_files_for_concat:
                # Inside the concat list a literal single quote must be written
                # as '\'' or the entry is cut short.
                escaped_path = p_temp.replace("'", "'\\''")
                f.write(f"file '{escaped_path}'\n")

        ffmpeg_command = [
            "ffmpeg", "-y", "-v", "error",
            "-f", "concat", "-safe", "0",
            "-i", list_file_path,
            "-c", "copy", final_output_path,
        ]
        # capture_output is needed for e.stderr to be populated below.
        subprocess.run(ffmpeg_command, check=True, text=True, capture_output=True)

        progress(1.0, desc="Montagem final concluída!")
        return final_output_path
    except gr.Error:
        # Already a user-facing error (e.g. from the trim step); do not re-wrap.
        raise
    except subprocess.CalledProcessError as e:
        error_output = e.stderr if e.stderr else "Nenhuma saída de erro do FFmpeg."
        raise gr.Error(f"FFmpeg falhou na concatenação final: {error_output}") from e
    except Exception as e:
        raise gr.Error(f"Um erro ocorreu durante a concatenação final: {e}") from e
189
+
190
def extract_image_exif(image_path: str) -> str:
    """
    Extract a short, human-readable summary of relevant EXIF metadata.

    This is best-effort: any failure while opening or reading the image
    yields a fixed message instead of raising.

    Args:
        image_path (str): Path to the image file.

    Returns:
        str: Comma-separated "Tag: value" pairs for the relevant tags that are
        present, or an explanatory message when nothing could be read.
    """
    relevant_tags = ['DateTimeOriginal', 'Model', 'LensModel', 'FNumber', 'ExposureTime', 'ISOSpeedRatings', 'FocalLength']
    try:
        # Context manager so the file handle is released after reading.
        with Image.open(image_path) as img:
            exif_data = img._getexif()
        if not exif_data:
            return "No EXIF metadata found."
        exif = {ExifTags.TAGS[k]: v for k, v in exif_data.items() if k in ExifTags.TAGS}
        metadata_str = ", ".join(f"{key}: {exif[key]}" for key in relevant_tags if key in exif)
        return metadata_str if metadata_str else "No relevant EXIF metadata found."
    except Exception:
        # Deliberate best-effort: metadata is optional context for the prompt.
        return "Could not read EXIF data."
202
+
203
+ # ======================================================================================
204
+ # SEÇÃO 2: ORQUESTRADORES DE IA (As "Etapas" da Geração)
205
+ # ======================================================================================
206
+
207
def run_storyboard_generation(num_fragments: int, prompt: str, reference_paths: list):
    """
    Orchestrate Stage 1: the storyboard.

    Fills the "prompts/unified_storyboard_prompt.txt" template with the user
    prompt, the number of scenes and the EXIF summary of the first reference
    image, sends it to Gemini together with all reference images, and parses
    the "scene_storyboard" list from the JSON reply.

    Args:
        num_fragments (int): Number of scenes the storyboard must contain.
        prompt (str): The user's general idea.
        reference_paths (list): Paths of the reference images; the first one
            is the main reference.

    Returns:
        list: The storyboard, with exactly `num_fragments` entries.

    Raises:
        gr.Error: If no references are given, the API key is missing, or the
            reply cannot be parsed into a storyboard of the expected length.
    """
    if not reference_paths:
        raise gr.Error("Por favor, forneça pelo menos uma imagem de referência.")
    if not GEMINI_API_KEY:
        raise gr.Error("Chave da API Gemini não configurada!")

    main_ref_path = reference_paths[0]
    exif_metadata = extract_image_exif(main_ref_path)
    prompt_file = "prompts/unified_storyboard_prompt.txt"
    with open(os.path.join(os.path.dirname(__file__), prompt_file), "r", encoding="utf-8") as f:
        template = f.read()
    director_prompt = template.format(user_prompt=prompt, num_fragments=int(num_fragments), image_metadata=exif_metadata)

    genai.configure(api_key=GEMINI_API_KEY)
    model = genai.GenerativeModel('gemini-1.5-flash')
    model_contents = [director_prompt]
    for i, img_path in enumerate(reference_paths):
        model_contents.append(f"Reference Image {i+1}:")
        model_contents.append(Image.open(img_path))

    print(f"Gerando roteiro com {len(reference_paths)} imagens de referência...")
    response = model.generate_content(model_contents)

    # Read response.text once, inside the try: accessing it can itself raise,
    # and re-reading it in the except handler would mask the original failure.
    response_text = ""
    try:
        response_text = response.text
        storyboard_data = robust_json_parser(response_text)
        storyboard = storyboard_data.get("scene_storyboard", [])
        if not storyboard or len(storyboard) != int(num_fragments):
            raise ValueError(f"A IA não gerou o número correto de cenas. Esperado: {num_fragments}, Recebido: {len(storyboard)}")
        return storyboard
    except Exception as e:
        raise gr.Error(f"O Roteirista (Gemini) falhou ao criar o roteiro: {e}. Resposta recebida: {response_text}") from e
232
+
233
def run_keyframe_generation(storyboard, fixed_reference_paths, keyframe_resolution, global_prompt, progress=gr.Progress()):
    """
    Orchestrate Stage 2: the keyframes.

    For each storyboard scene, Gemini (the "Scene Director") receives the most
    recent images plus the fixed references, each labelled "IMG-n", and
    returns a composition prompt that may cite images as "[IMG-n]". The cited
    images (or the most recent image when none is cited) are passed to the
    Flux Kontext generator to produce the keyframe, saved as
    "keyframe_<n>.png" in WORKSPACE_DIR.

    This is a generator: it yields dictionaries keyed by the Gradio components
    `keyframe_log_output`, `keyframe_gallery_output` and `keyframe_images_state`
    so the UI is refreshed while the work progresses.

    Args:
        storyboard: Sequence of scene descriptions.
        fixed_reference_paths: Paths of the reference images; the first is the
            initial reference.
        keyframe_resolution: Side length used for both width and height.
        global_prompt: The user's general idea, inserted in the director prompt.
        progress: Gradio progress tracker.

    Raises:
        gr.Error: If inputs are missing, the API key is not configured, or
            Gemini / the image generator fail.
    """
    if not storyboard:
        raise gr.Error("Nenhum roteiro para gerar keyframes.")
    if not fixed_reference_paths:
        raise gr.Error("A imagem de referência inicial é obrigatória.")
    # Same up-front check the other Gemini callers in this file perform.
    if not GEMINI_API_KEY:
        raise gr.Error("Chave da API Gemini não configurada!")

    initial_ref_image_path = fixed_reference_paths[0]
    log_history = ""
    generated_images_for_gallery = []
    width, height = keyframe_resolution, keyframe_resolution

    keyframe_paths_for_video = []
    # Kept as a list so the "N/A" placeholder is only used while there is no
    # history; previously entries were appended to the literal "N/A" string,
    # producing "N/AScene 1: ...".
    scene_history_entries = []

    wrapper_prompt_path = os.path.join(os.path.dirname(__file__), "prompts/flux_composition_wrapper_prompt.txt")
    with open(wrapper_prompt_path, "r", encoding="utf-8") as f:
        kontext_template = f.read()

    director_prompt_path = os.path.join(os.path.dirname(__file__), "prompts/director_composition_prompt.txt")
    with open(director_prompt_path, "r", encoding="utf-8") as f:
        director_template = f.read()

    try:
        genai.configure(api_key=GEMINI_API_KEY)
        model = genai.GenerativeModel('gemini-1.5-flash')

        for i, scene_description in enumerate(storyboard):
            progress(i / len(storyboard), desc=f"Compondo Keyframe {i+1}/{len(storyboard)} ({width}x{height})")
            log_history += f"\n--- COMPONDO KEYFRAME {i+1}/{len(storyboard)} ---\n"

            # Up to three most recent images: initial reference + keyframes so far.
            last_three_paths = ([initial_ref_image_path] + keyframe_paths_for_video)[-3:]

            log_history += f" - Diretor de Cena está analisando o contexto...\n"
            yield {keyframe_log_output: gr.update(value=log_history), keyframe_gallery_output: gr.update(value=generated_images_for_gallery), keyframe_images_state: gr.update(value=generated_images_for_gallery)}

            scene_history = "".join(scene_history_entries) if scene_history_entries else "N/A"
            director_prompt = director_template.format(
                global_prompt=global_prompt,
                scene_history=scene_history,
                current_scene_desc=scene_description,
            )

            # Label every distinct image as IMG-n, recent images first, then
            # any fixed reference not already included.
            model_contents = []
            image_map = {}
            current_image_index = 1

            for path in last_three_paths:
                if path not in image_map.values():
                    image_map[current_image_index] = path
                    model_contents.extend([f"IMG-{current_image_index}:", Image.open(path)])
                    current_image_index += 1

            for path in fixed_reference_paths:
                if path not in image_map.values():
                    image_map[current_image_index] = path
                    model_contents.extend([f"IMG-{current_image_index}:", Image.open(path)])
                    current_image_index += 1

            model_contents.append(director_prompt)

            response_text = model.generate_content(model_contents).text
            composition_prompt_with_tags = response_text.strip()

            referenced_indices = [int(idx) for idx in re.findall(r'\[IMG-(\d+)\]', composition_prompt_with_tags)]

            # Unknown indices are ignored; fall back to the most recent image
            # when the director cited nothing usable.
            current_reference_paths = [image_map[idx] for idx in sorted(set(referenced_indices)) if idx in image_map]
            if not current_reference_paths:
                current_reference_paths = [last_three_paths[-1]]

            reference_images_pil = [Image.open(p) for p in current_reference_paths]
            final_kontext_prompt = re.sub(r'\[IMG-\d+\]', '', composition_prompt_with_tags).strip()

            log_history += f" - Diretor de Cena decidiu usar as imagens: {[os.path.basename(p) for p in current_reference_paths]}\n"
            log_history += f" - Prompt Final do Diretor: \"{final_kontext_prompt}\"\n"
            scene_history_entries.append(f"Scene {i+1}: {final_kontext_prompt}\n")

            yield {keyframe_log_output: gr.update(value=log_history), keyframe_gallery_output: gr.update(value=generated_images_for_gallery), keyframe_images_state: gr.update(value=generated_images_for_gallery)}

            final_kontext_prompt_wrapped = kontext_template.format(target_prompt=final_kontext_prompt)
            output_path = os.path.join(WORKSPACE_DIR, f"keyframe_{i+1}.png")

            image = flux_kontext_singleton.generate_image(
                reference_images=reference_images_pil,
                prompt=final_kontext_prompt_wrapped,
                width=width, height=height, seed=int(time.time())
            )

            image.save(output_path)
            keyframe_paths_for_video.append(output_path)
            generated_images_for_gallery.append(output_path)

    except Exception as e:
        raise gr.Error(f"O Compositor (FluxKontext) ou o Diretor de Cena (Gemini) falhou: {e}") from e

    log_history += "\nComposição de todos os keyframes concluída.\n"
    final_keyframes = keyframe_paths_for_video
    yield {keyframe_log_output: gr.update(value=log_history), keyframe_gallery_output: final_keyframes, keyframe_images_state: final_keyframes}
329
+
330
def get_initial_motion_prompt(user_prompt: str, start_image_path: str, destination_image_path: str, dest_scene_desc: str):
    """
    Ask Gemini to act as the "Initial Cinematographer".

    Fills "prompts/initial_motion_prompt.txt" with the user prompt and the
    destination scene description and sends it with the start and destination
    images.

    Args:
        user_prompt (str): The user's general idea.
        start_image_path (str): Path of the starting image.
        destination_image_path (str): Path of the destination image.
        dest_scene_desc (str): Description of the destination scene.

    Returns:
        str: The stripped text of the model's reply (the motion prompt).

    Raises:
        gr.Error: If the API key is missing or any step fails.
    """
    if not GEMINI_API_KEY:
        raise gr.Error("Chave da API Gemini não configurada!")
    try:
        genai.configure(api_key=GEMINI_API_KEY)
        model = genai.GenerativeModel('gemini-1.5-flash')
        prompt_file = "prompts/initial_motion_prompt.txt"
        with open(os.path.join(os.path.dirname(__file__), prompt_file), "r", encoding="utf-8") as f:
            template = f.read()
        cinematographer_prompt = template.format(user_prompt=user_prompt, destination_scene_description=dest_scene_desc)

        # Context managers close both image files once the request has been sent.
        with Image.open(start_image_path) as start_img, Image.open(destination_image_path) as dest_img:
            model_contents = ["START Image:", start_img, "DESTINATION Image:", dest_img, cinematographer_prompt]
            response = model.generate_content(model_contents)
        return response.text.strip()
    except Exception as e:
        raise gr.Error(f"O Cineasta de IA (Inicial) falhou: {e}. Resposta: {getattr(e, 'text', 'No text available.')}") from e
344
+
345
def get_transition_decision(user_prompt, story_history, memory_media_path, path_image_path, destination_image_path, midpoint_scene_description, dest_scene_desc):
    """
    Ask Gemini to act as the "Continuity Director".

    Sends the first frame of the memory video, the midpoint ("path") image and
    the destination image, together with the filled
    "prompts/transition_decision_prompt.txt" template, and parses the JSON
    reply.

    Args:
        user_prompt: The user's general idea.
        story_history: Text describing the previous acts.
        memory_media_path: Path of the video whose first frame is the start image.
        path_image_path: Path of the midpoint image.
        destination_image_path: Path of the destination image.
        midpoint_scene_description: Description of the midpoint scene.
        dest_scene_desc: Description of the destination scene.

    Returns:
        dict: Decision data containing at least "transition_type" and
        "motion_prompt".

    Raises:
        gr.Error: If the API key is missing, the reply lacks the required
            keys, or any other step fails.
    """
    if not GEMINI_API_KEY:
        raise gr.Error("Chave da API Gemini não configurada!")
    try:
        genai.configure(api_key=GEMINI_API_KEY)
        model = genai.GenerativeModel('gemini-1.5-flash')
        prompt_file = "prompts/transition_decision_prompt.txt"
        with open(os.path.join(os.path.dirname(__file__), prompt_file), "r", encoding="utf-8") as f:
            template = f.read()
        continuity_prompt = template.format(user_prompt=user_prompt, story_history=story_history, midpoint_scene_description=midpoint_scene_description, destination_scene_description=dest_scene_desc)

        # Only the first frame of the memory video is used as the start image.
        with imageio.get_reader(memory_media_path) as reader:
            mem_img = Image.fromarray(reader.get_data(0))

        # Context managers close both image files once the request has been sent.
        with Image.open(path_image_path) as path_img, Image.open(destination_image_path) as dest_img:
            model_contents = ["START Image (from Kinetic Echo):", mem_img, "MIDPOINT Image (Path):", path_img, "DESTINATION Image (Destination):", dest_img, continuity_prompt]
            response = model.generate_content(model_contents)

        decision_data = robust_json_parser(response.text)
        if "transition_type" not in decision_data or "motion_prompt" not in decision_data:
            raise ValueError("A resposta da IA não contém as chaves 'transition_type' ou 'motion_prompt'.")
        return decision_data
    except Exception as e:
        raise gr.Error(f"O Diretor de Continuidade (IA) falhou: {e}. Resposta: {getattr(e, 'text', str(e))}") from e
362
+
363
+
364
+
365
def run_video_production(
    video_resolution,
    video_duration_seconds, video_fps, eco_video_frames, use_attention_slicing,
    fragment_duration_frames, mid_cond_strength, dest_cond_strength, num_inference_steps,
    decode_timestep, image_cond_noise_scale,
    prompt_geral, keyframe_images_state, scene_storyboard, cfg,
    progress=gr.Progress()
):
    """
    Orchestrate Stage 3: production and parallel upscaling.

    For each pair of consecutive keyframes a low-resolution fragment is
    generated with the LTX manager, and its upscale to the final resolution is
    started on a background thread. Unless the fragment is the last one or the
    transition is a "cut", the fragment is trimmed to
    `fragment_duration_frames` and its last `eco_video_frames` frames are
    extracted as the "kinetic memory" that conditions the next fragment.

    This is a generator: it yields dictionaries keyed by Gradio components
    (`production_log_output`, `video_gallery_output`, `prod_media_*_output`,
    `fragment_list_state`) to refresh the UI while the work progresses.

    Args:
        video_resolution: Final side length; low-res is half of it, rounded
            down to a multiple of 8.
        video_duration_seconds, video_fps: Raw generation length and frame rate.
        eco_video_frames: Number of frames kept as kinetic memory.
        use_attention_slicing, num_inference_steps, decode_timestep,
        image_cond_noise_scale, cfg: Forwarded to the LTX manager.
        fragment_duration_frames: Fragment length, compared against and used
            as a frame count here.
        mid_cond_strength, dest_cond_strength: Conditioning strengths for the
            midpoint and destination images.
        prompt_geral: The user's general idea.
        keyframe_images_state: Keyframe paths; missing files are ignored.
        scene_storyboard: Scene descriptions.
        progress: Gradio progress tracker.

    Raises:
        gr.Error: On invalid inputs, or if generation or upscaling fails.
    """
    try:
        high_res_width, high_res_height = video_resolution, video_resolution
        low_res_scale = 2
        low_res_width = (high_res_width // low_res_scale // 8) * 8
        low_res_height = (high_res_height // low_res_scale // 8) * 8

        # `or []` guards against the state being None.
        valid_keyframes = [p for p in (keyframe_images_state or []) if p is not None and os.path.exists(p)]
        video_total_frames_user = int(video_duration_seconds * video_fps)
        # Snap the frame count to the form 8*k + 1.
        video_total_frames_ltx = int(round((float(video_total_frames_user) - 1.0) / 8.0) * 8 + 1)
        if not valid_keyframes or len(valid_keyframes) < 2:
            raise gr.Error("São necessários pelo menos 2 keyframes válidos para produzir uma transição.")
        if int(fragment_duration_frames) > video_total_frames_user:
            raise gr.Error(f"Duração do fragmento ({fragment_duration_frames}) não pode ser maior que a Duração Bruta ({video_total_frames_user}).")

        log_history = f"\n--- FASE 3/4: Iniciando Produção (Low-Res: {low_res_width}x{low_res_height}, Final: {high_res_width}x{high_res_height})...\n"
        yield {
            production_log_output: log_history, video_gallery_output: [],
            prod_media_start_output: None, prod_media_mid_output: gr.update(visible=False), prod_media_end_output: None
        }

        seed = int(time.time())
        upscaled_fragments_paths = []
        upscale_threads = []
        # Exceptions raised inside a thread never reach the caller, so the
        # worker records them here, keyed by output path.
        upscale_errors = {}
        story_history = ""
        kinetic_memory_path = None
        num_transitions = len(valid_keyframes) - 1

        def _upscale_worker(source_path, target_path):
            """Upscale one fragment, recording any failure instead of losing it."""
            try:
                ltx_upscaler_manager_singleton.upscale_video_fragment(source_path, target_path, video_fps)
            except Exception as exc:
                upscale_errors[target_path] = exc

        for i in range(num_transitions):
            fragment_num = i + 1
            progress(i / num_transitions, desc=f"Gerando Fragmento Low-Res {fragment_num}...")
            log_history += f"\n--- FRAGMENTO {fragment_num}/{num_transitions} ---\n"
            destination_frame = int(video_total_frames_ltx - 1)

            # NOTE(review): the destination keyframe is valid_keyframes[i+1] but
            # its description is read from scene_storyboard[i]; confirm whether
            # keyframes and storyboard entries are offset by one upstream.
            if i == 0 or kinetic_memory_path is None:
                # New scene: start directly from a keyframe.
                start_path, destination_path = valid_keyframes[i], valid_keyframes[i+1]
                dest_scene_desc = scene_storyboard[i]
                log_history += f" - Início (Cena Nova): {os.path.basename(start_path)}\n - Destino: {os.path.basename(destination_path)}\n"
                current_motion_prompt = get_initial_motion_prompt(prompt_geral, start_path, destination_path, dest_scene_desc)
                conditioning_items_data = [(start_path, 0, 1.0), (destination_path, destination_frame, dest_cond_strength)]
                transition_type = "continuous"
                yield { production_log_output: log_history, prod_media_start_output: start_path, prod_media_mid_output: gr.update(visible=False), prod_media_end_output: destination_path }
            else:
                # Continuation: start from the kinetic memory of the previous fragment.
                memory_path, path_path, destination_path = kinetic_memory_path, valid_keyframes[i], valid_keyframes[i+1]
                path_scene_desc, dest_scene_desc = scene_storyboard[i-1], scene_storyboard[i]
                log_history += f" - Diretor de Continuidade analisando...\n - Memória: {os.path.basename(memory_path)}\n - Caminho: {os.path.basename(path_path)}\n - Destino: {os.path.basename(destination_path)}\n"
                yield { production_log_output: log_history, prod_media_start_output: gr.update(value=memory_path, visible=True), prod_media_mid_output: gr.update(value=path_path, visible=True), prod_media_end_output: destination_path }
                decision_data = get_transition_decision(prompt_geral, story_history, memory_path, path_path, destination_path, midpoint_scene_description=path_scene_desc, dest_scene_desc=dest_scene_desc)
                transition_type = decision_data["transition_type"]
                current_motion_prompt = decision_data["motion_prompt"]
                log_history += f" - Decisão: {transition_type.upper()}\n"
                mid_cond_frame_calculated = int(video_total_frames_ltx - fragment_duration_frames + eco_video_frames)
                conditioning_items_data = [(memory_path, 0, 1.0), (path_path, mid_cond_frame_calculated, mid_cond_strength), (destination_path, destination_frame, dest_cond_strength)]

            story_history += f"\n- Ato {fragment_num + 1}: {current_motion_prompt}"
            log_history += f" - Instrução do Cineasta: '{current_motion_prompt}'\n"
            yield {production_log_output: log_history}

            output_filename_low_res = f"fragment_{fragment_num}_lowres_{transition_type}.mp4"
            full_fragment_path_low_res, _ = ltx_manager_singleton.generate_video_fragment(
                motion_prompt=current_motion_prompt, conditioning_items_data=conditioning_items_data,
                width=low_res_width, height=low_res_height, seed=seed, cfg=cfg, progress=progress,
                video_total_frames=video_total_frames_ltx, video_fps=video_fps,
                use_attention_slicing=use_attention_slicing, num_inference_steps=num_inference_steps,
                decode_timestep=decode_timestep, image_cond_noise_scale=image_cond_noise_scale,
                current_fragment_index=fragment_num, output_path=os.path.join(WORKSPACE_DIR, output_filename_low_res)
            )
            log_history += f" - LOG: Gerei {output_filename_low_res}.\n"

            output_filename_high_res = f"fragment_{fragment_num}_highres_{transition_type}.mp4"
            output_path_high_res = os.path.join(WORKSPACE_DIR, output_filename_high_res)

            log_history += f" - Disparando upscale para {output_filename_high_res} em paralelo...\n"
            upscale_thread = threading.Thread(
                target=_upscale_worker,
                args=(full_fragment_path_low_res, output_path_high_res)
            )
            upscale_thread.start()
            upscale_threads.append((upscale_thread, output_path_high_res))

            is_last_fragment = (i == num_transitions - 1)

            if is_last_fragment:
                log_history += " - Último fragmento. Mantendo duração total (low-res).\n"
                kinetic_memory_path = None
            elif transition_type == "cut":
                log_history += " - CORTE DE CENA: Memória reiniciada.\n"
                kinetic_memory_path = None
            else:
                trimmed_fragment_path = os.path.join(WORKSPACE_DIR, f"fragment_{fragment_num}_trimmed_lowres.mp4")
                trim_video_to_frames(full_fragment_path_low_res, trimmed_fragment_path, int(fragment_duration_frames))
                eco_output_path = os.path.join(WORKSPACE_DIR, f"eco_from_frag_{fragment_num}.mp4")
                kinetic_memory_path = extract_last_n_frames_as_video(trimmed_fragment_path, eco_output_path, int(eco_video_frames))
                log_history += f" - CONTINUIDADE: Eco criado (low-res): {os.path.basename(kinetic_memory_path)}\n"

            # Only show upscales that finished without an error.
            current_finished_fragments = [path for t, path in upscale_threads if not t.is_alive() and path not in upscale_errors]
            yield {production_log_output: log_history, video_gallery_output: current_finished_fragments}

        progress(0.9, desc="Aguardando finalização dos upscales...")
        log_history += "\nProdução low-res concluída. Aguardando todos os upscales...\n"
        yield {production_log_output: log_history}

        for thread, path in upscale_threads:
            thread.join()
            upscaled_fragments_paths.append(path)

        if upscale_errors:
            # Surface the first failure rather than handing missing files to
            # the final assembly step.
            failed_path, failure = next(iter(upscale_errors.items()))
            raise gr.Error(f"O upscaling falhou para {os.path.basename(failed_path)}: {failure}")

        progress(1.0, desc="Produção e upscaling concluídos.")
        log_history += "\nTodos os upscales foram finalizados. Pronto para montar o vídeo final.\n"
        yield {
            production_log_output: log_history,
            video_gallery_output: upscaled_fragments_paths,
            fragment_list_state: upscaled_fragments_paths
        }
    except gr.Error:
        # Already user-facing (validation or helper errors); do not re-label
        # it as an LTX failure.
        raise
    except Exception as e:
        raise gr.Error(f"A Produção de Vídeo (LTX) falhou: {e}") from e
486
+
487
+ # ======================================================================================
488
+ # SEÇÃO 3: DEFINIÇÃO DA INTERFACE GRÁFICA (UI com Gradio)
489
+ # ======================================================================================
490
+
491
+ with gr.Blocks(theme=gr.themes.Soft()) as demo:
492
+ gr.Markdown(f"# NOVIM-13.1 (Painel de Controle do Diretor)\n*Arquitetura ADUC-SDR com Upscaling Paralelo*")
493
+
494
+ if os.path.exists(WORKSPACE_DIR): shutil.rmtree(WORKSPACE_DIR)
495
+ os.makedirs(WORKSPACE_DIR); Path("prompts").mkdir(exist_ok=True)
496
+
497
+ scene_storyboard_state = gr.State([])
498
+ keyframe_images_state = gr.State([])
499
+ fragment_list_state = gr.State([])
500
+ prompt_geral_state = gr.State("")
501
+ processed_ref_paths_state = gr.State([])
502
+ fragment_duration_state = gr.State()
503
+ eco_frames_state = gr.State()
504
+
505
+ gr.Markdown("## CONFIGURAÇÕES GLOBAIS DE RESOLUÇÃO")
506
+ with gr.Row():
507
+ video_resolution_selector = gr.Radio([512, 720, 1024], value=1024, label="Resolução Final do Vídeo (px)")
508
+ keyframe_resolution_selector = gr.Radio([512, 720, 1024], value=512, label="Resolução dos Keyframes (px)")
509
+
510
+ gr.Markdown("--- \n ## ETAPA 1: O ROTEIRO (IA Roteirista)")
511
+ with gr.Row():
512
+ with gr.Column(scale=1):
513
+ prompt_input = gr.Textbox(label="Ideia Geral (Prompt)")
514
+ num_fragments_input = gr.Slider(2, 50, 4, step=1, label="Nº de Keyframes a Gerar")
515
+ reference_gallery = gr.Gallery(
516
+ label="Imagens de Referência (A primeira é a principal)",
517
+ type="filepath",
518
+ columns=4, rows=1, object_fit="contain", height="auto"
519
+ )
520
+ director_button = gr.Button("▶️ 1. Gerar Roteiro", variant="primary")
521
+ with gr.Column(scale=2): storyboard_to_show = gr.JSON(label="Roteiro de Cenas Gerado (em Inglês)")
522
+
523
+ gr.Markdown("--- \n ## ETAPA 2: OS KEYFRAMES (IA Compositor & Diretor de Cena)")
524
+ with gr.Row():
525
+ with gr.Column(scale=2):
526
+ gr.Markdown("O Diretor de Cena IA irá analisar as referências e o roteiro para compor cada keyframe de forma autônoma.")
527
+ photographer_button = gr.Button("▶️ 2. Compor Imagens-Chave em Cadeia", variant="primary")
528
+ keyframe_gallery_output = gr.Gallery(label="Galeria de Keyframes Gerados", object_fit="contain", height="auto", type="filepath", interactive=False)
529
+ with gr.Column(scale=1):
530
+ keyframe_log_output = gr.Textbox(label="Diário de Bordo do Compositor", lines=25, interactive=False)
531
+
532
# Stage 3 ("production") UI: generation controls, live previews of the
# conditioning media, a production log, and the gallery of generated fragments.
# NOTE(review): indentation is not visible in this excerpt, so which container
# each widget belongs to should be confirmed against the real file.
+ gr.Markdown("--- \n ## ETAPA 3: A PRODUÇÃO (IA Cineasta & Câmera)")
533
+ with gr.Row():
534
+ with gr.Column(scale=1):
535
# Positional arguments are minimum=0.5, maximum=10.0, value=1.0.
+ cfg_slider = gr.Slider(0.5, 10.0, 1.0, step=0.1, label="CFG (Guidance Scale)")
536
# Collapsed-by-default accordion (open=False) holding timing/performance knobs.
+ with gr.Accordion("Controles Avançados de Timing e Performance", open=False):
537
# Raw generation length in seconds: 2.0 to 10.0, default 6.0.
+ video_duration_slider = gr.Slider(label="Duração da Geração Bruta (s)", minimum=2.0, maximum=10.0, value=6.0, step=0.5)
538
+ video_fps_radio = gr.Radio(choices=[8, 16, 24, 32], value=24, label="FPS do Vídeo")
539
+ num_inference_steps_slider = gr.Slider(label="Etapas de Inferência (Low-Res)", minimum=10, maximum=50, value=28, step=1)
540
+ slicing_checkbox = gr.Checkbox(label="Usar Attention Slicing (Economiza VRAM)", value=True)
541
+ gr.Markdown("---"); gr.Markdown("#### Controles de Duração (Arquitetura Eco + Déjà Vu)")
542
# Percentage (1-100) of the raw generation; updated_animator_click converts it
# into a frame count before calling run_video_production.
+ fragment_duration_slider = gr.Slider(label="Duração de Cada Fragmento (% da Geração Bruta)", minimum=1, maximum=100, value=75, step=1)
543
+ eco_frames_slider = gr.Slider(label="Tamanho do Eco Cinético (Frames)", minimum=4, maximum=48, value=8, step=1)
544
+ mid_cond_strength_slider = gr.Slider(label="Força do 'Caminho'", minimum=0.1, maximum=1.0, value=0.5, step=0.05)
545
+ dest_cond_strength_slider = gr.Slider(label="Força do 'Destino'", minimum=0.1, maximum=1.0, value=1.0, step=0.05)
546
+ gr.Markdown("---"); gr.Markdown("#### Controles do VAE (Avançado)")
547
+ decode_timestep_slider = gr.Slider(label="VAE Decode Timestep", minimum=0.0, maximum=0.2, value=0.05, step=0.005)
548
+ image_cond_noise_scale_slider = gr.Slider(label="VAE Image Cond Noise Scale", minimum=0.0, maximum=0.1, value=0.025, step=0.005)
549
+
550
# Button that triggers updated_animator_click (wired further below).
+ animator_button = gr.Button("▶️ 3. Produzir Cenas", variant="primary")
551
# Read-only previews of the media used to condition generation.
+ with gr.Accordion("Visualização das Mídias de Condicionamento (Ao Vivo)", open=True):
552
+ with gr.Row():
553
+ prod_media_start_output = gr.Video(label="Mídia Inicial (Eco/K1)", interactive=False)
554
# Created hidden (visible=False).
+ prod_media_mid_output = gr.Image(label="Mídia do Caminho (K_i-1)", interactive=False, visible=False)
555
+ prod_media_end_output = gr.Image(label="Mídia de Destino (K_i)", interactive=False)
556
+ production_log_output = gr.Textbox(label="Diário de Bordo da Produção", lines=10, interactive=False)
557
# NOTE(review): type="video" is not one of the Gallery types this reviewer knows
# ("numpy", "pil", "filepath") - confirm against the installed Gradio version.
+ with gr.Column(scale=1): video_gallery_output = gr.Gallery(label="Fragmentos Gerados (High-Res)", object_fit="contain", height="auto", type="video")
558
+
559
# Stage 4 ("post-production") UI: one button that assembles the final video and
# the player that shows the result.
# NOTE(review): the f-string below has no placeholders, so the `f` prefix is
# redundant.
+ gr.Markdown(f"--- \n ## ETAPA 4: PÓS-PRODUÇÃO (Montagem Final)")
560
+ with gr.Row():
561
+ with gr.Column():
562
# Triggers concatenate_final_video (wired further below).
+ editor_button = gr.Button("▶️ 4. Montar Vídeo Final", variant="primary")
563
+ final_video_output = gr.Video(label="A Obra-Prima Final")
564
+
565
+ # ... (Markdown explaining the architecture) ...
566
+
567
def process_and_run_storyboard(num_fragments, prompt, gallery_files, keyframe_resolution):
    """Square the uploaded reference images and generate a storyboard from them.

    Args:
        num_fragments: Forwarded unchanged to ``run_storyboard_generation``.
        prompt: General prompt; forwarded to ``run_storyboard_generation`` and
            returned so the caller can store it.
        gallery_files: Uploaded gallery entries. Each entry may be a dict with
            a ``"name"`` (or ``"path"``) key, a ``(path, caption)`` pair, a
            plain path, or a file-like object exposing ``.name``.
        keyframe_resolution: Value passed to ``process_image_to_square`` and
            embedded in each processed file name.

    Returns:
        A ``(storyboard, prompt, processed_paths)`` tuple.

    Raises:
        gr.Error: If the gallery is empty or an entry has no recognisable path.
    """
    if not gallery_files:
        raise gr.Error("Por favor, suba pelo menos uma imagem de referência na galeria.")

    def _entry_to_path(entry):
        # Plain paths first: a pathlib.Path also has `.name`, but that is only
        # the basename, so it must not reach the attribute fallback below.
        if isinstance(entry, (str, os.PathLike)):
            return os.fspath(entry)
        if isinstance(entry, dict):
            # "name" is checked first to keep the original lookup unchanged.
            for key in ("name", "path"):
                if key in entry:
                    return entry[key]
        elif isinstance(entry, (tuple, list)) and entry:
            # (path, caption) style pair: the media is the first element.
            return _entry_to_path(entry[0])
        else:
            candidate = getattr(entry, "name", None)
            if candidate:
                return candidate
        raise gr.Error(f"Formato de item de galeria não suportado: {type(entry).__name__}")

    raw_paths = [_entry_to_path(item) for item in gallery_files]
    processed_paths = [
        process_image_to_square(
            path,
            keyframe_resolution,
            f"processed_ref_{i}_{keyframe_resolution}x{keyframe_resolution}.png",
        )
        for i, path in enumerate(raw_paths)
    ]

    storyboard = run_storyboard_generation(num_fragments, prompt, processed_paths)
    return storyboard, prompt, processed_paths
580
+
581
# Stage 1 wiring: clicking the director button runs process_and_run_storyboard
# and stores its three return values (storyboard, prompt, processed reference
# paths) in the matching state components. The chained .success(...) step then
# copies the storyboard state into `storyboard_to_show` via an identity lambda.
+ director_button.click(
582
+ fn=process_and_run_storyboard,
583
+ inputs=[num_fragments_input, prompt_input, reference_gallery, keyframe_resolution_selector],
584
+ outputs=[scene_storyboard_state, prompt_geral_state, processed_ref_paths_state]
585
+ ).success(fn=lambda s: s, inputs=[scene_storyboard_state], outputs=[storyboard_to_show])
586
+
587
# Stage 2 wiring: clicking the photographer button calls run_keyframe_generation
# with the stored storyboard, processed reference paths, keyframe resolution and
# general prompt. Its outputs go to the keyframe log, the keyframe gallery and
# the keyframe-images state.
+ photographer_button.click(
588
+ fn=run_keyframe_generation,
589
+ inputs=[scene_storyboard_state, processed_ref_paths_state, keyframe_resolution_selector, prompt_geral_state],
590
+ outputs=[keyframe_log_output, keyframe_gallery_output, keyframe_images_state]
591
+ )
592
+
593
def updated_animator_click(
        video_resolution,
        video_duration_seconds, video_fps, eco_video_frames, use_attention_slicing,
        fragment_duration_percentage, mid_cond_strength, dest_cond_strength, num_inference_steps,
        decode_timestep, image_cond_noise_scale,
        prompt_geral, keyframe_images_state, scene_storyboard, cfg, progress=gr.Progress()):
    """Stream video-production updates, then publish the timing values used.

    The fragment length arrives as a percentage of the raw generation. It is
    converted here into a whole number of frames (at least 1) before being
    handed to ``run_video_production``; every other argument is forwarded
    unchanged.

    Yields:
        Each update produced by ``run_video_production``, followed by one final
        dict that sets ``fragment_duration_state`` to the computed frame count
        and ``eco_frames_state`` to ``eco_video_frames``.
    """
    raw_frame_count = video_duration_seconds * video_fps
    fraction_of_raw = fragment_duration_percentage / 100.0
    # Round down, but never let a fragment shrink below a single frame.
    frames_per_fragment = max(1, int(math.floor(fraction_of_raw * raw_frame_count)))

    yield from run_video_production(
        video_resolution,
        video_duration_seconds, video_fps, eco_video_frames, use_attention_slicing,
        frames_per_fragment, mid_cond_strength, dest_cond_strength, num_inference_steps,
        decode_timestep, image_cond_noise_scale,
        prompt_geral, keyframe_images_state, scene_storyboard, cfg, progress,
    )

    # Final update: record the timing values for the later concatenation step.
    final_state_update = {
        fragment_duration_state: frames_per_fragment,
        eco_frames_state: eco_video_frames,
    }
    yield final_state_update
616
+
617
# Stage 3 wiring: clicking the animator button runs updated_animator_click.
# The `inputs` list is ordered to match that function's positional parameters
# (video_resolution ... cfg). `outputs` lists every component the handler may
# update; the last two states are the ones set by its final yielded dict.
+ animator_button.click(
618
+ fn=updated_animator_click,
619
+ inputs=[
620
+ video_resolution_selector,
621
+ video_duration_slider, video_fps_radio, eco_frames_slider, slicing_checkbox,
622
+ fragment_duration_slider, mid_cond_strength_slider, dest_cond_strength_slider, num_inference_steps_slider,
623
+ decode_timestep_slider, image_cond_noise_scale_slider,
624
+ prompt_geral_state, keyframe_images_state, scene_storyboard_state, cfg_slider
625
+ ],
626
+ outputs=[
627
+ production_log_output, video_gallery_output, fragment_list_state,
628
+ prod_media_start_output, prod_media_mid_output, prod_media_end_output,
629
+ fragment_duration_state, eco_frames_state
630
+ ]
631
+ )
632
+
633
# Stage 4 wiring: clicking the editor button calls concatenate_final_video with
# the fragment list and the two timing states recorded during stage 3, and
# shows the result in `final_video_output`.
+ editor_button.click(
634
+ fn=concatenate_final_video,
635
+ inputs=[fragment_list_state, fragment_duration_state, eco_frames_state],
636
+ outputs=[final_video_output]
637
+ )
638
+
639
if __name__ == "__main__":
    # Start every run with a freshly created workspace directory.
    workspace_present = os.path.exists(WORKSPACE_DIR)
    if workspace_present:
        shutil.rmtree(WORKSPACE_DIR)
    os.makedirs(WORKSPACE_DIR)

    # The prompts directory is kept between runs; create it only if missing.
    prompts_dir = Path("prompts")
    prompts_dir.mkdir(exist_ok=True)

    # Enable request queuing, then serve on all interfaces with a share link.
    queued_app = demo.queue()
    queued_app.launch(server_name="0.0.0.0", share=True)
644
+ # --- END OF MODIFIED FILE app.py ---
645
+ # ```