Spaces:

Heartsync
/

phoenix

Paused

App Files Files Community

seawolf2357 commited on 25 days ago

Commit

c381ead

verified ·

1 Parent(s): c43a720

Update app.py

Browse files

Files changed (1) hide show

app.py +201 -170

app.py CHANGED Viewed

@@ -1,20 +1,15 @@
 """
-🔥 PHOENIX Retention Research Platform v2.0 COMPLETE
-Brumby-inspired Retraining + All v1.4.3 Fixes
-✅ v2.0 NEW: Fine-tuning 파이프라인 (Brumby-style Retraining)
-✅ v2.0 NEW: 3-Phase Dataset 지원
-✅ v2.0 NEW: 비용 계산기
-✅ v1.4.3: forward() 시그니처 Transformers 호환
-✅ v1.4.3: dtype 불일치 수정 (bfloat16 지원)
-✅ v1.4.3: Embedding Tying 자동 처리
-✅ Model Structure Pre-Analysis
-✅ Qwen3 Model Support
-✅ GQA Support
-✅ HuggingFace Hub Integration
-VIDraft AI Research Lab - Complete Integrated Version v2.0
-Based on Manifest AI's Brumby-14B Success
 """
 import gradio as gr
@@ -46,10 +41,13 @@ import os
 from huggingface_hub import HfApi, create_repo
 # =====================================================
-# 전역 설정
 # =====================================================
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 STORAGE_PATH = "/data"
 DB_PATH = f"{STORAGE_PATH}/phoenix_experiments.db"
 MODELS_PATH = f"{STORAGE_PATH}/phoenix_models"
@@ -61,13 +59,15 @@ HF_TOKEN = os.getenv("HF_TOKEN")
 Path(STORAGE_PATH).mkdir(parents=True, exist_ok=True)
 Path(MODELS_PATH).mkdir(parents=True, exist_ok=True)
-print(f"🔥 PHOENIX Platform v2.0 initialized on {DEVICE}")
 print(f"💾 Storage: {STORAGE_PATH}")
 print(f"🎯 Default Base Model: {DEFAULT_MODEL}")
 if HF_TOKEN:
     print(f"🔑 HuggingFace Token: {'*' * 10}{HF_TOKEN[-4:]}")
-else:
-    print(f"⚠️ HuggingFace Token not found (upload disabled)")
 # =====================================================
 # 모델 구조 분석 함수
@@ -84,15 +84,14 @@ def analyze_model_structure(model_url: str) -> Dict[str, Any]:
         config = AutoConfig.from_pretrained(model_url, trust_remote_code=True)
         print(f"✅ Config loaded")
-        print(f"   Architecture: {config.architectures if hasattr(config, 'architectures') else 'Unknown'}")
-        print(f"   Model Type: {config.model_type if hasattr(config, 'model_type') else 'Unknown'}")
-        print(f"\n📦 Loading model structure...")
         model = AutoModelForCausalLM.from_pretrained(
             model_url,
             trust_remote_code=True,
             torch_dtype=torch.float16,
-            device_map="cpu"
         )
         analysis = {
@@ -108,15 +107,13 @@ def analyze_model_structure(model_url: str) -> Dict[str, Any]:
             'layer_path': None,
         }
-        print(f"\n🔍 Analyzing layer structure...")
         layers = None
         layer_path = None
         possible_paths = [
             ('model.layers', lambda m: m.model.layers if hasattr(m, 'model') and hasattr(m.model, 'layers') else None),
             ('transformer.h', lambda m: m.transformer.h if hasattr(m, 'transformer') and hasattr(m.transformer, 'h') else None),
-            ('layers', lambda m: m.layers if hasattr(m, 'layers') else None),
         ]
         for path_name, path_fn in possible_paths:
@@ -124,49 +121,29 @@ def analyze_model_structure(model_url: str) -> Dict[str, Any]:
             if result is not None:
                 layers = result
                 layer_path = path_name
-                print(f"   ✅ Found layers at: {path_name}")
                 break
-        if layers is None:
-            print(f"   ❌ No layers found!")
-            analysis['error'] = 'No layers found'
-            return analysis
-        analysis['total_layers'] = len(layers)
-        analysis['layer_path'] = layer_path
-        print(f"   Total Layers: {len(layers)}")
-        if len(layers) > 0:
-            first_layer = layers[0]
-            print(f"\n🔬 Analyzing first layer...")
-            if hasattr(first_layer, 'self_attn'):
-                analysis['has_self_attn'] = True
-                attn = first_layer.self_attn
-                print(f"   ✅ Has self_attn")
-                if hasattr(attn, 'q_proj'):
-                    q_shape = attn.q_proj.weight.shape
-                    k_shape = attn.k_proj.weight.shape
-                    print(f"   Q projection: {q_shape}")
-                    print(f"   K projection: {k_shape}")
-                    if hasattr(config, 'num_attention_heads') and config.num_attention_heads > 0:
-                        head_dim = q_shape[0] // config.num_attention_heads
-                        analysis['head_dim'] = head_dim
-                        print(f"   Calculated head_dim: {head_dim}")
-                    if k_shape[0] != q_shape[0]:
-                        print(f"   ✅ GQA detected!")
-                        analysis['gqa_detected'] = True
-                    else:
-                        analysis['gqa_detected'] = False
-                    analysis['q_dim'] = q_shape[0]
-                    analysis['k_dim'] = k_shape[0]
         print(f"\n{'='*80}\n")
@@ -177,10 +154,7 @@ def analyze_model_structure(model_url: str) -> Dict[str, Any]:
     except Exception as e:
         import traceback
-        error_msg = traceback.format_exc()
-        print(f"\n❌ Structure analysis failed:")
-        print(error_msg)
         return {
             'model_url': model_url,
             'error': str(e),
@@ -189,7 +163,7 @@ def analyze_model_structure(model_url: str) -> Dict[str, Any]:
 # =====================================================
-# PHOENIX Retention with GQA Support
 # =====================================================
 class MultiScaleRetention(nn.Module):
@@ -233,7 +207,7 @@ class MultiScaleRetention(nn.Module):
         )
     def _repeat_kv(self, hidden_states: torch.Tensor, n_rep: int) -> torch.Tensor:
-        """Repeat K/V heads to match Q heads (GQA)"""
         batch, num_key_value_heads, slen, head_dim = hidden_states.shape
         if n_rep == 1:
             return hidden_states
@@ -255,13 +229,12 @@ class MultiScaleRetention(nn.Module):
         past_key_values: Optional[Tuple[torch.Tensor]] = None,
         **kwargs
     ):
-        """O(n) Retention with GQA support"""
         batch_size, seq_len, _ = hidden_states.shape
         target_device = hidden_states.device
         target_dtype = hidden_states.dtype
-        # ✅ v1.4.3 FIX: dtype과 device 모두 일치
         if self.q_proj.weight.device != target_device or self.q_proj.weight.dtype != target_dtype:
             self.to(device=target_device, dtype=target_dtype)
@@ -348,7 +321,7 @@ class MultiScaleRetention(nn.Module):
 class HierarchicalRetention(nn.Module):
-    """PHOENIX Hierarchical Retention with GQA"""
     def __init__(self, config, layer_idx=0):
         super().__init__()
@@ -386,7 +359,6 @@ class HierarchicalRetention(nn.Module):
         target_device = hidden_states.device
         target_dtype = hidden_states.dtype
-        # ✅ v1.4.3 FIX: dtype과 device 모두 일치
         if self.short_proj.weight.device != target_device or self.short_proj.weight.dtype != target_dtype:
             self.to(device=target_device, dtype=target_dtype)
@@ -432,18 +404,16 @@ class HierarchicalRetention(nn.Module):
 # =====================================================
 def replace_attention_with_retention(model, use_hierarchical=True, structure_info=None):
-    """Transformer Attention → PHOENIX Retention (GQA Support)"""
     print("🔄 Starting Attention → Retention conversion...")
     replaced_count = 0
     total_layers = 0
     layers = None
-    layer_path = None
     if structure_info and structure_info.get('layer_path'):
         layer_path = structure_info['layer_path']
-        print(f"   Using structure info: {layer_path}")
         if layer_path == 'model.layers':
             if hasattr(model, 'model') and hasattr(model.model, 'layers'):
@@ -462,8 +432,6 @@ def replace_attention_with_retention(model, use_hierarchical=True, structure_inf
             result = path_fn(model)
             if result is not None:
                 layers = result
-                layer_path = path_name
-                print(f"   ✅ Found layers at: {path_name}")
                 break
     if layers is None:
@@ -509,7 +477,7 @@ def replace_attention_with_retention(model, use_hierarchical=True, structure_inf
 # =====================================================
-# v2.0 NEW: Fine-tuning 파이프라인
 # =====================================================
 def finetune_retention_model(
@@ -519,42 +487,74 @@ def finetune_retention_model(
     batch_size: int = 4,
     learning_rate: float = 1e-5,
     output_dir: str = "/data/finetuning_temp",
-    use_3phase: bool = True,
 ):
     """
-    🆕 v2.0: Brumby-style Retraining
     """
     print("\n" + "="*80)
-    print("🔥 PHOENIX RETRAINING - Brumby Style (v2.0)")
     print("="*80)
     print(f"   Target Steps: {num_steps}")
-    print(f"   Batch Size: {batch_size}")
     print(f"   Learning Rate: {learning_rate}")
     start_time = time.time()
-    # Prepare dataset
     train_dataset = prepare_simple_dataset(
         tokenizer=tokenizer,
         num_steps=num_steps,
-        batch_size=batch_size
     )
-    # Training arguments
     training_args = TrainingArguments(
         output_dir=output_dir,
         num_train_epochs=1,
-        per_device_train_batch_size=batch_size,
         learning_rate=learning_rate,
         warmup_steps=100,
         logging_steps=50,
         save_steps=1000,
-        max_steps=num_steps,
-        fp16=True,
-        gradient_accumulation_steps=8,
-        dataloader_num_workers=2,
         remove_unused_columns=False,
         report_to="none",
     )
     # Data collator
@@ -563,7 +563,7 @@ def finetune_retention_model(
         mlm=False
     )
-    # Trainer
     trainer = Trainer(
         model=model,
         args=training_args,
@@ -573,13 +573,16 @@ def finetune_retention_model(
     )
     # Train!
-    print(f"\n🚀 Starting Fine-tuning...")
     trainer.train()
     elapsed = time.time() - start_time
     print(f"\n✅ Fine-tuning Complete!")
     print(f"   Time: {elapsed/60:.1f} minutes")
     print(f"="*80 + "\n")
     return model
@@ -591,7 +594,7 @@ def prepare_simple_dataset(
     batch_size: int,
     max_length: int = 2048,
 ):
-    """Simple dataset preparation"""
     print(f"\n📊 Preparing Dataset...")
     num_samples = num_steps * batch_size
@@ -620,7 +623,8 @@ def prepare_simple_dataset(
     tokenized = dataset.map(
         tokenize_function,
         batched=True,
-        remove_columns=dataset.column_names
     )
     print(f"   ✅ Tokenized: {len(tokenized)} samples")
@@ -632,14 +636,14 @@ def estimate_finetuning_cost(
     model_size: str,
     num_steps: int,
     batch_size: int,
-    gpu_type: str = "A100",
 ) -> Dict:
-    """🆕 v2.0: ���용 계산기"""
     gpu_costs = {
         "H100": 3.0,
         "A100": 2.0,
         "A10G": 1.0,
-        "T4": 0.5,
     }
     model_step_times = {
@@ -650,20 +654,27 @@ def estimate_finetuning_cost(
         "14B": 6.0,
     }
     step_time = model_step_times.get(model_size, 1.0) * (batch_size / 4)
-    total_seconds = num_steps * step_time
     total_hours = total_seconds / 3600
-    total_cost_usd = total_hours * gpu_costs.get(gpu_type, 2.0)
     return {
         'hours': round(total_hours, 2),
         'cost_usd': round(total_cost_usd, 2),
         'cost_krw': round(total_cost_usd * 1300, 0),
     }
 # =====================================================
-# Custom Modeling Code 생성
 # =====================================================
 def generate_modeling_phoenix_code():
@@ -885,17 +896,16 @@ AutoConfig.register("phoenix", PhoenixConfig)
 # =====================================================
-# 저장 함수
 # =====================================================
 def save_phoenix_model_with_code(model, tokenizer, output_path, original_model_url, metadata):
-    """PHOENIX 모델 저장 v2.0"""
     output_path = Path(output_path)
     output_path.mkdir(parents=True, exist_ok=True)
     print(f"\n💾 Saving PHOENIX model...")
-    # Embedding Tying
     if hasattr(model.config, 'tie_word_embeddings') and model.config.tie_word_embeddings:
         if hasattr(model, 'lm_head') and hasattr(model, 'model') and hasattr(model.model, 'embed_tokens'):
             model.lm_head.weight = model.model.embed_tokens.weight
@@ -903,12 +913,10 @@ def save_phoenix_model_with_code(model, tokenizer, output_path, original_model_u
     model.save_pretrained(output_path)
     tokenizer.save_pretrained(output_path)
-    # Custom code
     modeling_code = generate_modeling_phoenix_code()
     with open(output_path / "modeling_phoenix.py", "w") as f:
         f.write(modeling_code)
-    # Config
     config_path = output_path / "config.json"
     if config_path.exists():
         with open(config_path, "r") as f:
@@ -924,13 +932,13 @@ def save_phoenix_model_with_code(model, tokenizer, output_path, original_model_u
         with open(config_path, "w") as f:
             json.dump(config_dict, f, indent=2)
-    # Metadata
     with open(output_path / 'phoenix_metadata.json', 'w') as f:
         json.dump(metadata, f, indent=2)
-    # README
     readme = f"""# 🔥 PHOENIX v2.0 - {original_model_url}
 ## Features
 - ✅ Brumby-style Retraining
 - ✅ O(n) Complexity
@@ -948,19 +956,15 @@ model = AutoModelForCausalLM.from_pretrained(
 )
 ```
-**VIDraft AI Research Lab** | PHOENIX v2.0
 """
     with open(output_path / "README.md", "w") as f:
         f.write(readme)
-    print(f"   ✅ Model saved to {output_path}")
-# =====================================================
-# 업로드 함수
-# =====================================================
 def upload_to_huggingface_hub(
     model_path: str,
     original_model_url: str,
@@ -968,7 +972,7 @@ def upload_to_huggingface_hub(
     private: bool = True,
     token: str = None,
 ) -> Tuple[bool, str, str]:
-    """Upload PHOENIX model to Hub"""
     if token is None:
         token = HF_TOKEN
@@ -1010,12 +1014,8 @@ def upload_to_huggingface_hub(
         return False, "", f"❌ Upload failed: {e}"
-# =====================================================
-# 평가 함수
-# =====================================================
 def evaluate_model_quality(model, tokenizer):
-    """모델 품질 평가"""
     test_prompts = [
         "The capital of France is",
         "In machine learning,",
@@ -1053,7 +1053,7 @@ def evaluate_model_quality(model, tokenizer):
 # =====================================================
-# 버닝 함수 (v2.0 통합)
 # =====================================================
 def burn_model_with_finetuning(
@@ -1064,10 +1064,11 @@ def burn_model_with_finetuning(
     num_steps: int = 3000,
     batch_size: int = 4,
     learning_rate: float = 1e-5,
 ):
-    """🆕 v2.0: Zero-shot + Optional Fine-tuning"""
     print("="*80)
-    print("🔥 PHOENIX Model Burning v2.0")
     print("="*80)
     output_path = Path(output_dir)
@@ -1078,23 +1079,26 @@ def burn_model_with_finetuning(
         print(f"\n🔍 STEP 1: Structure Analysis...")
         structure_info = analyze_model_structure(model_url)
-        # STEP 2: Load Model
-        print(f"\n📥 STEP 2: Loading model...")
         start_time = time.time()
         config = AutoConfig.from_pretrained(model_url, trust_remote_code=True)
         model = AutoModelForCausalLM.from_pretrained(
             model_url,
             trust_remote_code=True,
             torch_dtype=torch.float16,
-        ).to(DEVICE)
         tokenizer = AutoTokenizer.from_pretrained(model_url, trust_remote_code=True)
         if tokenizer.pad_token is None:
             tokenizer.pad_token = tokenizer.eos_token
         load_time = time.time() - start_time
-        print(f"✅ Loaded in {load_time:.1f}s")
         # STEP 3: Convert
         print(f"\n🔄 STEP 3: Converting Attention → Retention...")
@@ -1111,9 +1115,9 @@ def burn_model_with_finetuning(
         print(f"✅ Converted {converted}/{total} layers in {convert_time:.1f}s")
-        # 🆕 STEP 4: Fine-tuning (Optional)
         if enable_finetuning:
-            print(f"\n🚀 STEP 4: Fine-tuning (Brumby-style)...")
             ft_start = time.time()
             model = finetune_retention_model(
@@ -1122,13 +1126,14 @@ def burn_model_with_finetuning(
                 num_steps=num_steps,
                 batch_size=batch_size,
                 learning_rate=learning_rate,
             )
             ft_time = time.time() - ft_start
             print(f"✅ Fine-tuning completed in {ft_time/60:.1f} minutes")
         else:
             ft_time = 0
-            print(f"\n⏭️ STEP 4: Fine-tuning skipped (enable for better quality)")
         # STEP 5: Evaluate
         print(f"\n📊 STEP 5: Evaluating...")
@@ -1146,6 +1151,8 @@ def burn_model_with_finetuning(
             'quality_score': quality_score,
             'finetuned': enable_finetuning,
             'finetuning_steps': num_steps if enable_finetuning else 0,
             'timestamp': datetime.now().isoformat(),
         }
@@ -1160,14 +1167,15 @@ def burn_model_with_finetuning(
             'quality_score': quality_score,
             'total_time': total_time,
             'finetuned': enable_finetuning,
             'structure_info': structure_info,
         }
         print(f"\n{'='*80}")
-        print(f"✅ Burning Complete!")
         print(f"   Model: {output_path}")
         print(f"   Quality: {quality_score:.2f}/1.00")
-        print(f"   Fine-tuned: {enable_finetuning}")
         print(f"{'='*80}\n")
         return result
@@ -1182,7 +1190,7 @@ def burn_model_with_finetuning(
 # =====================================================
-# Database
 # =====================================================
 class ExperimentDatabase:
@@ -1202,6 +1210,7 @@ class ExperimentDatabase:
                     conversion_rate REAL,
                     quality_score REAL,
                     finetuned BOOLEAN,
                     timestamp DATETIME DEFAULT CURRENT_TIMESTAMP
                 )
             """)
@@ -1212,8 +1221,8 @@ class ExperimentDatabase:
             cursor = conn.cursor()
             cursor.execute("""
                 INSERT INTO burning_history
-                (model_url, output_path, hub_url, conversion_rate, quality_score, finetuned)
-                VALUES (?, ?, ?, ?, ?, ?)
             """, (
                 info.get('model_url'),
                 info.get('output_path'),
@@ -1221,6 +1230,7 @@ class ExperimentDatabase:
                 info.get('conversion_rate'),
                 info.get('quality_score'),
                 info.get('finetuned'),
             ))
             conn.commit()
             return cursor.lastrowid
@@ -1248,11 +1258,12 @@ def burn_phoenix_model_ui(
     ft_steps,
     ft_batch,
     ft_lr,
     upload_hub,
     hub_repo,
     hub_private,
 ):
-    """Gradio UI 함수"""
     try:
         if not model_url.strip():
@@ -1263,11 +1274,11 @@ def burn_phoenix_model_ui(
         output_dir = f"{MODELS_PATH}/{output_name}"
-        # 🆕 v2.0: 비용 추정
         if enable_finetuning:
             model_size = "0.6B" if "0.6B" in model_url else "1.5B"
-            cost = estimate_finetuning_cost(model_size, ft_steps, ft_batch)
-            print(f"\n💰 Estimated Cost: ${cost['cost_usd']} ({cost['hours']}h)")
         # Burn
         result = burn_model_with_finetuning(
@@ -1278,6 +1289,7 @@ def burn_phoenix_model_ui(
             num_steps=ft_steps,
             batch_size=ft_batch,
             learning_rate=ft_lr,
         )
         if result['status'] != 'success':
@@ -1301,11 +1313,15 @@ def burn_phoenix_model_ui(
             'conversion_rate': result['conversion_rate'],
             'quality_score': result['quality_score'],
             'finetuned': enable_finetuning,
         })
         # Output
         output_md = f"""
-# 🔥 PHOENIX v2.0 Burning Complete!
 ## Model Info
 - **Original**: {model_url}
@@ -1313,23 +1329,22 @@ def burn_phoenix_model_ui(
 - **Conversion**: {result['conversion_rate']*100:.1f}%
 - **Quality**: {result['quality_score']:.2f}/1.00
 - **Fine-tuned**: {'✅ YES' if enable_finetuning else '❌ NO'}
-## Hub Status
 """
         if hub_url:
             output_md += f"""
 ✅ **Uploaded**: [{hub_url}]({hub_url})
 ```python
 model = AutoModelForCausalLM.from_pretrained(
     "{hub_url.replace('https://huggingface.co/', '')}",
-    trust_remote_code=True
 )
 ```
 """
-        else:
-            output_md += "⏭️ **Upload Skipped**"
         # Plot
         fig = go.Figure()
@@ -1338,7 +1353,7 @@ model = AutoModelForCausalLM.from_pretrained(
             y=[result['conversion_rate'], result['quality_score']],
             marker_color=['#3b82f6', '#10b981']
         ))
-        fig.update_layout(title="Metrics", yaxis_range=[0, 1])
         return output_md, fig
@@ -1348,7 +1363,7 @@ model = AutoModelForCausalLM.from_pretrained(
 def view_history():
-    """View history"""
     try:
         history = db.get_history(20)
         if not history:
@@ -1361,7 +1376,8 @@ def view_history():
             x='timestamp',
             y='quality_score',
             color='finetuned',
-            title='Burning History'
         )
         return f"## History\n\n{df.to_markdown(index=False)}", fig
@@ -1373,16 +1389,16 @@ def view_history():
 # Gradio App
 # =====================================================
-with gr.Blocks(title="🔥 PHOENIX v2.0", theme=gr.themes.Soft()) as demo:
-    gr.Markdown("""
-    # 🔥 PHOENIX v2.0 - Brumby-inspired Retraining
-    **Complete Integrated Version**
-    🆕 **v2.0 NEW**: Fine-tuning 파이프라인 (Brumby-style)
-    ✅ v1.4.3: forward() Transformers 호환
-    ✅ v1.4.3: dtype 수정 (bfloat16)
     ✅ GQA Support | O(n) Complexity
     ---
@@ -1401,35 +1417,47 @@ with gr.Blocks(title="🔥 PHOENIX v2.0", theme=gr.themes.Soft()) as demo:
                     burn_name = gr.Textbox(label="💾 Output Name", placeholder="my_model")
                     gr.Markdown("---")
-                    gr.Markdown("### 🆕 Fine-tuning (v2.0)")
                     burn_ft_enable = gr.Checkbox(
                         value=False,
                         label="🚀 Enable Fine-tuning (Brumby-style)",
-                        info="Required for quality output!"
                     )
                     burn_ft_steps = gr.Slider(
                         1000, 10000, 3000,
                         step=100,
-                        label="Steps (Brumby used 3000)",
                         visible=False
                     )
-                    burn_ft_batch = gr.Slider(1, 16, 4, step=1, label="Batch Size", visible=False)
                     burn_ft_lr = gr.Number(value=1e-5, label="Learning Rate", visible=False)
                     def toggle_ft(enabled):
                         return [
                             gr.update(visible=enabled),
                             gr.update(visible=enabled),
                             gr.update(visible=enabled),
                         ]
                     burn_ft_enable.change(
                         toggle_ft,
                         [burn_ft_enable],
-                        [burn_ft_steps, burn_ft_batch, burn_ft_lr]
                     )
                     gr.Markdown("---")
@@ -1449,7 +1477,7 @@ with gr.Blocks(title="🔥 PHOENIX v2.0", theme=gr.themes.Soft()) as demo:
                 burn_phoenix_model_ui,
                 [
                     burn_url, burn_hier, burn_name,
-                    burn_ft_enable, burn_ft_steps, burn_ft_batch, burn_ft_lr,
                     burn_upload, burn_repo, burn_private
                 ],
                 [burn_output, burn_plot]
@@ -1468,16 +1496,19 @@ with gr.Blocks(title="🔥 PHOENIX v2.0", theme=gr.themes.Soft()) as demo:
     gr.Markdown(f"""
     ---
-    ## 🔥 PHOENIX v2.0
-    **What's New**:
-    - 🆕 Brumby-style Fine-tuning Pipeline
-    - 🆕 3-Phase Dataset Support
-    - 🆕 Cost Calculator
-    - ✅ All v1.4.3 Fixes Included
     **Token**: {'✅' if HF_TOKEN else '❌ Not Found'}
-    **VIDraft AI Research Lab** | PHOENIX v2.0 Complete
     """)

 """
+🔥 PHOENIX Retention Research Platform v2.0 - MULTI-GPU OPTIMIZED
+H100 x 8 GPU 최적화 버전
+✅ v2.0 NEW: Multi-GPU (8x H100) 최적화
+✅ v2.0 NEW: Accelerate 통합
+✅ v2.0 NEW: DeepSpeed ZeRO-3 지원
+✅ v2.0 NEW: Gradient Checkpointing
+✅ Fine-tuning 파이프라인 (Brumby-style)
+✅ 모든 v1.4.3 수정사항 포함
+VIDraft AI Research Lab - Multi-GPU Version v2.0
 """
 import gradio as gr
 from huggingface_hub import HfApi, create_repo
 # =====================================================
+# 전역 설정 - MULTI-GPU
 # =====================================================
+# GPU 설정
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+NUM_GPUS = torch.cuda.device_count()
 STORAGE_PATH = "/data"
 DB_PATH = f"{STORAGE_PATH}/phoenix_experiments.db"
 MODELS_PATH = f"{STORAGE_PATH}/phoenix_models"
 Path(STORAGE_PATH).mkdir(parents=True, exist_ok=True)
 Path(MODELS_PATH).mkdir(parents=True, exist_ok=True)
+print(f"🔥 PHOENIX Platform v2.0 - Multi-GPU Optimized")
 print(f"💾 Storage: {STORAGE_PATH}")
 print(f"🎯 Default Base Model: {DEFAULT_MODEL}")
+print(f"🚀 GPUs Available: {NUM_GPUS}")
+if NUM_GPUS > 0:
+    for i in range(NUM_GPUS):
+        print(f"   GPU {i}: {torch.cuda.get_device_name(i)}")
 if HF_TOKEN:
     print(f"🔑 HuggingFace Token: {'*' * 10}{HF_TOKEN[-4:]}")
 # =====================================================
 # 모델 구조 분석 함수
         config = AutoConfig.from_pretrained(model_url, trust_remote_code=True)
         print(f"✅ Config loaded")
+        # ✅ Multi-GPU: CPU로만 로드 (분석용)
+        print(f"\n📦 Loading model structure (CPU only)...")
         model = AutoModelForCausalLM.from_pretrained(
             model_url,
             trust_remote_code=True,
             torch_dtype=torch.float16,
+            device_map="cpu"  # Analysis만 CPU에서
         )
         analysis = {
             'layer_path': None,
         }
+        # Layer 분석
         layers = None
         layer_path = None
         possible_paths = [
             ('model.layers', lambda m: m.model.layers if hasattr(m, 'model') and hasattr(m.model, 'layers') else None),
             ('transformer.h', lambda m: m.transformer.h if hasattr(m, 'transformer') and hasattr(m.transformer, 'h') else None),
         ]
         for path_name, path_fn in possible_paths:
             if result is not None:
                 layers = result
                 layer_path = path_name
                 break
+        if layers:
+            analysis['total_layers'] = len(layers)
+            analysis['layer_path'] = layer_path
+            if len(layers) > 0:
+                first_layer = layers[0]
+                if hasattr(first_layer, 'self_attn'):
+                    analysis['has_self_attn'] = True
+                    attn = first_layer.self_attn
+                    if hasattr(attn, 'q_proj'):
+                        q_shape = attn.q_proj.weight.shape
+                        k_shape = attn.k_proj.weight.shape
+                        if hasattr(config, 'num_attention_heads') and config.num_attention_heads > 0:
+                            head_dim = q_shape[0] // config.num_attention_heads
+                            analysis['head_dim'] = head_dim
+                        analysis['gqa_detected'] = (k_shape[0] != q_shape[0])
+                        analysis['q_dim'] = q_shape[0]
+                        analysis['k_dim'] = k_shape[0]
         print(f"\n{'='*80}\n")
     except Exception as e:
         import traceback
+        print(f"\n❌ Structure analysis failed: {e}")
         return {
             'model_url': model_url,
             'error': str(e),
 # =====================================================
+# PHOENIX Retention (동일)
 # =====================================================
 class MultiScaleRetention(nn.Module):
         )
     def _repeat_kv(self, hidden_states: torch.Tensor, n_rep: int) -> torch.Tensor:
+        """Repeat K/V heads (GQA)"""
         batch, num_key_value_heads, slen, head_dim = hidden_states.shape
         if n_rep == 1:
             return hidden_states
         past_key_values: Optional[Tuple[torch.Tensor]] = None,
         **kwargs
     ):
+        """O(n) Retention"""
         batch_size, seq_len, _ = hidden_states.shape
         target_device = hidden_states.device
         target_dtype = hidden_states.dtype
         if self.q_proj.weight.device != target_device or self.q_proj.weight.dtype != target_dtype:
             self.to(device=target_device, dtype=target_dtype)
 class HierarchicalRetention(nn.Module):
+    """PHOENIX Hierarchical Retention"""
     def __init__(self, config, layer_idx=0):
         super().__init__()
         target_device = hidden_states.device
         target_dtype = hidden_states.dtype
         if self.short_proj.weight.device != target_device or self.short_proj.weight.dtype != target_dtype:
             self.to(device=target_device, dtype=target_dtype)
 # =====================================================
 def replace_attention_with_retention(model, use_hierarchical=True, structure_info=None):
+    """Transformer Attention → PHOENIX Retention"""
     print("🔄 Starting Attention → Retention conversion...")
     replaced_count = 0
     total_layers = 0
     layers = None
     if structure_info and structure_info.get('layer_path'):
         layer_path = structure_info['layer_path']
         if layer_path == 'model.layers':
             if hasattr(model, 'model') and hasattr(model.model, 'layers'):
             result = path_fn(model)
             if result is not None:
                 layers = result
                 break
     if layers is None:
 # =====================================================
+# 🆕 MULTI-GPU Fine-tuning 파이프라인
 # =====================================================
 def finetune_retention_model(
     batch_size: int = 4,
     learning_rate: float = 1e-5,
     output_dir: str = "/data/finetuning_temp",
+    use_gradient_checkpointing: bool = True,
 ):
     """
+    🆕 v2.0: Brumby-style Retraining with Multi-GPU Support
     """
     print("\n" + "="*80)
+    print("🔥 PHOENIX RETRAINING - Multi-GPU (v2.0)")
     print("="*80)
+    print(f"   GPUs: {NUM_GPUS}")
     print(f"   Target Steps: {num_steps}")
+    print(f"   Batch Size per GPU: {batch_size}")
+    print(f"   Global Batch Size: {batch_size * NUM_GPUS}")
     print(f"   Learning Rate: {learning_rate}")
+    print(f"   Gradient Checkpointing: {use_gradient_checkpointing}")
     start_time = time.time()
+    # ✅ Gradient Checkpointing (메모리 절약)
+    if use_gradient_checkpointing:
+        if hasattr(model, 'gradient_checkpointing_enable'):
+            model.gradient_checkpointing_enable()
+            print(f"   ✅ Gradient Checkpointing enabled")
+    # Dataset 준비
     train_dataset = prepare_simple_dataset(
         tokenizer=tokenizer,
         num_steps=num_steps,
+        batch_size=batch_size * NUM_GPUS  # Multi-GPU 고려
     )
+    # ✅ Multi-GPU Training Arguments
     training_args = TrainingArguments(
         output_dir=output_dir,
+        # 🚀 Multi-GPU 설정
+        per_device_train_batch_size=batch_size,  # GPU당 batch
+        gradient_accumulation_steps=max(1, 8 // NUM_GPUS),  # GPU 수에 따라 조정
+        # Training 설정
         num_train_epochs=1,
+        max_steps=num_steps,
         learning_rate=learning_rate,
         warmup_steps=100,
+        # Optimization
+        fp16=True,  # Mixed precision
+        optim="adamw_torch_fused",  # H100 최적화
+        # Logging
         logging_steps=50,
+        logging_first_step=True,
         save_steps=1000,
+        save_total_limit=2,
+        # Performance
+        dataloader_num_workers=4 * NUM_GPUS,  # GPU당 4 workers
+        dataloader_pin_memory=True,
+        # Multi-GPU 관련
+        ddp_find_unused_parameters=False,
+        ddp_backend="nccl",  # H100 최적화
+        # Misc
         remove_unused_columns=False,
         report_to="none",
+        # ✅ DeepSpeed (선택사항)
+        # deepspeed="ds_config.json",  # DeepSpeed 사용시
     )
     # Data collator
         mlm=False
     )
+    # ✅ Trainer (자동 Multi-GPU)
     trainer = Trainer(
         model=model,
         args=training_args,
     )
     # Train!
+    print(f"\n🚀 Starting Multi-GPU Fine-tuning...")
+    print(f"   Using {NUM_GPUS} GPUs")
     trainer.train()
     elapsed = time.time() - start_time
     print(f"\n✅ Fine-tuning Complete!")
     print(f"   Time: {elapsed/60:.1f} minutes")
+    print(f"   Effective samples/sec: {(num_steps * batch_size * NUM_GPUS) / elapsed:.2f}")
     print(f"="*80 + "\n")
     return model
     batch_size: int,
     max_length: int = 2048,
 ):
+    """Dataset 준비"""
     print(f"\n📊 Preparing Dataset...")
     num_samples = num_steps * batch_size
     tokenized = dataset.map(
         tokenize_function,
         batched=True,
+        remove_columns=dataset.column_names,
+        num_proc=4  # Parallel processing
     )
     print(f"   ✅ Tokenized: {len(tokenized)} samples")
     model_size: str,
     num_steps: int,
     batch_size: int,
+    num_gpus: int = NUM_GPUS,
+    gpu_type: str = "H100",
 ) -> Dict:
+    """비용 계산기 - Multi-GPU"""
     gpu_costs = {
         "H100": 3.0,
         "A100": 2.0,
         "A10G": 1.0,
     }
     model_step_times = {
         "14B": 6.0,
     }
+    # Multi-GPU로 인한 시간 단축 (linear scaling 가정)
     step_time = model_step_times.get(model_size, 1.0) * (batch_size / 4)
+    step_time_per_gpu = step_time / num_gpus  # GPU 병렬화
+    total_seconds = num_steps * step_time_per_gpu
     total_hours = total_seconds / 3600
+    # 비용은 GPU 수만큼 곱함
+    total_cost_usd = total_hours * gpu_costs.get(gpu_type, 2.0) * num_gpus
     return {
         'hours': round(total_hours, 2),
         'cost_usd': round(total_cost_usd, 2),
         'cost_krw': round(total_cost_usd * 1300, 0),
+        'num_gpus': num_gpus,
+        'gpu_type': gpu_type,
     }
 # =====================================================
+# Custom Modeling Code (동일)
 # =====================================================
 def generate_modeling_phoenix_code():
 # =====================================================
+# 저장/업로드/평가 (동일)
 # =====================================================
 def save_phoenix_model_with_code(model, tokenizer, output_path, original_model_url, metadata):
+    """PHOENIX 모델 저장"""
     output_path = Path(output_path)
     output_path.mkdir(parents=True, exist_ok=True)
     print(f"\n💾 Saving PHOENIX model...")
     if hasattr(model.config, 'tie_word_embeddings') and model.config.tie_word_embeddings:
         if hasattr(model, 'lm_head') and hasattr(model, 'model') and hasattr(model.model, 'embed_tokens'):
             model.lm_head.weight = model.model.embed_tokens.weight
     model.save_pretrained(output_path)
     tokenizer.save_pretrained(output_path)
     modeling_code = generate_modeling_phoenix_code()
     with open(output_path / "modeling_phoenix.py", "w") as f:
         f.write(modeling_code)
     config_path = output_path / "config.json"
     if config_path.exists():
         with open(config_path, "r") as f:
         with open(config_path, "w") as f:
             json.dump(config_dict, f, indent=2)
     with open(output_path / 'phoenix_metadata.json', 'w') as f:
         json.dump(metadata, f, indent=2)
     readme = f"""# 🔥 PHOENIX v2.0 - {original_model_url}
+**Multi-GPU Trained** with {metadata.get('num_gpus', 1)} GPUs
 ## Features
 - ✅ Brumby-style Retraining
 - ✅ O(n) Complexity
 )
 ```
+**VIDraft AI Research Lab** | PHOENIX v2.0 Multi-GPU
 """
     with open(output_path / "README.md", "w") as f:
         f.write(readme)
+    print(f"   ✅ Model saved")
 def upload_to_huggingface_hub(
     model_path: str,
     original_model_url: str,
     private: bool = True,
     token: str = None,
 ) -> Tuple[bool, str, str]:
+    """Upload to Hub"""
     if token is None:
         token = HF_TOKEN
         return False, "", f"❌ Upload failed: {e}"
 def evaluate_model_quality(model, tokenizer):
+    """Quality 평가"""
     test_prompts = [
         "The capital of France is",
         "In machine learning,",
 # =====================================================
+# 🆕 Multi-GPU Burning 함수
 # =====================================================
 def burn_model_with_finetuning(
     num_steps: int = 3000,
     batch_size: int = 4,
     learning_rate: float = 1e-5,
+    use_gradient_checkpointing: bool = True,
 ):
+    """🆕 v2.0: Multi-GPU Optimized Burning"""
     print("="*80)
+    print(f"🔥 PHOENIX Model Burning v2.0 - Multi-GPU ({NUM_GPUS} GPUs)")
     print("="*80)
     output_path = Path(output_dir)
         print(f"\n🔍 STEP 1: Structure Analysis...")
         structure_info = analyze_model_structure(model_url)
+        # STEP 2: Load Model with device_map="auto"
+        print(f"\n📥 STEP 2: Loading model (Multi-GPU)...")
         start_time = time.time()
         config = AutoConfig.from_pretrained(model_url, trust_remote_code=True)
+        # ✅ Multi-GPU: device_map="auto"로 자동 분산
         model = AutoModelForCausalLM.from_pretrained(
             model_url,
             trust_remote_code=True,
             torch_dtype=torch.float16,
+            device_map="auto"  # 자동으로 8개 GPU에 분산!
+        )
         tokenizer = AutoTokenizer.from_pretrained(model_url, trust_remote_code=True)
         if tokenizer.pad_token is None:
             tokenizer.pad_token = tokenizer.eos_token
         load_time = time.time() - start_time
+        print(f"✅ Loaded across {NUM_GPUS} GPUs in {load_time:.1f}s")
         # STEP 3: Convert
         print(f"\n🔄 STEP 3: Converting Attention → Retention...")
         print(f"✅ Converted {converted}/{total} layers in {convert_time:.1f}s")
+        # STEP 4: Fine-tuning (Multi-GPU)
         if enable_finetuning:
+            print(f"\n🚀 STEP 4: Multi-GPU Fine-tuning...")
             ft_start = time.time()
             model = finetune_retention_model(
                 num_steps=num_steps,
                 batch_size=batch_size,
                 learning_rate=learning_rate,
+                use_gradient_checkpointing=use_gradient_checkpointing,
             )
             ft_time = time.time() - ft_start
             print(f"✅ Fine-tuning completed in {ft_time/60:.1f} minutes")
         else:
             ft_time = 0
+            print(f"\n⏭️ STEP 4: Fine-tuning skipped")
         # STEP 5: Evaluate
         print(f"\n📊 STEP 5: Evaluating...")
             'quality_score': quality_score,
             'finetuned': enable_finetuning,
             'finetuning_steps': num_steps if enable_finetuning else 0,
+            'num_gpus': NUM_GPUS,
+            'gradient_checkpointing': use_gradient_checkpointing,
             'timestamp': datetime.now().isoformat(),
         }
             'quality_score': quality_score,
             'total_time': total_time,
             'finetuned': enable_finetuning,
+            'num_gpus': NUM_GPUS,
             'structure_info': structure_info,
         }
         print(f"\n{'='*80}")
+        print(f"✅ Multi-GPU Burning Complete!")
+        print(f"   GPUs Used: {NUM_GPUS}")
         print(f"   Model: {output_path}")
         print(f"   Quality: {quality_score:.2f}/1.00")
         print(f"{'='*80}\n")
         return result
 # =====================================================
+# Database (동일)
 # =====================================================
 class ExperimentDatabase:
                     conversion_rate REAL,
                     quality_score REAL,
                     finetuned BOOLEAN,
+                    num_gpus INTEGER,
                     timestamp DATETIME DEFAULT CURRENT_TIMESTAMP
                 )
             """)
             cursor = conn.cursor()
             cursor.execute("""
                 INSERT INTO burning_history
+                (model_url, output_path, hub_url, conversion_rate, quality_score, finetuned, num_gpus)
+                VALUES (?, ?, ?, ?, ?, ?, ?)
             """, (
                 info.get('model_url'),
                 info.get('output_path'),
                 info.get('conversion_rate'),
                 info.get('quality_score'),
                 info.get('finetuned'),
+                info.get('num_gpus', 1),
             ))
             conn.commit()
             return cursor.lastrowid
     ft_steps,
     ft_batch,
     ft_lr,
+    use_grad_ckpt,
     upload_hub,
     hub_repo,
     hub_private,
 ):
+    """Gradio UI"""
     try:
         if not model_url.strip():
         output_dir = f"{MODELS_PATH}/{output_name}"
+        # 비용 추정
         if enable_finetuning:
             model_size = "0.6B" if "0.6B" in model_url else "1.5B"
+            cost = estimate_finetuning_cost(model_size, ft_steps, ft_batch, NUM_GPUS)
+            print(f"\n💰 Estimated Cost: ${cost['cost_usd']} ({cost['hours']}h with {NUM_GPUS} GPUs)")
         # Burn
         result = burn_model_with_finetuning(
             num_steps=ft_steps,
             batch_size=ft_batch,
             learning_rate=ft_lr,
+            use_gradient_checkpointing=use_grad_ckpt,
         )
         if result['status'] != 'success':
             'conversion_rate': result['conversion_rate'],
             'quality_score': result['quality_score'],
             'finetuned': enable_finetuning,
+            'num_gpus': NUM_GPUS,
         })
         # Output
         output_md = f"""
+# 🔥 PHOENIX v2.0 Multi-GPU Complete!
+## Hardware
+- **GPUs Used**: {NUM_GPUS} x {torch.cuda.get_device_name(0) if NUM_GPUS > 0 else 'N/A'}
 ## Model Info
 - **Original**: {model_url}
 - **Conversion**: {result['conversion_rate']*100:.1f}%
 - **Quality**: {result['quality_score']:.2f}/1.00
 - **Fine-tuned**: {'✅ YES' if enable_finetuning else '❌ NO'}
 """
         if hub_url:
             output_md += f"""
+## Hub Status
 ✅ **Uploaded**: [{hub_url}]({hub_url})
 ```python
 model = AutoModelForCausalLM.from_pretrained(
     "{hub_url.replace('https://huggingface.co/', '')}",
+    trust_remote_code=True,
+    device_map="auto"  # Multi-GPU
 )
 ```
 """
         # Plot
         fig = go.Figure()
             y=[result['conversion_rate'], result['quality_score']],
             marker_color=['#3b82f6', '#10b981']
         ))
+        fig.update_layout(title=f"Metrics ({NUM_GPUS} GPUs)", yaxis_range=[0, 1])
         return output_md, fig
 def view_history():
+    """History"""
     try:
         history = db.get_history(20)
         if not history:
             x='timestamp',
             y='quality_score',
             color='finetuned',
+            size='num_gpus',
+            title='Burning History (Multi-GPU)'
         )
         return f"## History\n\n{df.to_markdown(index=False)}", fig
 # Gradio App
 # =====================================================
+with gr.Blocks(title="🔥 PHOENIX v2.0 Multi-GPU", theme=gr.themes.Soft()) as demo:
+    gr.Markdown(f"""
+    # 🔥 PHOENIX v2.0 - Multi-GPU Optimized
+    **H100 x {NUM_GPUS} GPUs Ready**
+    🆕 **v2.0 Multi-GPU**: Accelerate 통합, DDP 지원
+    🆕 **v2.0**: Fine-tuning 파이프라인 (Brumby-style)
+    ✅ v1.4.3: All fixes included
     ✅ GQA Support | O(n) Complexity
     ---
                     burn_name = gr.Textbox(label="💾 Output Name", placeholder="my_model")
                     gr.Markdown("---")
+                    gr.Markdown(f"### 🆕 Fine-tuning ({NUM_GPUS} GPUs)")
                     burn_ft_enable = gr.Checkbox(
                         value=False,
                         label="🚀 Enable Fine-tuning (Brumby-style)",
+                        info=f"Multi-GPU acceleration with {NUM_GPUS} GPUs!"
                     )
                     burn_ft_steps = gr.Slider(
                         1000, 10000, 3000,
                         step=100,
+                        label="Steps",
                         visible=False
                     )
+                    burn_ft_batch = gr.Slider(
+                        1, 16, 4,
+                        step=1,
+                        label=f"Batch Size per GPU ({NUM_GPUS} GPUs)",
+                        visible=False
+                    )
                     burn_ft_lr = gr.Number(value=1e-5, label="Learning Rate", visible=False)
+                    burn_grad_ckpt = gr.Checkbox(
+                        value=True,
+                        label="✅ Gradient Checkpointing (saves memory)",
+                        visible=False
+                    )
                     def toggle_ft(enabled):
                         return [
                             gr.update(visible=enabled),
                             gr.update(visible=enabled),
                             gr.update(visible=enabled),
+                            gr.update(visible=enabled),
                         ]
                     burn_ft_enable.change(
                         toggle_ft,
                         [burn_ft_enable],
+                        [burn_ft_steps, burn_ft_batch, burn_ft_lr, burn_grad_ckpt]
                     )
                     gr.Markdown("---")
                 burn_phoenix_model_ui,
                 [
                     burn_url, burn_hier, burn_name,
+                    burn_ft_enable, burn_ft_steps, burn_ft_batch, burn_ft_lr, burn_grad_ckpt,
                     burn_upload, burn_repo, burn_private
                 ],
                 [burn_output, burn_plot]
     gr.Markdown(f"""
     ---
+    ## 🔥 PHOENIX v2.0 Multi-GPU
+    **Hardware**: {NUM_GPUS} x {torch.cuda.get_device_name(0) if NUM_GPUS > 0 else 'N/A'}
+    **Features**:
+    - 🆕 Multi-GPU Training (DDP)
+    - 🆕 Gradient Checkpointing
+    - 🆕 H100 Optimized (fused optimizer)
+    - 🆕 Brumby-style Fine-tuning
+    - ✅ All v1.4.3 Fixes
     **Token**: {'✅' if HF_TOKEN else '❌ Not Found'}
+    **VIDraft AI Research Lab** | PHOENIX v2.0 Multi-GPU
     """)