seawolf2357 commited on
Commit
c381ead
·
verified ·
1 Parent(s): c43a720

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +201 -170
app.py CHANGED
@@ -1,20 +1,15 @@
1
  """
2
- 🔥 PHOENIX Retention Research Platform v2.0 COMPLETE
3
- Brumby-inspired Retraining + All v1.4.3 Fixes
4
 
5
- ✅ v2.0 NEW: Fine-tuning 파이프라인 (Brumby-style Retraining)
6
- ✅ v2.0 NEW: 3-Phase Dataset 지원
7
- ✅ v2.0 NEW: 비용 계산기
8
- v1.4.3: forward() 시그니처 Transformers 호환
9
- v1.4.3: dtype 불일치 수정 (bfloat16 지원)
10
- ✅ v1.4.3: Embedding Tying 자동 처리
11
- ✅ Model Structure Pre-Analysis
12
- Qwen3 Model Support
13
- ✅ GQA Support
14
- ✅ HuggingFace Hub Integration
15
-
16
- VIDraft AI Research Lab - Complete Integrated Version v2.0
17
- Based on Manifest AI's Brumby-14B Success
18
  """
19
 
20
  import gradio as gr
@@ -46,10 +41,13 @@ import os
46
  from huggingface_hub import HfApi, create_repo
47
 
48
  # =====================================================
49
- # 전역 설정
50
  # =====================================================
51
 
 
52
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 
 
53
  STORAGE_PATH = "/data"
54
  DB_PATH = f"{STORAGE_PATH}/phoenix_experiments.db"
55
  MODELS_PATH = f"{STORAGE_PATH}/phoenix_models"
@@ -61,13 +59,15 @@ HF_TOKEN = os.getenv("HF_TOKEN")
61
  Path(STORAGE_PATH).mkdir(parents=True, exist_ok=True)
62
  Path(MODELS_PATH).mkdir(parents=True, exist_ok=True)
63
 
64
- print(f"🔥 PHOENIX Platform v2.0 initialized on {DEVICE}")
65
  print(f"💾 Storage: {STORAGE_PATH}")
66
  print(f"🎯 Default Base Model: {DEFAULT_MODEL}")
 
 
 
 
67
  if HF_TOKEN:
68
  print(f"🔑 HuggingFace Token: {'*' * 10}{HF_TOKEN[-4:]}")
69
- else:
70
- print(f"⚠️ HuggingFace Token not found (upload disabled)")
71
 
72
  # =====================================================
73
  # 모델 구조 분석 함수
@@ -84,15 +84,14 @@ def analyze_model_structure(model_url: str) -> Dict[str, Any]:
84
  config = AutoConfig.from_pretrained(model_url, trust_remote_code=True)
85
 
86
  print(f"✅ Config loaded")
87
- print(f" Architecture: {config.architectures if hasattr(config, 'architectures') else 'Unknown'}")
88
- print(f" Model Type: {config.model_type if hasattr(config, 'model_type') else 'Unknown'}")
89
 
90
- print(f"\n📦 Loading model structure...")
 
91
  model = AutoModelForCausalLM.from_pretrained(
92
  model_url,
93
  trust_remote_code=True,
94
  torch_dtype=torch.float16,
95
- device_map="cpu"
96
  )
97
 
98
  analysis = {
@@ -108,15 +107,13 @@ def analyze_model_structure(model_url: str) -> Dict[str, Any]:
108
  'layer_path': None,
109
  }
110
 
111
- print(f"\n🔍 Analyzing layer structure...")
112
-
113
  layers = None
114
  layer_path = None
115
 
116
  possible_paths = [
117
  ('model.layers', lambda m: m.model.layers if hasattr(m, 'model') and hasattr(m.model, 'layers') else None),
118
  ('transformer.h', lambda m: m.transformer.h if hasattr(m, 'transformer') and hasattr(m.transformer, 'h') else None),
119
- ('layers', lambda m: m.layers if hasattr(m, 'layers') else None),
120
  ]
121
 
122
  for path_name, path_fn in possible_paths:
@@ -124,49 +121,29 @@ def analyze_model_structure(model_url: str) -> Dict[str, Any]:
124
  if result is not None:
125
  layers = result
126
  layer_path = path_name
127
- print(f" ✅ Found layers at: {path_name}")
128
  break
129
 
130
- if layers is None:
131
- print(f" ❌ No layers found!")
132
- analysis['error'] = 'No layers found'
133
- return analysis
134
-
135
- analysis['total_layers'] = len(layers)
136
- analysis['layer_path'] = layer_path
137
-
138
- print(f" Total Layers: {len(layers)}")
139
-
140
- if len(layers) > 0:
141
- first_layer = layers[0]
142
- print(f"\n🔬 Analyzing first layer...")
143
 
144
- if hasattr(first_layer, 'self_attn'):
145
- analysis['has_self_attn'] = True
146
- attn = first_layer.self_attn
147
-
148
- print(f" ✅ Has self_attn")
149
-
150
- if hasattr(attn, 'q_proj'):
151
- q_shape = attn.q_proj.weight.shape
152
- k_shape = attn.k_proj.weight.shape
153
-
154
- print(f" Q projection: {q_shape}")
155
- print(f" K projection: {k_shape}")
156
-
157
- if hasattr(config, 'num_attention_heads') and config.num_attention_heads > 0:
158
- head_dim = q_shape[0] // config.num_attention_heads
159
- analysis['head_dim'] = head_dim
160
- print(f" Calculated head_dim: {head_dim}")
161
 
162
- if k_shape[0] != q_shape[0]:
163
- print(f" ✅ GQA detected!")
164
- analysis['gqa_detected'] = True
165
- else:
166
- analysis['gqa_detected'] = False
167
-
168
- analysis['q_dim'] = q_shape[0]
169
- analysis['k_dim'] = k_shape[0]
 
 
 
170
 
171
  print(f"\n{'='*80}\n")
172
 
@@ -177,10 +154,7 @@ def analyze_model_structure(model_url: str) -> Dict[str, Any]:
177
 
178
  except Exception as e:
179
  import traceback
180
- error_msg = traceback.format_exc()
181
- print(f"\n❌ Structure analysis failed:")
182
- print(error_msg)
183
-
184
  return {
185
  'model_url': model_url,
186
  'error': str(e),
@@ -189,7 +163,7 @@ def analyze_model_structure(model_url: str) -> Dict[str, Any]:
189
 
190
 
191
  # =====================================================
192
- # PHOENIX Retention with GQA Support
193
  # =====================================================
194
 
195
  class MultiScaleRetention(nn.Module):
@@ -233,7 +207,7 @@ class MultiScaleRetention(nn.Module):
233
  )
234
 
235
  def _repeat_kv(self, hidden_states: torch.Tensor, n_rep: int) -> torch.Tensor:
236
- """Repeat K/V heads to match Q heads (GQA)"""
237
  batch, num_key_value_heads, slen, head_dim = hidden_states.shape
238
  if n_rep == 1:
239
  return hidden_states
@@ -255,13 +229,12 @@ class MultiScaleRetention(nn.Module):
255
  past_key_values: Optional[Tuple[torch.Tensor]] = None,
256
  **kwargs
257
  ):
258
- """O(n) Retention with GQA support"""
259
  batch_size, seq_len, _ = hidden_states.shape
260
 
261
  target_device = hidden_states.device
262
  target_dtype = hidden_states.dtype
263
 
264
- # ✅ v1.4.3 FIX: dtype과 device 모두 일치
265
  if self.q_proj.weight.device != target_device or self.q_proj.weight.dtype != target_dtype:
266
  self.to(device=target_device, dtype=target_dtype)
267
 
@@ -348,7 +321,7 @@ class MultiScaleRetention(nn.Module):
348
 
349
 
350
  class HierarchicalRetention(nn.Module):
351
- """PHOENIX Hierarchical Retention with GQA"""
352
 
353
  def __init__(self, config, layer_idx=0):
354
  super().__init__()
@@ -386,7 +359,6 @@ class HierarchicalRetention(nn.Module):
386
  target_device = hidden_states.device
387
  target_dtype = hidden_states.dtype
388
 
389
- # ✅ v1.4.3 FIX: dtype과 device 모두 일치
390
  if self.short_proj.weight.device != target_device or self.short_proj.weight.dtype != target_dtype:
391
  self.to(device=target_device, dtype=target_dtype)
392
 
@@ -432,18 +404,16 @@ class HierarchicalRetention(nn.Module):
432
  # =====================================================
433
 
434
  def replace_attention_with_retention(model, use_hierarchical=True, structure_info=None):
435
- """Transformer Attention → PHOENIX Retention (GQA Support)"""
436
  print("🔄 Starting Attention → Retention conversion...")
437
 
438
  replaced_count = 0
439
  total_layers = 0
440
 
441
  layers = None
442
- layer_path = None
443
 
444
  if structure_info and structure_info.get('layer_path'):
445
  layer_path = structure_info['layer_path']
446
- print(f" Using structure info: {layer_path}")
447
 
448
  if layer_path == 'model.layers':
449
  if hasattr(model, 'model') and hasattr(model.model, 'layers'):
@@ -462,8 +432,6 @@ def replace_attention_with_retention(model, use_hierarchical=True, structure_inf
462
  result = path_fn(model)
463
  if result is not None:
464
  layers = result
465
- layer_path = path_name
466
- print(f" ✅ Found layers at: {path_name}")
467
  break
468
 
469
  if layers is None:
@@ -509,7 +477,7 @@ def replace_attention_with_retention(model, use_hierarchical=True, structure_inf
509
 
510
 
511
  # =====================================================
512
- # v2.0 NEW: Fine-tuning 파이프라인
513
  # =====================================================
514
 
515
  def finetune_retention_model(
@@ -519,42 +487,74 @@ def finetune_retention_model(
519
  batch_size: int = 4,
520
  learning_rate: float = 1e-5,
521
  output_dir: str = "/data/finetuning_temp",
522
- use_3phase: bool = True,
523
  ):
524
  """
525
- 🆕 v2.0: Brumby-style Retraining
526
  """
527
  print("\n" + "="*80)
528
- print("🔥 PHOENIX RETRAINING - Brumby Style (v2.0)")
529
  print("="*80)
 
530
  print(f" Target Steps: {num_steps}")
531
- print(f" Batch Size: {batch_size}")
 
532
  print(f" Learning Rate: {learning_rate}")
 
533
 
534
  start_time = time.time()
535
 
536
- # Prepare dataset
 
 
 
 
 
 
537
  train_dataset = prepare_simple_dataset(
538
  tokenizer=tokenizer,
539
  num_steps=num_steps,
540
- batch_size=batch_size
541
  )
542
 
543
- # Training arguments
544
  training_args = TrainingArguments(
545
  output_dir=output_dir,
 
 
 
 
 
 
546
  num_train_epochs=1,
547
- per_device_train_batch_size=batch_size,
548
  learning_rate=learning_rate,
549
  warmup_steps=100,
 
 
 
 
 
 
550
  logging_steps=50,
 
551
  save_steps=1000,
552
- max_steps=num_steps,
553
- fp16=True,
554
- gradient_accumulation_steps=8,
555
- dataloader_num_workers=2,
 
 
 
 
 
 
 
556
  remove_unused_columns=False,
557
  report_to="none",
 
 
 
558
  )
559
 
560
  # Data collator
@@ -563,7 +563,7 @@ def finetune_retention_model(
563
  mlm=False
564
  )
565
 
566
- # Trainer
567
  trainer = Trainer(
568
  model=model,
569
  args=training_args,
@@ -573,13 +573,16 @@ def finetune_retention_model(
573
  )
574
 
575
  # Train!
576
- print(f"\n🚀 Starting Fine-tuning...")
 
 
577
  trainer.train()
578
 
579
  elapsed = time.time() - start_time
580
 
581
  print(f"\n✅ Fine-tuning Complete!")
582
  print(f" Time: {elapsed/60:.1f} minutes")
 
583
  print(f"="*80 + "\n")
584
 
585
  return model
@@ -591,7 +594,7 @@ def prepare_simple_dataset(
591
  batch_size: int,
592
  max_length: int = 2048,
593
  ):
594
- """Simple dataset preparation"""
595
  print(f"\n📊 Preparing Dataset...")
596
 
597
  num_samples = num_steps * batch_size
@@ -620,7 +623,8 @@ def prepare_simple_dataset(
620
  tokenized = dataset.map(
621
  tokenize_function,
622
  batched=True,
623
- remove_columns=dataset.column_names
 
624
  )
625
 
626
  print(f" ✅ Tokenized: {len(tokenized)} samples")
@@ -632,14 +636,14 @@ def estimate_finetuning_cost(
632
  model_size: str,
633
  num_steps: int,
634
  batch_size: int,
635
- gpu_type: str = "A100",
 
636
  ) -> Dict:
637
- """🆕 v2.0: ���용 계산기"""
638
  gpu_costs = {
639
  "H100": 3.0,
640
  "A100": 2.0,
641
  "A10G": 1.0,
642
- "T4": 0.5,
643
  }
644
 
645
  model_step_times = {
@@ -650,20 +654,27 @@ def estimate_finetuning_cost(
650
  "14B": 6.0,
651
  }
652
 
 
653
  step_time = model_step_times.get(model_size, 1.0) * (batch_size / 4)
654
- total_seconds = num_steps * step_time
 
 
655
  total_hours = total_seconds / 3600
656
- total_cost_usd = total_hours * gpu_costs.get(gpu_type, 2.0)
 
 
657
 
658
  return {
659
  'hours': round(total_hours, 2),
660
  'cost_usd': round(total_cost_usd, 2),
661
  'cost_krw': round(total_cost_usd * 1300, 0),
 
 
662
  }
663
 
664
 
665
  # =====================================================
666
- # Custom Modeling Code 생성
667
  # =====================================================
668
 
669
  def generate_modeling_phoenix_code():
@@ -885,17 +896,16 @@ AutoConfig.register("phoenix", PhoenixConfig)
885
 
886
 
887
  # =====================================================
888
- # 저장 함수
889
  # =====================================================
890
 
891
  def save_phoenix_model_with_code(model, tokenizer, output_path, original_model_url, metadata):
892
- """PHOENIX 모델 저장 v2.0"""
893
  output_path = Path(output_path)
894
  output_path.mkdir(parents=True, exist_ok=True)
895
 
896
  print(f"\n💾 Saving PHOENIX model...")
897
 
898
- # Embedding Tying
899
  if hasattr(model.config, 'tie_word_embeddings') and model.config.tie_word_embeddings:
900
  if hasattr(model, 'lm_head') and hasattr(model, 'model') and hasattr(model.model, 'embed_tokens'):
901
  model.lm_head.weight = model.model.embed_tokens.weight
@@ -903,12 +913,10 @@ def save_phoenix_model_with_code(model, tokenizer, output_path, original_model_u
903
  model.save_pretrained(output_path)
904
  tokenizer.save_pretrained(output_path)
905
 
906
- # Custom code
907
  modeling_code = generate_modeling_phoenix_code()
908
  with open(output_path / "modeling_phoenix.py", "w") as f:
909
  f.write(modeling_code)
910
 
911
- # Config
912
  config_path = output_path / "config.json"
913
  if config_path.exists():
914
  with open(config_path, "r") as f:
@@ -924,13 +932,13 @@ def save_phoenix_model_with_code(model, tokenizer, output_path, original_model_u
924
  with open(config_path, "w") as f:
925
  json.dump(config_dict, f, indent=2)
926
 
927
- # Metadata
928
  with open(output_path / 'phoenix_metadata.json', 'w') as f:
929
  json.dump(metadata, f, indent=2)
930
 
931
- # README
932
  readme = f"""# 🔥 PHOENIX v2.0 - {original_model_url}
933
 
 
 
934
  ## Features
935
  - ✅ Brumby-style Retraining
936
  - ✅ O(n) Complexity
@@ -948,19 +956,15 @@ model = AutoModelForCausalLM.from_pretrained(
948
  )
949
  ```
950
 
951
- **VIDraft AI Research Lab** | PHOENIX v2.0
952
  """
953
 
954
  with open(output_path / "README.md", "w") as f:
955
  f.write(readme)
956
 
957
- print(f" ✅ Model saved to {output_path}")
958
 
959
 
960
- # =====================================================
961
- # 업로드 함수
962
- # =====================================================
963
-
964
  def upload_to_huggingface_hub(
965
  model_path: str,
966
  original_model_url: str,
@@ -968,7 +972,7 @@ def upload_to_huggingface_hub(
968
  private: bool = True,
969
  token: str = None,
970
  ) -> Tuple[bool, str, str]:
971
- """Upload PHOENIX model to Hub"""
972
 
973
  if token is None:
974
  token = HF_TOKEN
@@ -1010,12 +1014,8 @@ def upload_to_huggingface_hub(
1010
  return False, "", f"❌ Upload failed: {e}"
1011
 
1012
 
1013
- # =====================================================
1014
- # 평가 함수
1015
- # =====================================================
1016
-
1017
  def evaluate_model_quality(model, tokenizer):
1018
- """모델 품질 평가"""
1019
  test_prompts = [
1020
  "The capital of France is",
1021
  "In machine learning,",
@@ -1053,7 +1053,7 @@ def evaluate_model_quality(model, tokenizer):
1053
 
1054
 
1055
  # =====================================================
1056
- # 버닝 함수 (v2.0 통합)
1057
  # =====================================================
1058
 
1059
  def burn_model_with_finetuning(
@@ -1064,10 +1064,11 @@ def burn_model_with_finetuning(
1064
  num_steps: int = 3000,
1065
  batch_size: int = 4,
1066
  learning_rate: float = 1e-5,
 
1067
  ):
1068
- """🆕 v2.0: Zero-shot + Optional Fine-tuning"""
1069
  print("="*80)
1070
- print("🔥 PHOENIX Model Burning v2.0")
1071
  print("="*80)
1072
 
1073
  output_path = Path(output_dir)
@@ -1078,23 +1079,26 @@ def burn_model_with_finetuning(
1078
  print(f"\n🔍 STEP 1: Structure Analysis...")
1079
  structure_info = analyze_model_structure(model_url)
1080
 
1081
- # STEP 2: Load Model
1082
- print(f"\n📥 STEP 2: Loading model...")
1083
  start_time = time.time()
1084
 
1085
  config = AutoConfig.from_pretrained(model_url, trust_remote_code=True)
 
 
1086
  model = AutoModelForCausalLM.from_pretrained(
1087
  model_url,
1088
  trust_remote_code=True,
1089
  torch_dtype=torch.float16,
1090
- ).to(DEVICE)
 
1091
 
1092
  tokenizer = AutoTokenizer.from_pretrained(model_url, trust_remote_code=True)
1093
  if tokenizer.pad_token is None:
1094
  tokenizer.pad_token = tokenizer.eos_token
1095
 
1096
  load_time = time.time() - start_time
1097
- print(f"✅ Loaded in {load_time:.1f}s")
1098
 
1099
  # STEP 3: Convert
1100
  print(f"\n🔄 STEP 3: Converting Attention → Retention...")
@@ -1111,9 +1115,9 @@ def burn_model_with_finetuning(
1111
 
1112
  print(f"✅ Converted {converted}/{total} layers in {convert_time:.1f}s")
1113
 
1114
- # 🆕 STEP 4: Fine-tuning (Optional)
1115
  if enable_finetuning:
1116
- print(f"\n🚀 STEP 4: Fine-tuning (Brumby-style)...")
1117
  ft_start = time.time()
1118
 
1119
  model = finetune_retention_model(
@@ -1122,13 +1126,14 @@ def burn_model_with_finetuning(
1122
  num_steps=num_steps,
1123
  batch_size=batch_size,
1124
  learning_rate=learning_rate,
 
1125
  )
1126
 
1127
  ft_time = time.time() - ft_start
1128
  print(f"✅ Fine-tuning completed in {ft_time/60:.1f} minutes")
1129
  else:
1130
  ft_time = 0
1131
- print(f"\n⏭️ STEP 4: Fine-tuning skipped (enable for better quality)")
1132
 
1133
  # STEP 5: Evaluate
1134
  print(f"\n📊 STEP 5: Evaluating...")
@@ -1146,6 +1151,8 @@ def burn_model_with_finetuning(
1146
  'quality_score': quality_score,
1147
  'finetuned': enable_finetuning,
1148
  'finetuning_steps': num_steps if enable_finetuning else 0,
 
 
1149
  'timestamp': datetime.now().isoformat(),
1150
  }
1151
 
@@ -1160,14 +1167,15 @@ def burn_model_with_finetuning(
1160
  'quality_score': quality_score,
1161
  'total_time': total_time,
1162
  'finetuned': enable_finetuning,
 
1163
  'structure_info': structure_info,
1164
  }
1165
 
1166
  print(f"\n{'='*80}")
1167
- print(f"✅ Burning Complete!")
 
1168
  print(f" Model: {output_path}")
1169
  print(f" Quality: {quality_score:.2f}/1.00")
1170
- print(f" Fine-tuned: {enable_finetuning}")
1171
  print(f"{'='*80}\n")
1172
 
1173
  return result
@@ -1182,7 +1190,7 @@ def burn_model_with_finetuning(
1182
 
1183
 
1184
  # =====================================================
1185
- # Database
1186
  # =====================================================
1187
 
1188
  class ExperimentDatabase:
@@ -1202,6 +1210,7 @@ class ExperimentDatabase:
1202
  conversion_rate REAL,
1203
  quality_score REAL,
1204
  finetuned BOOLEAN,
 
1205
  timestamp DATETIME DEFAULT CURRENT_TIMESTAMP
1206
  )
1207
  """)
@@ -1212,8 +1221,8 @@ class ExperimentDatabase:
1212
  cursor = conn.cursor()
1213
  cursor.execute("""
1214
  INSERT INTO burning_history
1215
- (model_url, output_path, hub_url, conversion_rate, quality_score, finetuned)
1216
- VALUES (?, ?, ?, ?, ?, ?)
1217
  """, (
1218
  info.get('model_url'),
1219
  info.get('output_path'),
@@ -1221,6 +1230,7 @@ class ExperimentDatabase:
1221
  info.get('conversion_rate'),
1222
  info.get('quality_score'),
1223
  info.get('finetuned'),
 
1224
  ))
1225
  conn.commit()
1226
  return cursor.lastrowid
@@ -1248,11 +1258,12 @@ def burn_phoenix_model_ui(
1248
  ft_steps,
1249
  ft_batch,
1250
  ft_lr,
 
1251
  upload_hub,
1252
  hub_repo,
1253
  hub_private,
1254
  ):
1255
- """Gradio UI 함수"""
1256
 
1257
  try:
1258
  if not model_url.strip():
@@ -1263,11 +1274,11 @@ def burn_phoenix_model_ui(
1263
 
1264
  output_dir = f"{MODELS_PATH}/{output_name}"
1265
 
1266
- # 🆕 v2.0: 비용 추정
1267
  if enable_finetuning:
1268
  model_size = "0.6B" if "0.6B" in model_url else "1.5B"
1269
- cost = estimate_finetuning_cost(model_size, ft_steps, ft_batch)
1270
- print(f"\n💰 Estimated Cost: ${cost['cost_usd']} ({cost['hours']}h)")
1271
 
1272
  # Burn
1273
  result = burn_model_with_finetuning(
@@ -1278,6 +1289,7 @@ def burn_phoenix_model_ui(
1278
  num_steps=ft_steps,
1279
  batch_size=ft_batch,
1280
  learning_rate=ft_lr,
 
1281
  )
1282
 
1283
  if result['status'] != 'success':
@@ -1301,11 +1313,15 @@ def burn_phoenix_model_ui(
1301
  'conversion_rate': result['conversion_rate'],
1302
  'quality_score': result['quality_score'],
1303
  'finetuned': enable_finetuning,
 
1304
  })
1305
 
1306
  # Output
1307
  output_md = f"""
1308
- # 🔥 PHOENIX v2.0 Burning Complete!
 
 
 
1309
 
1310
  ## Model Info
1311
  - **Original**: {model_url}
@@ -1313,23 +1329,22 @@ def burn_phoenix_model_ui(
1313
  - **Conversion**: {result['conversion_rate']*100:.1f}%
1314
  - **Quality**: {result['quality_score']:.2f}/1.00
1315
  - **Fine-tuned**: {'✅ YES' if enable_finetuning else '❌ NO'}
1316
-
1317
- ## Hub Status
1318
  """
1319
 
1320
  if hub_url:
1321
  output_md += f"""
 
 
1322
  ✅ **Uploaded**: [{hub_url}]({hub_url})
1323
 
1324
  ```python
1325
  model = AutoModelForCausalLM.from_pretrained(
1326
  "{hub_url.replace('https://huggingface.co/', '')}",
1327
- trust_remote_code=True
 
1328
  )
1329
  ```
1330
  """
1331
- else:
1332
- output_md += "⏭️ **Upload Skipped**"
1333
 
1334
  # Plot
1335
  fig = go.Figure()
@@ -1338,7 +1353,7 @@ model = AutoModelForCausalLM.from_pretrained(
1338
  y=[result['conversion_rate'], result['quality_score']],
1339
  marker_color=['#3b82f6', '#10b981']
1340
  ))
1341
- fig.update_layout(title="Metrics", yaxis_range=[0, 1])
1342
 
1343
  return output_md, fig
1344
 
@@ -1348,7 +1363,7 @@ model = AutoModelForCausalLM.from_pretrained(
1348
 
1349
 
1350
  def view_history():
1351
- """View history"""
1352
  try:
1353
  history = db.get_history(20)
1354
  if not history:
@@ -1361,7 +1376,8 @@ def view_history():
1361
  x='timestamp',
1362
  y='quality_score',
1363
  color='finetuned',
1364
- title='Burning History'
 
1365
  )
1366
 
1367
  return f"## History\n\n{df.to_markdown(index=False)}", fig
@@ -1373,16 +1389,16 @@ def view_history():
1373
  # Gradio App
1374
  # =====================================================
1375
 
1376
- with gr.Blocks(title="🔥 PHOENIX v2.0", theme=gr.themes.Soft()) as demo:
1377
 
1378
- gr.Markdown("""
1379
- # 🔥 PHOENIX v2.0 - Brumby-inspired Retraining
1380
 
1381
- **Complete Integrated Version**
1382
 
1383
- 🆕 **v2.0 NEW**: Fine-tuning 파이프라인 (Brumby-style)
1384
- v1.4.3: forward() Transformers 호환
1385
- ✅ v1.4.3: dtype 수정 (bfloat16)
1386
  ✅ GQA Support | O(n) Complexity
1387
 
1388
  ---
@@ -1401,35 +1417,47 @@ with gr.Blocks(title="🔥 PHOENIX v2.0", theme=gr.themes.Soft()) as demo:
1401
  burn_name = gr.Textbox(label="💾 Output Name", placeholder="my_model")
1402
 
1403
  gr.Markdown("---")
1404
- gr.Markdown("### 🆕 Fine-tuning (v2.0)")
1405
 
1406
  burn_ft_enable = gr.Checkbox(
1407
  value=False,
1408
  label="🚀 Enable Fine-tuning (Brumby-style)",
1409
- info="Required for quality output!"
1410
  )
1411
 
1412
  burn_ft_steps = gr.Slider(
1413
  1000, 10000, 3000,
1414
  step=100,
1415
- label="Steps (Brumby used 3000)",
1416
  visible=False
1417
  )
1418
 
1419
- burn_ft_batch = gr.Slider(1, 16, 4, step=1, label="Batch Size", visible=False)
 
 
 
 
 
1420
  burn_ft_lr = gr.Number(value=1e-5, label="Learning Rate", visible=False)
1421
 
 
 
 
 
 
 
1422
  def toggle_ft(enabled):
1423
  return [
1424
  gr.update(visible=enabled),
1425
  gr.update(visible=enabled),
1426
  gr.update(visible=enabled),
 
1427
  ]
1428
 
1429
  burn_ft_enable.change(
1430
  toggle_ft,
1431
  [burn_ft_enable],
1432
- [burn_ft_steps, burn_ft_batch, burn_ft_lr]
1433
  )
1434
 
1435
  gr.Markdown("---")
@@ -1449,7 +1477,7 @@ with gr.Blocks(title="🔥 PHOENIX v2.0", theme=gr.themes.Soft()) as demo:
1449
  burn_phoenix_model_ui,
1450
  [
1451
  burn_url, burn_hier, burn_name,
1452
- burn_ft_enable, burn_ft_steps, burn_ft_batch, burn_ft_lr,
1453
  burn_upload, burn_repo, burn_private
1454
  ],
1455
  [burn_output, burn_plot]
@@ -1468,16 +1496,19 @@ with gr.Blocks(title="🔥 PHOENIX v2.0", theme=gr.themes.Soft()) as demo:
1468
  gr.Markdown(f"""
1469
  ---
1470
 
1471
- ## 🔥 PHOENIX v2.0
 
 
1472
 
1473
- **What's New**:
1474
- - 🆕 Brumby-style Fine-tuning Pipeline
1475
- - 🆕 3-Phase Dataset Support
1476
- - 🆕 Cost Calculator
1477
- - All v1.4.3 Fixes Included
 
1478
 
1479
  **Token**: {'✅' if HF_TOKEN else '❌ Not Found'}
1480
- **VIDraft AI Research Lab** | PHOENIX v2.0 Complete
1481
  """)
1482
 
1483
 
 
1
  """
2
+ 🔥 PHOENIX Retention Research Platform v2.0 - MULTI-GPU OPTIMIZED
3
+ H100 x 8 GPU 최적화 버전
4
 
5
+ ✅ v2.0 NEW: Multi-GPU (8x H100) 최적화
6
+ ✅ v2.0 NEW: Accelerate 통합
7
+ ✅ v2.0 NEW: DeepSpeed ZeRO-3 지원
8
+ v2.0 NEW: Gradient Checkpointing
9
+ Fine-tuning 파이프라인 (Brumby-style)
10
+ 모든 v1.4.3 수정사항 포함
11
+
12
+ VIDraft AI Research Lab - Multi-GPU Version v2.0
 
 
 
 
 
13
  """
14
 
15
  import gradio as gr
 
41
  from huggingface_hub import HfApi, create_repo
42
 
43
  # =====================================================
44
+ # 전역 설정 - MULTI-GPU
45
  # =====================================================
46
 
47
+ # GPU 설정
48
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
49
+ NUM_GPUS = torch.cuda.device_count()
50
+
51
  STORAGE_PATH = "/data"
52
  DB_PATH = f"{STORAGE_PATH}/phoenix_experiments.db"
53
  MODELS_PATH = f"{STORAGE_PATH}/phoenix_models"
 
59
  Path(STORAGE_PATH).mkdir(parents=True, exist_ok=True)
60
  Path(MODELS_PATH).mkdir(parents=True, exist_ok=True)
61
 
62
+ print(f"🔥 PHOENIX Platform v2.0 - Multi-GPU Optimized")
63
  print(f"💾 Storage: {STORAGE_PATH}")
64
  print(f"🎯 Default Base Model: {DEFAULT_MODEL}")
65
+ print(f"🚀 GPUs Available: {NUM_GPUS}")
66
+ if NUM_GPUS > 0:
67
+ for i in range(NUM_GPUS):
68
+ print(f" GPU {i}: {torch.cuda.get_device_name(i)}")
69
  if HF_TOKEN:
70
  print(f"🔑 HuggingFace Token: {'*' * 10}{HF_TOKEN[-4:]}")
 
 
71
 
72
  # =====================================================
73
  # 모델 구조 분석 함수
 
84
  config = AutoConfig.from_pretrained(model_url, trust_remote_code=True)
85
 
86
  print(f"✅ Config loaded")
 
 
87
 
88
+ # Multi-GPU: CPU로만 로드 (분석용)
89
+ print(f"\n📦 Loading model structure (CPU only)...")
90
  model = AutoModelForCausalLM.from_pretrained(
91
  model_url,
92
  trust_remote_code=True,
93
  torch_dtype=torch.float16,
94
+ device_map="cpu" # Analysis만 CPU에서
95
  )
96
 
97
  analysis = {
 
107
  'layer_path': None,
108
  }
109
 
110
+ # Layer 분석
 
111
  layers = None
112
  layer_path = None
113
 
114
  possible_paths = [
115
  ('model.layers', lambda m: m.model.layers if hasattr(m, 'model') and hasattr(m.model, 'layers') else None),
116
  ('transformer.h', lambda m: m.transformer.h if hasattr(m, 'transformer') and hasattr(m.transformer, 'h') else None),
 
117
  ]
118
 
119
  for path_name, path_fn in possible_paths:
 
121
  if result is not None:
122
  layers = result
123
  layer_path = path_name
 
124
  break
125
 
126
+ if layers:
127
+ analysis['total_layers'] = len(layers)
128
+ analysis['layer_path'] = layer_path
 
 
 
 
 
 
 
 
 
 
129
 
130
+ if len(layers) > 0:
131
+ first_layer = layers[0]
132
+ if hasattr(first_layer, 'self_attn'):
133
+ analysis['has_self_attn'] = True
134
+ attn = first_layer.self_attn
 
 
 
 
 
 
 
 
 
 
 
 
135
 
136
+ if hasattr(attn, 'q_proj'):
137
+ q_shape = attn.q_proj.weight.shape
138
+ k_shape = attn.k_proj.weight.shape
139
+
140
+ if hasattr(config, 'num_attention_heads') and config.num_attention_heads > 0:
141
+ head_dim = q_shape[0] // config.num_attention_heads
142
+ analysis['head_dim'] = head_dim
143
+
144
+ analysis['gqa_detected'] = (k_shape[0] != q_shape[0])
145
+ analysis['q_dim'] = q_shape[0]
146
+ analysis['k_dim'] = k_shape[0]
147
 
148
  print(f"\n{'='*80}\n")
149
 
 
154
 
155
  except Exception as e:
156
  import traceback
157
+ print(f"\n❌ Structure analysis failed: {e}")
 
 
 
158
  return {
159
  'model_url': model_url,
160
  'error': str(e),
 
163
 
164
 
165
  # =====================================================
166
+ # PHOENIX Retention (동일)
167
  # =====================================================
168
 
169
  class MultiScaleRetention(nn.Module):
 
207
  )
208
 
209
  def _repeat_kv(self, hidden_states: torch.Tensor, n_rep: int) -> torch.Tensor:
210
+ """Repeat K/V heads (GQA)"""
211
  batch, num_key_value_heads, slen, head_dim = hidden_states.shape
212
  if n_rep == 1:
213
  return hidden_states
 
229
  past_key_values: Optional[Tuple[torch.Tensor]] = None,
230
  **kwargs
231
  ):
232
+ """O(n) Retention"""
233
  batch_size, seq_len, _ = hidden_states.shape
234
 
235
  target_device = hidden_states.device
236
  target_dtype = hidden_states.dtype
237
 
 
238
  if self.q_proj.weight.device != target_device or self.q_proj.weight.dtype != target_dtype:
239
  self.to(device=target_device, dtype=target_dtype)
240
 
 
321
 
322
 
323
  class HierarchicalRetention(nn.Module):
324
+ """PHOENIX Hierarchical Retention"""
325
 
326
  def __init__(self, config, layer_idx=0):
327
  super().__init__()
 
359
  target_device = hidden_states.device
360
  target_dtype = hidden_states.dtype
361
 
 
362
  if self.short_proj.weight.device != target_device or self.short_proj.weight.dtype != target_dtype:
363
  self.to(device=target_device, dtype=target_dtype)
364
 
 
404
  # =====================================================
405
 
406
  def replace_attention_with_retention(model, use_hierarchical=True, structure_info=None):
407
+ """Transformer Attention → PHOENIX Retention"""
408
  print("🔄 Starting Attention → Retention conversion...")
409
 
410
  replaced_count = 0
411
  total_layers = 0
412
 
413
  layers = None
 
414
 
415
  if structure_info and structure_info.get('layer_path'):
416
  layer_path = structure_info['layer_path']
 
417
 
418
  if layer_path == 'model.layers':
419
  if hasattr(model, 'model') and hasattr(model.model, 'layers'):
 
432
  result = path_fn(model)
433
  if result is not None:
434
  layers = result
 
 
435
  break
436
 
437
  if layers is None:
 
477
 
478
 
479
  # =====================================================
480
+ # 🆕 MULTI-GPU Fine-tuning 파이프라인
481
  # =====================================================
482
 
483
  def finetune_retention_model(
 
487
  batch_size: int = 4,
488
  learning_rate: float = 1e-5,
489
  output_dir: str = "/data/finetuning_temp",
490
+ use_gradient_checkpointing: bool = True,
491
  ):
492
  """
493
+ 🆕 v2.0: Brumby-style Retraining with Multi-GPU Support
494
  """
495
  print("\n" + "="*80)
496
+ print("🔥 PHOENIX RETRAINING - Multi-GPU (v2.0)")
497
  print("="*80)
498
+ print(f" GPUs: {NUM_GPUS}")
499
  print(f" Target Steps: {num_steps}")
500
+ print(f" Batch Size per GPU: {batch_size}")
501
+ print(f" Global Batch Size: {batch_size * NUM_GPUS}")
502
  print(f" Learning Rate: {learning_rate}")
503
+ print(f" Gradient Checkpointing: {use_gradient_checkpointing}")
504
 
505
  start_time = time.time()
506
 
507
+ # Gradient Checkpointing (메모리 절약)
508
+ if use_gradient_checkpointing:
509
+ if hasattr(model, 'gradient_checkpointing_enable'):
510
+ model.gradient_checkpointing_enable()
511
+ print(f" ✅ Gradient Checkpointing enabled")
512
+
513
+ # Dataset 준비
514
  train_dataset = prepare_simple_dataset(
515
  tokenizer=tokenizer,
516
  num_steps=num_steps,
517
+ batch_size=batch_size * NUM_GPUS # Multi-GPU 고려
518
  )
519
 
520
+ # ✅ Multi-GPU Training Arguments
521
  training_args = TrainingArguments(
522
  output_dir=output_dir,
523
+
524
+ # 🚀 Multi-GPU 설정
525
+ per_device_train_batch_size=batch_size, # GPU당 batch
526
+ gradient_accumulation_steps=max(1, 8 // NUM_GPUS), # GPU 수에 따라 조정
527
+
528
+ # Training 설정
529
  num_train_epochs=1,
530
+ max_steps=num_steps,
531
  learning_rate=learning_rate,
532
  warmup_steps=100,
533
+
534
+ # Optimization
535
+ fp16=True, # Mixed precision
536
+ optim="adamw_torch_fused", # H100 최적화
537
+
538
+ # Logging
539
  logging_steps=50,
540
+ logging_first_step=True,
541
  save_steps=1000,
542
+ save_total_limit=2,
543
+
544
+ # Performance
545
+ dataloader_num_workers=4 * NUM_GPUS, # GPU당 4 workers
546
+ dataloader_pin_memory=True,
547
+
548
+ # Multi-GPU 관련
549
+ ddp_find_unused_parameters=False,
550
+ ddp_backend="nccl", # H100 최적화
551
+
552
+ # Misc
553
  remove_unused_columns=False,
554
  report_to="none",
555
+
556
+ # ✅ DeepSpeed (선택사항)
557
+ # deepspeed="ds_config.json", # DeepSpeed 사용시
558
  )
559
 
560
  # Data collator
 
563
  mlm=False
564
  )
565
 
566
+ # Trainer (자동 Multi-GPU)
567
  trainer = Trainer(
568
  model=model,
569
  args=training_args,
 
573
  )
574
 
575
  # Train!
576
+ print(f"\n🚀 Starting Multi-GPU Fine-tuning...")
577
+ print(f" Using {NUM_GPUS} GPUs")
578
+
579
  trainer.train()
580
 
581
  elapsed = time.time() - start_time
582
 
583
  print(f"\n✅ Fine-tuning Complete!")
584
  print(f" Time: {elapsed/60:.1f} minutes")
585
+ print(f" Effective samples/sec: {(num_steps * batch_size * NUM_GPUS) / elapsed:.2f}")
586
  print(f"="*80 + "\n")
587
 
588
  return model
 
594
  batch_size: int,
595
  max_length: int = 2048,
596
  ):
597
+ """Dataset 준비"""
598
  print(f"\n📊 Preparing Dataset...")
599
 
600
  num_samples = num_steps * batch_size
 
623
  tokenized = dataset.map(
624
  tokenize_function,
625
  batched=True,
626
+ remove_columns=dataset.column_names,
627
+ num_proc=4 # Parallel processing
628
  )
629
 
630
  print(f" ✅ Tokenized: {len(tokenized)} samples")
 
636
  model_size: str,
637
  num_steps: int,
638
  batch_size: int,
639
+ num_gpus: int = NUM_GPUS,
640
+ gpu_type: str = "H100",
641
  ) -> Dict:
642
+ """비용 계산기 - Multi-GPU"""
643
  gpu_costs = {
644
  "H100": 3.0,
645
  "A100": 2.0,
646
  "A10G": 1.0,
 
647
  }
648
 
649
  model_step_times = {
 
654
  "14B": 6.0,
655
  }
656
 
657
+ # Multi-GPU로 인한 시간 단축 (linear scaling 가정)
658
  step_time = model_step_times.get(model_size, 1.0) * (batch_size / 4)
659
+ step_time_per_gpu = step_time / num_gpus # GPU 병렬화
660
+
661
+ total_seconds = num_steps * step_time_per_gpu
662
  total_hours = total_seconds / 3600
663
+
664
+ # 비용은 GPU 수만큼 곱함
665
+ total_cost_usd = total_hours * gpu_costs.get(gpu_type, 2.0) * num_gpus
666
 
667
  return {
668
  'hours': round(total_hours, 2),
669
  'cost_usd': round(total_cost_usd, 2),
670
  'cost_krw': round(total_cost_usd * 1300, 0),
671
+ 'num_gpus': num_gpus,
672
+ 'gpu_type': gpu_type,
673
  }
674
 
675
 
676
  # =====================================================
677
+ # Custom Modeling Code (동일)
678
  # =====================================================
679
 
680
  def generate_modeling_phoenix_code():
 
896
 
897
 
898
  # =====================================================
899
+ # 저장/업로드/평가 (동일)
900
  # =====================================================
901
 
902
  def save_phoenix_model_with_code(model, tokenizer, output_path, original_model_url, metadata):
903
+ """PHOENIX 모델 저장"""
904
  output_path = Path(output_path)
905
  output_path.mkdir(parents=True, exist_ok=True)
906
 
907
  print(f"\n💾 Saving PHOENIX model...")
908
 
 
909
  if hasattr(model.config, 'tie_word_embeddings') and model.config.tie_word_embeddings:
910
  if hasattr(model, 'lm_head') and hasattr(model, 'model') and hasattr(model.model, 'embed_tokens'):
911
  model.lm_head.weight = model.model.embed_tokens.weight
 
913
  model.save_pretrained(output_path)
914
  tokenizer.save_pretrained(output_path)
915
 
 
916
  modeling_code = generate_modeling_phoenix_code()
917
  with open(output_path / "modeling_phoenix.py", "w") as f:
918
  f.write(modeling_code)
919
 
 
920
  config_path = output_path / "config.json"
921
  if config_path.exists():
922
  with open(config_path, "r") as f:
 
932
  with open(config_path, "w") as f:
933
  json.dump(config_dict, f, indent=2)
934
 
 
935
  with open(output_path / 'phoenix_metadata.json', 'w') as f:
936
  json.dump(metadata, f, indent=2)
937
 
 
938
  readme = f"""# 🔥 PHOENIX v2.0 - {original_model_url}
939
 
940
+ **Multi-GPU Trained** with {metadata.get('num_gpus', 1)} GPUs
941
+
942
  ## Features
943
  - ✅ Brumby-style Retraining
944
  - ✅ O(n) Complexity
 
956
  )
957
  ```
958
 
959
+ **VIDraft AI Research Lab** | PHOENIX v2.0 Multi-GPU
960
  """
961
 
962
  with open(output_path / "README.md", "w") as f:
963
  f.write(readme)
964
 
965
+ print(f" ✅ Model saved")
966
 
967
 
 
 
 
 
968
  def upload_to_huggingface_hub(
969
  model_path: str,
970
  original_model_url: str,
 
972
  private: bool = True,
973
  token: str = None,
974
  ) -> Tuple[bool, str, str]:
975
+ """Upload to Hub"""
976
 
977
  if token is None:
978
  token = HF_TOKEN
 
1014
  return False, "", f"❌ Upload failed: {e}"
1015
 
1016
 
 
 
 
 
1017
  def evaluate_model_quality(model, tokenizer):
1018
+ """Quality 평가"""
1019
  test_prompts = [
1020
  "The capital of France is",
1021
  "In machine learning,",
 
1053
 
1054
 
1055
  # =====================================================
1056
+ # 🆕 Multi-GPU Burning 함수
1057
  # =====================================================
1058
 
1059
  def burn_model_with_finetuning(
 
1064
  num_steps: int = 3000,
1065
  batch_size: int = 4,
1066
  learning_rate: float = 1e-5,
1067
+ use_gradient_checkpointing: bool = True,
1068
  ):
1069
+ """🆕 v2.0: Multi-GPU Optimized Burning"""
1070
  print("="*80)
1071
+ print(f"🔥 PHOENIX Model Burning v2.0 - Multi-GPU ({NUM_GPUS} GPUs)")
1072
  print("="*80)
1073
 
1074
  output_path = Path(output_dir)
 
1079
  print(f"\n🔍 STEP 1: Structure Analysis...")
1080
  structure_info = analyze_model_structure(model_url)
1081
 
1082
+ # STEP 2: Load Model with device_map="auto"
1083
+ print(f"\n📥 STEP 2: Loading model (Multi-GPU)...")
1084
  start_time = time.time()
1085
 
1086
  config = AutoConfig.from_pretrained(model_url, trust_remote_code=True)
1087
+
1088
+ # ✅ Multi-GPU: device_map="auto"로 자동 분산
1089
  model = AutoModelForCausalLM.from_pretrained(
1090
  model_url,
1091
  trust_remote_code=True,
1092
  torch_dtype=torch.float16,
1093
+ device_map="auto" # 자동으로 8개 GPU에 분산!
1094
+ )
1095
 
1096
  tokenizer = AutoTokenizer.from_pretrained(model_url, trust_remote_code=True)
1097
  if tokenizer.pad_token is None:
1098
  tokenizer.pad_token = tokenizer.eos_token
1099
 
1100
  load_time = time.time() - start_time
1101
+ print(f"✅ Loaded across {NUM_GPUS} GPUs in {load_time:.1f}s")
1102
 
1103
  # STEP 3: Convert
1104
  print(f"\n🔄 STEP 3: Converting Attention → Retention...")
 
1115
 
1116
  print(f"✅ Converted {converted}/{total} layers in {convert_time:.1f}s")
1117
 
1118
+ # STEP 4: Fine-tuning (Multi-GPU)
1119
  if enable_finetuning:
1120
+ print(f"\n🚀 STEP 4: Multi-GPU Fine-tuning...")
1121
  ft_start = time.time()
1122
 
1123
  model = finetune_retention_model(
 
1126
  num_steps=num_steps,
1127
  batch_size=batch_size,
1128
  learning_rate=learning_rate,
1129
+ use_gradient_checkpointing=use_gradient_checkpointing,
1130
  )
1131
 
1132
  ft_time = time.time() - ft_start
1133
  print(f"✅ Fine-tuning completed in {ft_time/60:.1f} minutes")
1134
  else:
1135
  ft_time = 0
1136
+ print(f"\n⏭️ STEP 4: Fine-tuning skipped")
1137
 
1138
  # STEP 5: Evaluate
1139
  print(f"\n📊 STEP 5: Evaluating...")
 
1151
  'quality_score': quality_score,
1152
  'finetuned': enable_finetuning,
1153
  'finetuning_steps': num_steps if enable_finetuning else 0,
1154
+ 'num_gpus': NUM_GPUS,
1155
+ 'gradient_checkpointing': use_gradient_checkpointing,
1156
  'timestamp': datetime.now().isoformat(),
1157
  }
1158
 
 
1167
  'quality_score': quality_score,
1168
  'total_time': total_time,
1169
  'finetuned': enable_finetuning,
1170
+ 'num_gpus': NUM_GPUS,
1171
  'structure_info': structure_info,
1172
  }
1173
 
1174
  print(f"\n{'='*80}")
1175
+ print(f"✅ Multi-GPU Burning Complete!")
1176
+ print(f" GPUs Used: {NUM_GPUS}")
1177
  print(f" Model: {output_path}")
1178
  print(f" Quality: {quality_score:.2f}/1.00")
 
1179
  print(f"{'='*80}\n")
1180
 
1181
  return result
 
1190
 
1191
 
1192
  # =====================================================
1193
+ # Database (동일)
1194
  # =====================================================
1195
 
1196
  class ExperimentDatabase:
 
1210
  conversion_rate REAL,
1211
  quality_score REAL,
1212
  finetuned BOOLEAN,
1213
+ num_gpus INTEGER,
1214
  timestamp DATETIME DEFAULT CURRENT_TIMESTAMP
1215
  )
1216
  """)
 
1221
  cursor = conn.cursor()
1222
  cursor.execute("""
1223
  INSERT INTO burning_history
1224
+ (model_url, output_path, hub_url, conversion_rate, quality_score, finetuned, num_gpus)
1225
+ VALUES (?, ?, ?, ?, ?, ?, ?)
1226
  """, (
1227
  info.get('model_url'),
1228
  info.get('output_path'),
 
1230
  info.get('conversion_rate'),
1231
  info.get('quality_score'),
1232
  info.get('finetuned'),
1233
+ info.get('num_gpus', 1),
1234
  ))
1235
  conn.commit()
1236
  return cursor.lastrowid
 
1258
  ft_steps,
1259
  ft_batch,
1260
  ft_lr,
1261
+ use_grad_ckpt,
1262
  upload_hub,
1263
  hub_repo,
1264
  hub_private,
1265
  ):
1266
+ """Gradio UI"""
1267
 
1268
  try:
1269
  if not model_url.strip():
 
1274
 
1275
  output_dir = f"{MODELS_PATH}/{output_name}"
1276
 
1277
+ # 비용 추정
1278
  if enable_finetuning:
1279
  model_size = "0.6B" if "0.6B" in model_url else "1.5B"
1280
+ cost = estimate_finetuning_cost(model_size, ft_steps, ft_batch, NUM_GPUS)
1281
+ print(f"\n💰 Estimated Cost: ${cost['cost_usd']} ({cost['hours']}h with {NUM_GPUS} GPUs)")
1282
 
1283
  # Burn
1284
  result = burn_model_with_finetuning(
 
1289
  num_steps=ft_steps,
1290
  batch_size=ft_batch,
1291
  learning_rate=ft_lr,
1292
+ use_gradient_checkpointing=use_grad_ckpt,
1293
  )
1294
 
1295
  if result['status'] != 'success':
 
1313
  'conversion_rate': result['conversion_rate'],
1314
  'quality_score': result['quality_score'],
1315
  'finetuned': enable_finetuning,
1316
+ 'num_gpus': NUM_GPUS,
1317
  })
1318
 
1319
  # Output
1320
  output_md = f"""
1321
+ # 🔥 PHOENIX v2.0 Multi-GPU Complete!
1322
+
1323
+ ## Hardware
1324
+ - **GPUs Used**: {NUM_GPUS} x {torch.cuda.get_device_name(0) if NUM_GPUS > 0 else 'N/A'}
1325
 
1326
  ## Model Info
1327
  - **Original**: {model_url}
 
1329
  - **Conversion**: {result['conversion_rate']*100:.1f}%
1330
  - **Quality**: {result['quality_score']:.2f}/1.00
1331
  - **Fine-tuned**: {'✅ YES' if enable_finetuning else '❌ NO'}
 
 
1332
  """
1333
 
1334
  if hub_url:
1335
  output_md += f"""
1336
+
1337
+ ## Hub Status
1338
  ✅ **Uploaded**: [{hub_url}]({hub_url})
1339
 
1340
  ```python
1341
  model = AutoModelForCausalLM.from_pretrained(
1342
  "{hub_url.replace('https://huggingface.co/', '')}",
1343
+ trust_remote_code=True,
1344
+ device_map="auto" # Multi-GPU
1345
  )
1346
  ```
1347
  """
 
 
1348
 
1349
  # Plot
1350
  fig = go.Figure()
 
1353
  y=[result['conversion_rate'], result['quality_score']],
1354
  marker_color=['#3b82f6', '#10b981']
1355
  ))
1356
+ fig.update_layout(title=f"Metrics ({NUM_GPUS} GPUs)", yaxis_range=[0, 1])
1357
 
1358
  return output_md, fig
1359
 
 
1363
 
1364
 
1365
  def view_history():
1366
+ """History"""
1367
  try:
1368
  history = db.get_history(20)
1369
  if not history:
 
1376
  x='timestamp',
1377
  y='quality_score',
1378
  color='finetuned',
1379
+ size='num_gpus',
1380
+ title='Burning History (Multi-GPU)'
1381
  )
1382
 
1383
  return f"## History\n\n{df.to_markdown(index=False)}", fig
 
1389
  # Gradio App
1390
  # =====================================================
1391
 
1392
+ with gr.Blocks(title="🔥 PHOENIX v2.0 Multi-GPU", theme=gr.themes.Soft()) as demo:
1393
 
1394
+ gr.Markdown(f"""
1395
+ # 🔥 PHOENIX v2.0 - Multi-GPU Optimized
1396
 
1397
+ **H100 x {NUM_GPUS} GPUs Ready**
1398
 
1399
+ 🆕 **v2.0 Multi-GPU**: Accelerate 통합, DDP 지원
1400
+ 🆕 **v2.0**: Fine-tuning 파이프라인 (Brumby-style)
1401
+ ✅ v1.4.3: All fixes included
1402
  ✅ GQA Support | O(n) Complexity
1403
 
1404
  ---
 
1417
  burn_name = gr.Textbox(label="💾 Output Name", placeholder="my_model")
1418
 
1419
  gr.Markdown("---")
1420
+ gr.Markdown(f"### 🆕 Fine-tuning ({NUM_GPUS} GPUs)")
1421
 
1422
  burn_ft_enable = gr.Checkbox(
1423
  value=False,
1424
  label="🚀 Enable Fine-tuning (Brumby-style)",
1425
+ info=f"Multi-GPU acceleration with {NUM_GPUS} GPUs!"
1426
  )
1427
 
1428
  burn_ft_steps = gr.Slider(
1429
  1000, 10000, 3000,
1430
  step=100,
1431
+ label="Steps",
1432
  visible=False
1433
  )
1434
 
1435
+ burn_ft_batch = gr.Slider(
1436
+ 1, 16, 4,
1437
+ step=1,
1438
+ label=f"Batch Size per GPU ({NUM_GPUS} GPUs)",
1439
+ visible=False
1440
+ )
1441
  burn_ft_lr = gr.Number(value=1e-5, label="Learning Rate", visible=False)
1442
 
1443
+ burn_grad_ckpt = gr.Checkbox(
1444
+ value=True,
1445
+ label="✅ Gradient Checkpointing (saves memory)",
1446
+ visible=False
1447
+ )
1448
+
1449
  def toggle_ft(enabled):
1450
  return [
1451
  gr.update(visible=enabled),
1452
  gr.update(visible=enabled),
1453
  gr.update(visible=enabled),
1454
+ gr.update(visible=enabled),
1455
  ]
1456
 
1457
  burn_ft_enable.change(
1458
  toggle_ft,
1459
  [burn_ft_enable],
1460
+ [burn_ft_steps, burn_ft_batch, burn_ft_lr, burn_grad_ckpt]
1461
  )
1462
 
1463
  gr.Markdown("---")
 
1477
  burn_phoenix_model_ui,
1478
  [
1479
  burn_url, burn_hier, burn_name,
1480
+ burn_ft_enable, burn_ft_steps, burn_ft_batch, burn_ft_lr, burn_grad_ckpt,
1481
  burn_upload, burn_repo, burn_private
1482
  ],
1483
  [burn_output, burn_plot]
 
1496
  gr.Markdown(f"""
1497
  ---
1498
 
1499
+ ## 🔥 PHOENIX v2.0 Multi-GPU
1500
+
1501
+ **Hardware**: {NUM_GPUS} x {torch.cuda.get_device_name(0) if NUM_GPUS > 0 else 'N/A'}
1502
 
1503
+ **Features**:
1504
+ - 🆕 Multi-GPU Training (DDP)
1505
+ - 🆕 Gradient Checkpointing
1506
+ - 🆕 H100 Optimized (fused optimizer)
1507
+ - 🆕 Brumby-style Fine-tuning
1508
+ - ✅ All v1.4.3 Fixes
1509
 
1510
  **Token**: {'✅' if HF_TOKEN else '❌ Not Found'}
1511
+ **VIDraft AI Research Lab** | PHOENIX v2.0 Multi-GPU
1512
  """)
1513
 
1514