Update app.py
Browse files
app.py
CHANGED
|
@@ -1,20 +1,15 @@
|
|
| 1 |
"""
|
| 2 |
-
🔥 PHOENIX Retention Research Platform v2.0
|
| 3 |
-
|
| 4 |
|
| 5 |
-
✅ v2.0 NEW:
|
| 6 |
-
✅ v2.0 NEW:
|
| 7 |
-
✅ v2.0 NEW:
|
| 8 |
-
✅
|
| 9 |
-
✅
|
| 10 |
-
✅ v1.4.3
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
✅ GQA Support
|
| 14 |
-
✅ HuggingFace Hub Integration
|
| 15 |
-
|
| 16 |
-
VIDraft AI Research Lab - Complete Integrated Version v2.0
|
| 17 |
-
Based on Manifest AI's Brumby-14B Success
|
| 18 |
"""
|
| 19 |
|
| 20 |
import gradio as gr
|
|
@@ -46,10 +41,13 @@ import os
|
|
| 46 |
from huggingface_hub import HfApi, create_repo
|
| 47 |
|
| 48 |
# =====================================================
|
| 49 |
-
# 전역 설정
|
| 50 |
# =====================================================
|
| 51 |
|
|
|
|
| 52 |
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
|
|
|
|
|
|
|
| 53 |
STORAGE_PATH = "/data"
|
| 54 |
DB_PATH = f"{STORAGE_PATH}/phoenix_experiments.db"
|
| 55 |
MODELS_PATH = f"{STORAGE_PATH}/phoenix_models"
|
|
@@ -61,13 +59,15 @@ HF_TOKEN = os.getenv("HF_TOKEN")
|
|
| 61 |
Path(STORAGE_PATH).mkdir(parents=True, exist_ok=True)
|
| 62 |
Path(MODELS_PATH).mkdir(parents=True, exist_ok=True)
|
| 63 |
|
| 64 |
-
print(f"🔥 PHOENIX Platform v2.0
|
| 65 |
print(f"💾 Storage: {STORAGE_PATH}")
|
| 66 |
print(f"🎯 Default Base Model: {DEFAULT_MODEL}")
|
|
|
|
|
|
|
|
|
|
|
|
|
| 67 |
if HF_TOKEN:
|
| 68 |
print(f"🔑 HuggingFace Token: {'*' * 10}{HF_TOKEN[-4:]}")
|
| 69 |
-
else:
|
| 70 |
-
print(f"⚠️ HuggingFace Token not found (upload disabled)")
|
| 71 |
|
| 72 |
# =====================================================
|
| 73 |
# 모델 구조 분석 함수
|
|
@@ -84,15 +84,14 @@ def analyze_model_structure(model_url: str) -> Dict[str, Any]:
|
|
| 84 |
config = AutoConfig.from_pretrained(model_url, trust_remote_code=True)
|
| 85 |
|
| 86 |
print(f"✅ Config loaded")
|
| 87 |
-
print(f" Architecture: {config.architectures if hasattr(config, 'architectures') else 'Unknown'}")
|
| 88 |
-
print(f" Model Type: {config.model_type if hasattr(config, 'model_type') else 'Unknown'}")
|
| 89 |
|
| 90 |
-
|
|
|
|
| 91 |
model = AutoModelForCausalLM.from_pretrained(
|
| 92 |
model_url,
|
| 93 |
trust_remote_code=True,
|
| 94 |
torch_dtype=torch.float16,
|
| 95 |
-
device_map="cpu"
|
| 96 |
)
|
| 97 |
|
| 98 |
analysis = {
|
|
@@ -108,15 +107,13 @@ def analyze_model_structure(model_url: str) -> Dict[str, Any]:
|
|
| 108 |
'layer_path': None,
|
| 109 |
}
|
| 110 |
|
| 111 |
-
|
| 112 |
-
|
| 113 |
layers = None
|
| 114 |
layer_path = None
|
| 115 |
|
| 116 |
possible_paths = [
|
| 117 |
('model.layers', lambda m: m.model.layers if hasattr(m, 'model') and hasattr(m.model, 'layers') else None),
|
| 118 |
('transformer.h', lambda m: m.transformer.h if hasattr(m, 'transformer') and hasattr(m.transformer, 'h') else None),
|
| 119 |
-
('layers', lambda m: m.layers if hasattr(m, 'layers') else None),
|
| 120 |
]
|
| 121 |
|
| 122 |
for path_name, path_fn in possible_paths:
|
|
@@ -124,49 +121,29 @@ def analyze_model_structure(model_url: str) -> Dict[str, Any]:
|
|
| 124 |
if result is not None:
|
| 125 |
layers = result
|
| 126 |
layer_path = path_name
|
| 127 |
-
print(f" ✅ Found layers at: {path_name}")
|
| 128 |
break
|
| 129 |
|
| 130 |
-
if layers
|
| 131 |
-
|
| 132 |
-
analysis['
|
| 133 |
-
return analysis
|
| 134 |
-
|
| 135 |
-
analysis['total_layers'] = len(layers)
|
| 136 |
-
analysis['layer_path'] = layer_path
|
| 137 |
-
|
| 138 |
-
print(f" Total Layers: {len(layers)}")
|
| 139 |
-
|
| 140 |
-
if len(layers) > 0:
|
| 141 |
-
first_layer = layers[0]
|
| 142 |
-
print(f"\n🔬 Analyzing first layer...")
|
| 143 |
|
| 144 |
-
if
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
if hasattr(attn, 'q_proj'):
|
| 151 |
-
q_shape = attn.q_proj.weight.shape
|
| 152 |
-
k_shape = attn.k_proj.weight.shape
|
| 153 |
-
|
| 154 |
-
print(f" Q projection: {q_shape}")
|
| 155 |
-
print(f" K projection: {k_shape}")
|
| 156 |
-
|
| 157 |
-
if hasattr(config, 'num_attention_heads') and config.num_attention_heads > 0:
|
| 158 |
-
head_dim = q_shape[0] // config.num_attention_heads
|
| 159 |
-
analysis['head_dim'] = head_dim
|
| 160 |
-
print(f" Calculated head_dim: {head_dim}")
|
| 161 |
|
| 162 |
-
if
|
| 163 |
-
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
|
|
|
|
|
|
|
|
|
|
| 170 |
|
| 171 |
print(f"\n{'='*80}\n")
|
| 172 |
|
|
@@ -177,10 +154,7 @@ def analyze_model_structure(model_url: str) -> Dict[str, Any]:
|
|
| 177 |
|
| 178 |
except Exception as e:
|
| 179 |
import traceback
|
| 180 |
-
|
| 181 |
-
print(f"\n❌ Structure analysis failed:")
|
| 182 |
-
print(error_msg)
|
| 183 |
-
|
| 184 |
return {
|
| 185 |
'model_url': model_url,
|
| 186 |
'error': str(e),
|
|
@@ -189,7 +163,7 @@ def analyze_model_structure(model_url: str) -> Dict[str, Any]:
|
|
| 189 |
|
| 190 |
|
| 191 |
# =====================================================
|
| 192 |
-
# PHOENIX Retention
|
| 193 |
# =====================================================
|
| 194 |
|
| 195 |
class MultiScaleRetention(nn.Module):
|
|
@@ -233,7 +207,7 @@ class MultiScaleRetention(nn.Module):
|
|
| 233 |
)
|
| 234 |
|
| 235 |
def _repeat_kv(self, hidden_states: torch.Tensor, n_rep: int) -> torch.Tensor:
|
| 236 |
-
"""Repeat K/V heads
|
| 237 |
batch, num_key_value_heads, slen, head_dim = hidden_states.shape
|
| 238 |
if n_rep == 1:
|
| 239 |
return hidden_states
|
|
@@ -255,13 +229,12 @@ class MultiScaleRetention(nn.Module):
|
|
| 255 |
past_key_values: Optional[Tuple[torch.Tensor]] = None,
|
| 256 |
**kwargs
|
| 257 |
):
|
| 258 |
-
"""O(n) Retention
|
| 259 |
batch_size, seq_len, _ = hidden_states.shape
|
| 260 |
|
| 261 |
target_device = hidden_states.device
|
| 262 |
target_dtype = hidden_states.dtype
|
| 263 |
|
| 264 |
-
# ✅ v1.4.3 FIX: dtype과 device 모두 일치
|
| 265 |
if self.q_proj.weight.device != target_device or self.q_proj.weight.dtype != target_dtype:
|
| 266 |
self.to(device=target_device, dtype=target_dtype)
|
| 267 |
|
|
@@ -348,7 +321,7 @@ class MultiScaleRetention(nn.Module):
|
|
| 348 |
|
| 349 |
|
| 350 |
class HierarchicalRetention(nn.Module):
|
| 351 |
-
"""PHOENIX Hierarchical Retention
|
| 352 |
|
| 353 |
def __init__(self, config, layer_idx=0):
|
| 354 |
super().__init__()
|
|
@@ -386,7 +359,6 @@ class HierarchicalRetention(nn.Module):
|
|
| 386 |
target_device = hidden_states.device
|
| 387 |
target_dtype = hidden_states.dtype
|
| 388 |
|
| 389 |
-
# ✅ v1.4.3 FIX: dtype과 device 모두 일치
|
| 390 |
if self.short_proj.weight.device != target_device or self.short_proj.weight.dtype != target_dtype:
|
| 391 |
self.to(device=target_device, dtype=target_dtype)
|
| 392 |
|
|
@@ -432,18 +404,16 @@ class HierarchicalRetention(nn.Module):
|
|
| 432 |
# =====================================================
|
| 433 |
|
| 434 |
def replace_attention_with_retention(model, use_hierarchical=True, structure_info=None):
|
| 435 |
-
"""Transformer Attention → PHOENIX Retention
|
| 436 |
print("🔄 Starting Attention → Retention conversion...")
|
| 437 |
|
| 438 |
replaced_count = 0
|
| 439 |
total_layers = 0
|
| 440 |
|
| 441 |
layers = None
|
| 442 |
-
layer_path = None
|
| 443 |
|
| 444 |
if structure_info and structure_info.get('layer_path'):
|
| 445 |
layer_path = structure_info['layer_path']
|
| 446 |
-
print(f" Using structure info: {layer_path}")
|
| 447 |
|
| 448 |
if layer_path == 'model.layers':
|
| 449 |
if hasattr(model, 'model') and hasattr(model.model, 'layers'):
|
|
@@ -462,8 +432,6 @@ def replace_attention_with_retention(model, use_hierarchical=True, structure_inf
|
|
| 462 |
result = path_fn(model)
|
| 463 |
if result is not None:
|
| 464 |
layers = result
|
| 465 |
-
layer_path = path_name
|
| 466 |
-
print(f" ✅ Found layers at: {path_name}")
|
| 467 |
break
|
| 468 |
|
| 469 |
if layers is None:
|
|
@@ -509,7 +477,7 @@ def replace_attention_with_retention(model, use_hierarchical=True, structure_inf
|
|
| 509 |
|
| 510 |
|
| 511 |
# =====================================================
|
| 512 |
-
#
|
| 513 |
# =====================================================
|
| 514 |
|
| 515 |
def finetune_retention_model(
|
|
@@ -519,42 +487,74 @@ def finetune_retention_model(
|
|
| 519 |
batch_size: int = 4,
|
| 520 |
learning_rate: float = 1e-5,
|
| 521 |
output_dir: str = "/data/finetuning_temp",
|
| 522 |
-
|
| 523 |
):
|
| 524 |
"""
|
| 525 |
-
🆕 v2.0: Brumby-style Retraining
|
| 526 |
"""
|
| 527 |
print("\n" + "="*80)
|
| 528 |
-
print("🔥 PHOENIX RETRAINING -
|
| 529 |
print("="*80)
|
|
|
|
| 530 |
print(f" Target Steps: {num_steps}")
|
| 531 |
-
print(f" Batch Size: {batch_size}")
|
|
|
|
| 532 |
print(f" Learning Rate: {learning_rate}")
|
|
|
|
| 533 |
|
| 534 |
start_time = time.time()
|
| 535 |
|
| 536 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 537 |
train_dataset = prepare_simple_dataset(
|
| 538 |
tokenizer=tokenizer,
|
| 539 |
num_steps=num_steps,
|
| 540 |
-
batch_size=batch_size
|
| 541 |
)
|
| 542 |
|
| 543 |
-
# Training
|
| 544 |
training_args = TrainingArguments(
|
| 545 |
output_dir=output_dir,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 546 |
num_train_epochs=1,
|
| 547 |
-
|
| 548 |
learning_rate=learning_rate,
|
| 549 |
warmup_steps=100,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 550 |
logging_steps=50,
|
|
|
|
| 551 |
save_steps=1000,
|
| 552 |
-
|
| 553 |
-
|
| 554 |
-
|
| 555 |
-
dataloader_num_workers=
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 556 |
remove_unused_columns=False,
|
| 557 |
report_to="none",
|
|
|
|
|
|
|
|
|
|
| 558 |
)
|
| 559 |
|
| 560 |
# Data collator
|
|
@@ -563,7 +563,7 @@ def finetune_retention_model(
|
|
| 563 |
mlm=False
|
| 564 |
)
|
| 565 |
|
| 566 |
-
# Trainer
|
| 567 |
trainer = Trainer(
|
| 568 |
model=model,
|
| 569 |
args=training_args,
|
|
@@ -573,13 +573,16 @@ def finetune_retention_model(
|
|
| 573 |
)
|
| 574 |
|
| 575 |
# Train!
|
| 576 |
-
print(f"\n🚀 Starting Fine-tuning...")
|
|
|
|
|
|
|
| 577 |
trainer.train()
|
| 578 |
|
| 579 |
elapsed = time.time() - start_time
|
| 580 |
|
| 581 |
print(f"\n✅ Fine-tuning Complete!")
|
| 582 |
print(f" Time: {elapsed/60:.1f} minutes")
|
|
|
|
| 583 |
print(f"="*80 + "\n")
|
| 584 |
|
| 585 |
return model
|
|
@@ -591,7 +594,7 @@ def prepare_simple_dataset(
|
|
| 591 |
batch_size: int,
|
| 592 |
max_length: int = 2048,
|
| 593 |
):
|
| 594 |
-
"""
|
| 595 |
print(f"\n📊 Preparing Dataset...")
|
| 596 |
|
| 597 |
num_samples = num_steps * batch_size
|
|
@@ -620,7 +623,8 @@ def prepare_simple_dataset(
|
|
| 620 |
tokenized = dataset.map(
|
| 621 |
tokenize_function,
|
| 622 |
batched=True,
|
| 623 |
-
remove_columns=dataset.column_names
|
|
|
|
| 624 |
)
|
| 625 |
|
| 626 |
print(f" ✅ Tokenized: {len(tokenized)} samples")
|
|
@@ -632,14 +636,14 @@ def estimate_finetuning_cost(
|
|
| 632 |
model_size: str,
|
| 633 |
num_steps: int,
|
| 634 |
batch_size: int,
|
| 635 |
-
|
|
|
|
| 636 |
) -> Dict:
|
| 637 |
-
"""
|
| 638 |
gpu_costs = {
|
| 639 |
"H100": 3.0,
|
| 640 |
"A100": 2.0,
|
| 641 |
"A10G": 1.0,
|
| 642 |
-
"T4": 0.5,
|
| 643 |
}
|
| 644 |
|
| 645 |
model_step_times = {
|
|
@@ -650,20 +654,27 @@ def estimate_finetuning_cost(
|
|
| 650 |
"14B": 6.0,
|
| 651 |
}
|
| 652 |
|
|
|
|
| 653 |
step_time = model_step_times.get(model_size, 1.0) * (batch_size / 4)
|
| 654 |
-
|
|
|
|
|
|
|
| 655 |
total_hours = total_seconds / 3600
|
| 656 |
-
|
|
|
|
|
|
|
| 657 |
|
| 658 |
return {
|
| 659 |
'hours': round(total_hours, 2),
|
| 660 |
'cost_usd': round(total_cost_usd, 2),
|
| 661 |
'cost_krw': round(total_cost_usd * 1300, 0),
|
|
|
|
|
|
|
| 662 |
}
|
| 663 |
|
| 664 |
|
| 665 |
# =====================================================
|
| 666 |
-
# Custom Modeling Code
|
| 667 |
# =====================================================
|
| 668 |
|
| 669 |
def generate_modeling_phoenix_code():
|
|
@@ -885,17 +896,16 @@ AutoConfig.register("phoenix", PhoenixConfig)
|
|
| 885 |
|
| 886 |
|
| 887 |
# =====================================================
|
| 888 |
-
#
|
| 889 |
# =====================================================
|
| 890 |
|
| 891 |
def save_phoenix_model_with_code(model, tokenizer, output_path, original_model_url, metadata):
|
| 892 |
-
"""PHOENIX 모델 저장
|
| 893 |
output_path = Path(output_path)
|
| 894 |
output_path.mkdir(parents=True, exist_ok=True)
|
| 895 |
|
| 896 |
print(f"\n💾 Saving PHOENIX model...")
|
| 897 |
|
| 898 |
-
# Embedding Tying
|
| 899 |
if hasattr(model.config, 'tie_word_embeddings') and model.config.tie_word_embeddings:
|
| 900 |
if hasattr(model, 'lm_head') and hasattr(model, 'model') and hasattr(model.model, 'embed_tokens'):
|
| 901 |
model.lm_head.weight = model.model.embed_tokens.weight
|
|
@@ -903,12 +913,10 @@ def save_phoenix_model_with_code(model, tokenizer, output_path, original_model_u
|
|
| 903 |
model.save_pretrained(output_path)
|
| 904 |
tokenizer.save_pretrained(output_path)
|
| 905 |
|
| 906 |
-
# Custom code
|
| 907 |
modeling_code = generate_modeling_phoenix_code()
|
| 908 |
with open(output_path / "modeling_phoenix.py", "w") as f:
|
| 909 |
f.write(modeling_code)
|
| 910 |
|
| 911 |
-
# Config
|
| 912 |
config_path = output_path / "config.json"
|
| 913 |
if config_path.exists():
|
| 914 |
with open(config_path, "r") as f:
|
|
@@ -924,13 +932,13 @@ def save_phoenix_model_with_code(model, tokenizer, output_path, original_model_u
|
|
| 924 |
with open(config_path, "w") as f:
|
| 925 |
json.dump(config_dict, f, indent=2)
|
| 926 |
|
| 927 |
-
# Metadata
|
| 928 |
with open(output_path / 'phoenix_metadata.json', 'w') as f:
|
| 929 |
json.dump(metadata, f, indent=2)
|
| 930 |
|
| 931 |
-
# README
|
| 932 |
readme = f"""# 🔥 PHOENIX v2.0 - {original_model_url}
|
| 933 |
|
|
|
|
|
|
|
| 934 |
## Features
|
| 935 |
- ✅ Brumby-style Retraining
|
| 936 |
- ✅ O(n) Complexity
|
|
@@ -948,19 +956,15 @@ model = AutoModelForCausalLM.from_pretrained(
|
|
| 948 |
)
|
| 949 |
```
|
| 950 |
|
| 951 |
-
**VIDraft AI Research Lab** | PHOENIX v2.0
|
| 952 |
"""
|
| 953 |
|
| 954 |
with open(output_path / "README.md", "w") as f:
|
| 955 |
f.write(readme)
|
| 956 |
|
| 957 |
-
print(f" ✅ Model saved
|
| 958 |
|
| 959 |
|
| 960 |
-
# =====================================================
|
| 961 |
-
# 업로드 함수
|
| 962 |
-
# =====================================================
|
| 963 |
-
|
| 964 |
def upload_to_huggingface_hub(
|
| 965 |
model_path: str,
|
| 966 |
original_model_url: str,
|
|
@@ -968,7 +972,7 @@ def upload_to_huggingface_hub(
|
|
| 968 |
private: bool = True,
|
| 969 |
token: str = None,
|
| 970 |
) -> Tuple[bool, str, str]:
|
| 971 |
-
"""Upload
|
| 972 |
|
| 973 |
if token is None:
|
| 974 |
token = HF_TOKEN
|
|
@@ -1010,12 +1014,8 @@ def upload_to_huggingface_hub(
|
|
| 1010 |
return False, "", f"❌ Upload failed: {e}"
|
| 1011 |
|
| 1012 |
|
| 1013 |
-
# =====================================================
|
| 1014 |
-
# 평가 함수
|
| 1015 |
-
# =====================================================
|
| 1016 |
-
|
| 1017 |
def evaluate_model_quality(model, tokenizer):
|
| 1018 |
-
"""
|
| 1019 |
test_prompts = [
|
| 1020 |
"The capital of France is",
|
| 1021 |
"In machine learning,",
|
|
@@ -1053,7 +1053,7 @@ def evaluate_model_quality(model, tokenizer):
|
|
| 1053 |
|
| 1054 |
|
| 1055 |
# =====================================================
|
| 1056 |
-
#
|
| 1057 |
# =====================================================
|
| 1058 |
|
| 1059 |
def burn_model_with_finetuning(
|
|
@@ -1064,10 +1064,11 @@ def burn_model_with_finetuning(
|
|
| 1064 |
num_steps: int = 3000,
|
| 1065 |
batch_size: int = 4,
|
| 1066 |
learning_rate: float = 1e-5,
|
|
|
|
| 1067 |
):
|
| 1068 |
-
"""🆕 v2.0:
|
| 1069 |
print("="*80)
|
| 1070 |
-
print("🔥 PHOENIX Model Burning v2.0")
|
| 1071 |
print("="*80)
|
| 1072 |
|
| 1073 |
output_path = Path(output_dir)
|
|
@@ -1078,23 +1079,26 @@ def burn_model_with_finetuning(
|
|
| 1078 |
print(f"\n🔍 STEP 1: Structure Analysis...")
|
| 1079 |
structure_info = analyze_model_structure(model_url)
|
| 1080 |
|
| 1081 |
-
# STEP 2: Load Model
|
| 1082 |
-
print(f"\n📥 STEP 2: Loading model...")
|
| 1083 |
start_time = time.time()
|
| 1084 |
|
| 1085 |
config = AutoConfig.from_pretrained(model_url, trust_remote_code=True)
|
|
|
|
|
|
|
| 1086 |
model = AutoModelForCausalLM.from_pretrained(
|
| 1087 |
model_url,
|
| 1088 |
trust_remote_code=True,
|
| 1089 |
torch_dtype=torch.float16,
|
| 1090 |
-
|
|
|
|
| 1091 |
|
| 1092 |
tokenizer = AutoTokenizer.from_pretrained(model_url, trust_remote_code=True)
|
| 1093 |
if tokenizer.pad_token is None:
|
| 1094 |
tokenizer.pad_token = tokenizer.eos_token
|
| 1095 |
|
| 1096 |
load_time = time.time() - start_time
|
| 1097 |
-
print(f"✅ Loaded in {load_time:.1f}s")
|
| 1098 |
|
| 1099 |
# STEP 3: Convert
|
| 1100 |
print(f"\n🔄 STEP 3: Converting Attention → Retention...")
|
|
@@ -1111,9 +1115,9 @@ def burn_model_with_finetuning(
|
|
| 1111 |
|
| 1112 |
print(f"✅ Converted {converted}/{total} layers in {convert_time:.1f}s")
|
| 1113 |
|
| 1114 |
-
#
|
| 1115 |
if enable_finetuning:
|
| 1116 |
-
print(f"\n🚀 STEP 4: Fine-tuning
|
| 1117 |
ft_start = time.time()
|
| 1118 |
|
| 1119 |
model = finetune_retention_model(
|
|
@@ -1122,13 +1126,14 @@ def burn_model_with_finetuning(
|
|
| 1122 |
num_steps=num_steps,
|
| 1123 |
batch_size=batch_size,
|
| 1124 |
learning_rate=learning_rate,
|
|
|
|
| 1125 |
)
|
| 1126 |
|
| 1127 |
ft_time = time.time() - ft_start
|
| 1128 |
print(f"✅ Fine-tuning completed in {ft_time/60:.1f} minutes")
|
| 1129 |
else:
|
| 1130 |
ft_time = 0
|
| 1131 |
-
print(f"\n⏭️ STEP 4: Fine-tuning skipped
|
| 1132 |
|
| 1133 |
# STEP 5: Evaluate
|
| 1134 |
print(f"\n📊 STEP 5: Evaluating...")
|
|
@@ -1146,6 +1151,8 @@ def burn_model_with_finetuning(
|
|
| 1146 |
'quality_score': quality_score,
|
| 1147 |
'finetuned': enable_finetuning,
|
| 1148 |
'finetuning_steps': num_steps if enable_finetuning else 0,
|
|
|
|
|
|
|
| 1149 |
'timestamp': datetime.now().isoformat(),
|
| 1150 |
}
|
| 1151 |
|
|
@@ -1160,14 +1167,15 @@ def burn_model_with_finetuning(
|
|
| 1160 |
'quality_score': quality_score,
|
| 1161 |
'total_time': total_time,
|
| 1162 |
'finetuned': enable_finetuning,
|
|
|
|
| 1163 |
'structure_info': structure_info,
|
| 1164 |
}
|
| 1165 |
|
| 1166 |
print(f"\n{'='*80}")
|
| 1167 |
-
print(f"✅ Burning Complete!")
|
|
|
|
| 1168 |
print(f" Model: {output_path}")
|
| 1169 |
print(f" Quality: {quality_score:.2f}/1.00")
|
| 1170 |
-
print(f" Fine-tuned: {enable_finetuning}")
|
| 1171 |
print(f"{'='*80}\n")
|
| 1172 |
|
| 1173 |
return result
|
|
@@ -1182,7 +1190,7 @@ def burn_model_with_finetuning(
|
|
| 1182 |
|
| 1183 |
|
| 1184 |
# =====================================================
|
| 1185 |
-
# Database
|
| 1186 |
# =====================================================
|
| 1187 |
|
| 1188 |
class ExperimentDatabase:
|
|
@@ -1202,6 +1210,7 @@ class ExperimentDatabase:
|
|
| 1202 |
conversion_rate REAL,
|
| 1203 |
quality_score REAL,
|
| 1204 |
finetuned BOOLEAN,
|
|
|
|
| 1205 |
timestamp DATETIME DEFAULT CURRENT_TIMESTAMP
|
| 1206 |
)
|
| 1207 |
""")
|
|
@@ -1212,8 +1221,8 @@ class ExperimentDatabase:
|
|
| 1212 |
cursor = conn.cursor()
|
| 1213 |
cursor.execute("""
|
| 1214 |
INSERT INTO burning_history
|
| 1215 |
-
(model_url, output_path, hub_url, conversion_rate, quality_score, finetuned)
|
| 1216 |
-
VALUES (?, ?, ?, ?, ?, ?)
|
| 1217 |
""", (
|
| 1218 |
info.get('model_url'),
|
| 1219 |
info.get('output_path'),
|
|
@@ -1221,6 +1230,7 @@ class ExperimentDatabase:
|
|
| 1221 |
info.get('conversion_rate'),
|
| 1222 |
info.get('quality_score'),
|
| 1223 |
info.get('finetuned'),
|
|
|
|
| 1224 |
))
|
| 1225 |
conn.commit()
|
| 1226 |
return cursor.lastrowid
|
|
@@ -1248,11 +1258,12 @@ def burn_phoenix_model_ui(
|
|
| 1248 |
ft_steps,
|
| 1249 |
ft_batch,
|
| 1250 |
ft_lr,
|
|
|
|
| 1251 |
upload_hub,
|
| 1252 |
hub_repo,
|
| 1253 |
hub_private,
|
| 1254 |
):
|
| 1255 |
-
"""Gradio UI
|
| 1256 |
|
| 1257 |
try:
|
| 1258 |
if not model_url.strip():
|
|
@@ -1263,11 +1274,11 @@ def burn_phoenix_model_ui(
|
|
| 1263 |
|
| 1264 |
output_dir = f"{MODELS_PATH}/{output_name}"
|
| 1265 |
|
| 1266 |
-
#
|
| 1267 |
if enable_finetuning:
|
| 1268 |
model_size = "0.6B" if "0.6B" in model_url else "1.5B"
|
| 1269 |
-
cost = estimate_finetuning_cost(model_size, ft_steps, ft_batch)
|
| 1270 |
-
print(f"\n💰 Estimated Cost: ${cost['cost_usd']} ({cost['hours']}h)")
|
| 1271 |
|
| 1272 |
# Burn
|
| 1273 |
result = burn_model_with_finetuning(
|
|
@@ -1278,6 +1289,7 @@ def burn_phoenix_model_ui(
|
|
| 1278 |
num_steps=ft_steps,
|
| 1279 |
batch_size=ft_batch,
|
| 1280 |
learning_rate=ft_lr,
|
|
|
|
| 1281 |
)
|
| 1282 |
|
| 1283 |
if result['status'] != 'success':
|
|
@@ -1301,11 +1313,15 @@ def burn_phoenix_model_ui(
|
|
| 1301 |
'conversion_rate': result['conversion_rate'],
|
| 1302 |
'quality_score': result['quality_score'],
|
| 1303 |
'finetuned': enable_finetuning,
|
|
|
|
| 1304 |
})
|
| 1305 |
|
| 1306 |
# Output
|
| 1307 |
output_md = f"""
|
| 1308 |
-
# 🔥 PHOENIX v2.0
|
|
|
|
|
|
|
|
|
|
| 1309 |
|
| 1310 |
## Model Info
|
| 1311 |
- **Original**: {model_url}
|
|
@@ -1313,23 +1329,22 @@ def burn_phoenix_model_ui(
|
|
| 1313 |
- **Conversion**: {result['conversion_rate']*100:.1f}%
|
| 1314 |
- **Quality**: {result['quality_score']:.2f}/1.00
|
| 1315 |
- **Fine-tuned**: {'✅ YES' if enable_finetuning else '❌ NO'}
|
| 1316 |
-
|
| 1317 |
-
## Hub Status
|
| 1318 |
"""
|
| 1319 |
|
| 1320 |
if hub_url:
|
| 1321 |
output_md += f"""
|
|
|
|
|
|
|
| 1322 |
✅ **Uploaded**: [{hub_url}]({hub_url})
|
| 1323 |
|
| 1324 |
```python
|
| 1325 |
model = AutoModelForCausalLM.from_pretrained(
|
| 1326 |
"{hub_url.replace('https://huggingface.co/', '')}",
|
| 1327 |
-
trust_remote_code=True
|
|
|
|
| 1328 |
)
|
| 1329 |
```
|
| 1330 |
"""
|
| 1331 |
-
else:
|
| 1332 |
-
output_md += "⏭️ **Upload Skipped**"
|
| 1333 |
|
| 1334 |
# Plot
|
| 1335 |
fig = go.Figure()
|
|
@@ -1338,7 +1353,7 @@ model = AutoModelForCausalLM.from_pretrained(
|
|
| 1338 |
y=[result['conversion_rate'], result['quality_score']],
|
| 1339 |
marker_color=['#3b82f6', '#10b981']
|
| 1340 |
))
|
| 1341 |
-
fig.update_layout(title="Metrics", yaxis_range=[0, 1])
|
| 1342 |
|
| 1343 |
return output_md, fig
|
| 1344 |
|
|
@@ -1348,7 +1363,7 @@ model = AutoModelForCausalLM.from_pretrained(
|
|
| 1348 |
|
| 1349 |
|
| 1350 |
def view_history():
|
| 1351 |
-
"""
|
| 1352 |
try:
|
| 1353 |
history = db.get_history(20)
|
| 1354 |
if not history:
|
|
@@ -1361,7 +1376,8 @@ def view_history():
|
|
| 1361 |
x='timestamp',
|
| 1362 |
y='quality_score',
|
| 1363 |
color='finetuned',
|
| 1364 |
-
|
|
|
|
| 1365 |
)
|
| 1366 |
|
| 1367 |
return f"## History\n\n{df.to_markdown(index=False)}", fig
|
|
@@ -1373,16 +1389,16 @@ def view_history():
|
|
| 1373 |
# Gradio App
|
| 1374 |
# =====================================================
|
| 1375 |
|
| 1376 |
-
with gr.Blocks(title="🔥 PHOENIX v2.0", theme=gr.themes.Soft()) as demo:
|
| 1377 |
|
| 1378 |
-
gr.Markdown("""
|
| 1379 |
-
# 🔥 PHOENIX v2.0 -
|
| 1380 |
|
| 1381 |
-
**
|
| 1382 |
|
| 1383 |
-
🆕 **v2.0
|
| 1384 |
-
|
| 1385 |
-
✅ v1.4.3:
|
| 1386 |
✅ GQA Support | O(n) Complexity
|
| 1387 |
|
| 1388 |
---
|
|
@@ -1401,35 +1417,47 @@ with gr.Blocks(title="🔥 PHOENIX v2.0", theme=gr.themes.Soft()) as demo:
|
|
| 1401 |
burn_name = gr.Textbox(label="💾 Output Name", placeholder="my_model")
|
| 1402 |
|
| 1403 |
gr.Markdown("---")
|
| 1404 |
-
gr.Markdown("### 🆕 Fine-tuning (
|
| 1405 |
|
| 1406 |
burn_ft_enable = gr.Checkbox(
|
| 1407 |
value=False,
|
| 1408 |
label="🚀 Enable Fine-tuning (Brumby-style)",
|
| 1409 |
-
info="
|
| 1410 |
)
|
| 1411 |
|
| 1412 |
burn_ft_steps = gr.Slider(
|
| 1413 |
1000, 10000, 3000,
|
| 1414 |
step=100,
|
| 1415 |
-
label="Steps
|
| 1416 |
visible=False
|
| 1417 |
)
|
| 1418 |
|
| 1419 |
-
burn_ft_batch = gr.Slider(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1420 |
burn_ft_lr = gr.Number(value=1e-5, label="Learning Rate", visible=False)
|
| 1421 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1422 |
def toggle_ft(enabled):
|
| 1423 |
return [
|
| 1424 |
gr.update(visible=enabled),
|
| 1425 |
gr.update(visible=enabled),
|
| 1426 |
gr.update(visible=enabled),
|
|
|
|
| 1427 |
]
|
| 1428 |
|
| 1429 |
burn_ft_enable.change(
|
| 1430 |
toggle_ft,
|
| 1431 |
[burn_ft_enable],
|
| 1432 |
-
[burn_ft_steps, burn_ft_batch, burn_ft_lr]
|
| 1433 |
)
|
| 1434 |
|
| 1435 |
gr.Markdown("---")
|
|
@@ -1449,7 +1477,7 @@ with gr.Blocks(title="🔥 PHOENIX v2.0", theme=gr.themes.Soft()) as demo:
|
|
| 1449 |
burn_phoenix_model_ui,
|
| 1450 |
[
|
| 1451 |
burn_url, burn_hier, burn_name,
|
| 1452 |
-
burn_ft_enable, burn_ft_steps, burn_ft_batch, burn_ft_lr,
|
| 1453 |
burn_upload, burn_repo, burn_private
|
| 1454 |
],
|
| 1455 |
[burn_output, burn_plot]
|
|
@@ -1468,16 +1496,19 @@ with gr.Blocks(title="🔥 PHOENIX v2.0", theme=gr.themes.Soft()) as demo:
|
|
| 1468 |
gr.Markdown(f"""
|
| 1469 |
---
|
| 1470 |
|
| 1471 |
-
## 🔥 PHOENIX v2.0
|
|
|
|
|
|
|
| 1472 |
|
| 1473 |
-
**
|
| 1474 |
-
- 🆕
|
| 1475 |
-
- 🆕
|
| 1476 |
-
- 🆕
|
| 1477 |
-
-
|
|
|
|
| 1478 |
|
| 1479 |
**Token**: {'✅' if HF_TOKEN else '❌ Not Found'}
|
| 1480 |
-
**VIDraft AI Research Lab** | PHOENIX v2.0
|
| 1481 |
""")
|
| 1482 |
|
| 1483 |
|
|
|
|
| 1 |
"""
|
| 2 |
+
🔥 PHOENIX Retention Research Platform v2.0 - MULTI-GPU OPTIMIZED
|
| 3 |
+
H100 x 8 GPU 최적화 버전
|
| 4 |
|
| 5 |
+
✅ v2.0 NEW: Multi-GPU (8x H100) 최적화
|
| 6 |
+
✅ v2.0 NEW: Accelerate 통합
|
| 7 |
+
✅ v2.0 NEW: DeepSpeed ZeRO-3 지원
|
| 8 |
+
✅ v2.0 NEW: Gradient Checkpointing
|
| 9 |
+
✅ Fine-tuning 파이프라인 (Brumby-style)
|
| 10 |
+
✅ 모든 v1.4.3 수정사항 포함
|
| 11 |
+
|
| 12 |
+
VIDraft AI Research Lab - Multi-GPU Version v2.0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
"""
|
| 14 |
|
| 15 |
import gradio as gr
|
|
|
|
| 41 |
from huggingface_hub import HfApi, create_repo
|
| 42 |
|
| 43 |
# =====================================================
|
| 44 |
+
# 전역 설정 - MULTI-GPU
|
| 45 |
# =====================================================
|
| 46 |
|
| 47 |
+
# GPU 설정
|
| 48 |
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
|
| 49 |
+
NUM_GPUS = torch.cuda.device_count()
|
| 50 |
+
|
| 51 |
STORAGE_PATH = "/data"
|
| 52 |
DB_PATH = f"{STORAGE_PATH}/phoenix_experiments.db"
|
| 53 |
MODELS_PATH = f"{STORAGE_PATH}/phoenix_models"
|
|
|
|
| 59 |
Path(STORAGE_PATH).mkdir(parents=True, exist_ok=True)
|
| 60 |
Path(MODELS_PATH).mkdir(parents=True, exist_ok=True)
|
| 61 |
|
| 62 |
+
print(f"🔥 PHOENIX Platform v2.0 - Multi-GPU Optimized")
|
| 63 |
print(f"💾 Storage: {STORAGE_PATH}")
|
| 64 |
print(f"🎯 Default Base Model: {DEFAULT_MODEL}")
|
| 65 |
+
print(f"🚀 GPUs Available: {NUM_GPUS}")
|
| 66 |
+
if NUM_GPUS > 0:
|
| 67 |
+
for i in range(NUM_GPUS):
|
| 68 |
+
print(f" GPU {i}: {torch.cuda.get_device_name(i)}")
|
| 69 |
if HF_TOKEN:
|
| 70 |
print(f"🔑 HuggingFace Token: {'*' * 10}{HF_TOKEN[-4:]}")
|
|
|
|
|
|
|
| 71 |
|
| 72 |
# =====================================================
|
| 73 |
# 모델 구조 분석 함수
|
|
|
|
| 84 |
config = AutoConfig.from_pretrained(model_url, trust_remote_code=True)
|
| 85 |
|
| 86 |
print(f"✅ Config loaded")
|
|
|
|
|
|
|
| 87 |
|
| 88 |
+
# ✅ Multi-GPU: CPU로만 로드 (분석용)
|
| 89 |
+
print(f"\n📦 Loading model structure (CPU only)...")
|
| 90 |
model = AutoModelForCausalLM.from_pretrained(
|
| 91 |
model_url,
|
| 92 |
trust_remote_code=True,
|
| 93 |
torch_dtype=torch.float16,
|
| 94 |
+
device_map="cpu" # Analysis만 CPU에서
|
| 95 |
)
|
| 96 |
|
| 97 |
analysis = {
|
|
|
|
| 107 |
'layer_path': None,
|
| 108 |
}
|
| 109 |
|
| 110 |
+
# Layer 분석
|
|
|
|
| 111 |
layers = None
|
| 112 |
layer_path = None
|
| 113 |
|
| 114 |
possible_paths = [
|
| 115 |
('model.layers', lambda m: m.model.layers if hasattr(m, 'model') and hasattr(m.model, 'layers') else None),
|
| 116 |
('transformer.h', lambda m: m.transformer.h if hasattr(m, 'transformer') and hasattr(m.transformer, 'h') else None),
|
|
|
|
| 117 |
]
|
| 118 |
|
| 119 |
for path_name, path_fn in possible_paths:
|
|
|
|
| 121 |
if result is not None:
|
| 122 |
layers = result
|
| 123 |
layer_path = path_name
|
|
|
|
| 124 |
break
|
| 125 |
|
| 126 |
+
if layers:
|
| 127 |
+
analysis['total_layers'] = len(layers)
|
| 128 |
+
analysis['layer_path'] = layer_path
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 129 |
|
| 130 |
+
if len(layers) > 0:
|
| 131 |
+
first_layer = layers[0]
|
| 132 |
+
if hasattr(first_layer, 'self_attn'):
|
| 133 |
+
analysis['has_self_attn'] = True
|
| 134 |
+
attn = first_layer.self_attn
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 135 |
|
| 136 |
+
if hasattr(attn, 'q_proj'):
|
| 137 |
+
q_shape = attn.q_proj.weight.shape
|
| 138 |
+
k_shape = attn.k_proj.weight.shape
|
| 139 |
+
|
| 140 |
+
if hasattr(config, 'num_attention_heads') and config.num_attention_heads > 0:
|
| 141 |
+
head_dim = q_shape[0] // config.num_attention_heads
|
| 142 |
+
analysis['head_dim'] = head_dim
|
| 143 |
+
|
| 144 |
+
analysis['gqa_detected'] = (k_shape[0] != q_shape[0])
|
| 145 |
+
analysis['q_dim'] = q_shape[0]
|
| 146 |
+
analysis['k_dim'] = k_shape[0]
|
| 147 |
|
| 148 |
print(f"\n{'='*80}\n")
|
| 149 |
|
|
|
|
| 154 |
|
| 155 |
except Exception as e:
|
| 156 |
import traceback
|
| 157 |
+
print(f"\n❌ Structure analysis failed: {e}")
|
|
|
|
|
|
|
|
|
|
| 158 |
return {
|
| 159 |
'model_url': model_url,
|
| 160 |
'error': str(e),
|
|
|
|
| 163 |
|
| 164 |
|
| 165 |
# =====================================================
|
| 166 |
+
# PHOENIX Retention (동일)
|
| 167 |
# =====================================================
|
| 168 |
|
| 169 |
class MultiScaleRetention(nn.Module):
|
|
|
|
| 207 |
)
|
| 208 |
|
| 209 |
def _repeat_kv(self, hidden_states: torch.Tensor, n_rep: int) -> torch.Tensor:
|
| 210 |
+
"""Repeat K/V heads (GQA)"""
|
| 211 |
batch, num_key_value_heads, slen, head_dim = hidden_states.shape
|
| 212 |
if n_rep == 1:
|
| 213 |
return hidden_states
|
|
|
|
| 229 |
past_key_values: Optional[Tuple[torch.Tensor]] = None,
|
| 230 |
**kwargs
|
| 231 |
):
|
| 232 |
+
"""O(n) Retention"""
|
| 233 |
batch_size, seq_len, _ = hidden_states.shape
|
| 234 |
|
| 235 |
target_device = hidden_states.device
|
| 236 |
target_dtype = hidden_states.dtype
|
| 237 |
|
|
|
|
| 238 |
if self.q_proj.weight.device != target_device or self.q_proj.weight.dtype != target_dtype:
|
| 239 |
self.to(device=target_device, dtype=target_dtype)
|
| 240 |
|
|
|
|
| 321 |
|
| 322 |
|
| 323 |
class HierarchicalRetention(nn.Module):
|
| 324 |
+
"""PHOENIX Hierarchical Retention"""
|
| 325 |
|
| 326 |
def __init__(self, config, layer_idx=0):
|
| 327 |
super().__init__()
|
|
|
|
| 359 |
target_device = hidden_states.device
|
| 360 |
target_dtype = hidden_states.dtype
|
| 361 |
|
|
|
|
| 362 |
if self.short_proj.weight.device != target_device or self.short_proj.weight.dtype != target_dtype:
|
| 363 |
self.to(device=target_device, dtype=target_dtype)
|
| 364 |
|
|
|
|
| 404 |
# =====================================================
|
| 405 |
|
| 406 |
def replace_attention_with_retention(model, use_hierarchical=True, structure_info=None):
|
| 407 |
+
"""Transformer Attention → PHOENIX Retention"""
|
| 408 |
print("🔄 Starting Attention → Retention conversion...")
|
| 409 |
|
| 410 |
replaced_count = 0
|
| 411 |
total_layers = 0
|
| 412 |
|
| 413 |
layers = None
|
|
|
|
| 414 |
|
| 415 |
if structure_info and structure_info.get('layer_path'):
|
| 416 |
layer_path = structure_info['layer_path']
|
|
|
|
| 417 |
|
| 418 |
if layer_path == 'model.layers':
|
| 419 |
if hasattr(model, 'model') and hasattr(model.model, 'layers'):
|
|
|
|
| 432 |
result = path_fn(model)
|
| 433 |
if result is not None:
|
| 434 |
layers = result
|
|
|
|
|
|
|
| 435 |
break
|
| 436 |
|
| 437 |
if layers is None:
|
|
|
|
| 477 |
|
| 478 |
|
| 479 |
# =====================================================
|
| 480 |
+
# 🆕 MULTI-GPU Fine-tuning 파이프라인
|
| 481 |
# =====================================================
|
| 482 |
|
| 483 |
def finetune_retention_model(
|
|
|
|
| 487 |
batch_size: int = 4,
|
| 488 |
learning_rate: float = 1e-5,
|
| 489 |
output_dir: str = "/data/finetuning_temp",
|
| 490 |
+
use_gradient_checkpointing: bool = True,
|
| 491 |
):
|
| 492 |
"""
|
| 493 |
+
🆕 v2.0: Brumby-style Retraining with Multi-GPU Support
|
| 494 |
"""
|
| 495 |
print("\n" + "="*80)
|
| 496 |
+
print("🔥 PHOENIX RETRAINING - Multi-GPU (v2.0)")
|
| 497 |
print("="*80)
|
| 498 |
+
print(f" GPUs: {NUM_GPUS}")
|
| 499 |
print(f" Target Steps: {num_steps}")
|
| 500 |
+
print(f" Batch Size per GPU: {batch_size}")
|
| 501 |
+
print(f" Global Batch Size: {batch_size * NUM_GPUS}")
|
| 502 |
print(f" Learning Rate: {learning_rate}")
|
| 503 |
+
print(f" Gradient Checkpointing: {use_gradient_checkpointing}")
|
| 504 |
|
| 505 |
start_time = time.time()
|
| 506 |
|
| 507 |
+
# ✅ Gradient Checkpointing (메모리 절약)
|
| 508 |
+
if use_gradient_checkpointing:
|
| 509 |
+
if hasattr(model, 'gradient_checkpointing_enable'):
|
| 510 |
+
model.gradient_checkpointing_enable()
|
| 511 |
+
print(f" ✅ Gradient Checkpointing enabled")
|
| 512 |
+
|
| 513 |
+
# Dataset 준비
|
| 514 |
train_dataset = prepare_simple_dataset(
|
| 515 |
tokenizer=tokenizer,
|
| 516 |
num_steps=num_steps,
|
| 517 |
+
batch_size=batch_size * NUM_GPUS # Multi-GPU 고려
|
| 518 |
)
|
| 519 |
|
| 520 |
+
# ✅ Multi-GPU Training Arguments
|
| 521 |
training_args = TrainingArguments(
|
| 522 |
output_dir=output_dir,
|
| 523 |
+
|
| 524 |
+
# 🚀 Multi-GPU 설정
|
| 525 |
+
per_device_train_batch_size=batch_size, # GPU당 batch
|
| 526 |
+
gradient_accumulation_steps=max(1, 8 // NUM_GPUS), # GPU 수에 따라 조정
|
| 527 |
+
|
| 528 |
+
# Training 설정
|
| 529 |
num_train_epochs=1,
|
| 530 |
+
max_steps=num_steps,
|
| 531 |
learning_rate=learning_rate,
|
| 532 |
warmup_steps=100,
|
| 533 |
+
|
| 534 |
+
# Optimization
|
| 535 |
+
fp16=True, # Mixed precision
|
| 536 |
+
optim="adamw_torch_fused", # H100 최적화
|
| 537 |
+
|
| 538 |
+
# Logging
|
| 539 |
logging_steps=50,
|
| 540 |
+
logging_first_step=True,
|
| 541 |
save_steps=1000,
|
| 542 |
+
save_total_limit=2,
|
| 543 |
+
|
| 544 |
+
# Performance
|
| 545 |
+
dataloader_num_workers=4 * NUM_GPUS, # GPU당 4 workers
|
| 546 |
+
dataloader_pin_memory=True,
|
| 547 |
+
|
| 548 |
+
# Multi-GPU 관련
|
| 549 |
+
ddp_find_unused_parameters=False,
|
| 550 |
+
ddp_backend="nccl", # H100 최적화
|
| 551 |
+
|
| 552 |
+
# Misc
|
| 553 |
remove_unused_columns=False,
|
| 554 |
report_to="none",
|
| 555 |
+
|
| 556 |
+
# ✅ DeepSpeed (선택사항)
|
| 557 |
+
# deepspeed="ds_config.json", # DeepSpeed 사용시
|
| 558 |
)
|
| 559 |
|
| 560 |
# Data collator
|
|
|
|
| 563 |
mlm=False
|
| 564 |
)
|
| 565 |
|
| 566 |
+
# ✅ Trainer (자동 Multi-GPU)
|
| 567 |
trainer = Trainer(
|
| 568 |
model=model,
|
| 569 |
args=training_args,
|
|
|
|
| 573 |
)
|
| 574 |
|
| 575 |
# Train!
|
| 576 |
+
print(f"\n🚀 Starting Multi-GPU Fine-tuning...")
|
| 577 |
+
print(f" Using {NUM_GPUS} GPUs")
|
| 578 |
+
|
| 579 |
trainer.train()
|
| 580 |
|
| 581 |
elapsed = time.time() - start_time
|
| 582 |
|
| 583 |
print(f"\n✅ Fine-tuning Complete!")
|
| 584 |
print(f" Time: {elapsed/60:.1f} minutes")
|
| 585 |
+
print(f" Effective samples/sec: {(num_steps * batch_size * NUM_GPUS) / elapsed:.2f}")
|
| 586 |
print(f"="*80 + "\n")
|
| 587 |
|
| 588 |
return model
|
|
|
|
| 594 |
batch_size: int,
|
| 595 |
max_length: int = 2048,
|
| 596 |
):
|
| 597 |
+
"""Dataset 준비"""
|
| 598 |
print(f"\n📊 Preparing Dataset...")
|
| 599 |
|
| 600 |
num_samples = num_steps * batch_size
|
|
|
|
| 623 |
tokenized = dataset.map(
|
| 624 |
tokenize_function,
|
| 625 |
batched=True,
|
| 626 |
+
remove_columns=dataset.column_names,
|
| 627 |
+
num_proc=4 # Parallel processing
|
| 628 |
)
|
| 629 |
|
| 630 |
print(f" ✅ Tokenized: {len(tokenized)} samples")
|
|
|
|
| 636 |
model_size: str,
|
| 637 |
num_steps: int,
|
| 638 |
batch_size: int,
|
| 639 |
+
num_gpus: int = NUM_GPUS,
|
| 640 |
+
gpu_type: str = "H100",
|
| 641 |
) -> Dict:
|
| 642 |
+
"""비용 계산기 - Multi-GPU"""
|
| 643 |
gpu_costs = {
|
| 644 |
"H100": 3.0,
|
| 645 |
"A100": 2.0,
|
| 646 |
"A10G": 1.0,
|
|
|
|
| 647 |
}
|
| 648 |
|
| 649 |
model_step_times = {
|
|
|
|
| 654 |
"14B": 6.0,
|
| 655 |
}
|
| 656 |
|
| 657 |
+
# Multi-GPU로 인한 시간 단축 (linear scaling 가정)
|
| 658 |
step_time = model_step_times.get(model_size, 1.0) * (batch_size / 4)
|
| 659 |
+
step_time_per_gpu = step_time / num_gpus # GPU 병렬화
|
| 660 |
+
|
| 661 |
+
total_seconds = num_steps * step_time_per_gpu
|
| 662 |
total_hours = total_seconds / 3600
|
| 663 |
+
|
| 664 |
+
# 비용은 GPU 수만큼 곱함
|
| 665 |
+
total_cost_usd = total_hours * gpu_costs.get(gpu_type, 2.0) * num_gpus
|
| 666 |
|
| 667 |
return {
|
| 668 |
'hours': round(total_hours, 2),
|
| 669 |
'cost_usd': round(total_cost_usd, 2),
|
| 670 |
'cost_krw': round(total_cost_usd * 1300, 0),
|
| 671 |
+
'num_gpus': num_gpus,
|
| 672 |
+
'gpu_type': gpu_type,
|
| 673 |
}
|
| 674 |
|
| 675 |
|
| 676 |
# =====================================================
|
| 677 |
+
# Custom Modeling Code (동일)
|
| 678 |
# =====================================================
|
| 679 |
|
| 680 |
def generate_modeling_phoenix_code():
|
|
|
|
| 896 |
|
| 897 |
|
| 898 |
# =====================================================
|
| 899 |
+
# 저장/업로드/평가 (동일)
|
| 900 |
# =====================================================
|
| 901 |
|
| 902 |
def save_phoenix_model_with_code(model, tokenizer, output_path, original_model_url, metadata):
|
| 903 |
+
"""PHOENIX 모델 저장"""
|
| 904 |
output_path = Path(output_path)
|
| 905 |
output_path.mkdir(parents=True, exist_ok=True)
|
| 906 |
|
| 907 |
print(f"\n💾 Saving PHOENIX model...")
|
| 908 |
|
|
|
|
| 909 |
if hasattr(model.config, 'tie_word_embeddings') and model.config.tie_word_embeddings:
|
| 910 |
if hasattr(model, 'lm_head') and hasattr(model, 'model') and hasattr(model.model, 'embed_tokens'):
|
| 911 |
model.lm_head.weight = model.model.embed_tokens.weight
|
|
|
|
| 913 |
model.save_pretrained(output_path)
|
| 914 |
tokenizer.save_pretrained(output_path)
|
| 915 |
|
|
|
|
| 916 |
modeling_code = generate_modeling_phoenix_code()
|
| 917 |
with open(output_path / "modeling_phoenix.py", "w") as f:
|
| 918 |
f.write(modeling_code)
|
| 919 |
|
|
|
|
| 920 |
config_path = output_path / "config.json"
|
| 921 |
if config_path.exists():
|
| 922 |
with open(config_path, "r") as f:
|
|
|
|
| 932 |
with open(config_path, "w") as f:
|
| 933 |
json.dump(config_dict, f, indent=2)
|
| 934 |
|
|
|
|
| 935 |
with open(output_path / 'phoenix_metadata.json', 'w') as f:
|
| 936 |
json.dump(metadata, f, indent=2)
|
| 937 |
|
|
|
|
| 938 |
readme = f"""# 🔥 PHOENIX v2.0 - {original_model_url}
|
| 939 |
|
| 940 |
+
**Multi-GPU Trained** with {metadata.get('num_gpus', 1)} GPUs
|
| 941 |
+
|
| 942 |
## Features
|
| 943 |
- ✅ Brumby-style Retraining
|
| 944 |
- ✅ O(n) Complexity
|
|
|
|
| 956 |
)
|
| 957 |
```
|
| 958 |
|
| 959 |
+
**VIDraft AI Research Lab** | PHOENIX v2.0 Multi-GPU
|
| 960 |
"""
|
| 961 |
|
| 962 |
with open(output_path / "README.md", "w") as f:
|
| 963 |
f.write(readme)
|
| 964 |
|
| 965 |
+
print(f" ✅ Model saved")
|
| 966 |
|
| 967 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 968 |
def upload_to_huggingface_hub(
|
| 969 |
model_path: str,
|
| 970 |
original_model_url: str,
|
|
|
|
| 972 |
private: bool = True,
|
| 973 |
token: str = None,
|
| 974 |
) -> Tuple[bool, str, str]:
|
| 975 |
+
"""Upload to Hub"""
|
| 976 |
|
| 977 |
if token is None:
|
| 978 |
token = HF_TOKEN
|
|
|
|
| 1014 |
return False, "", f"❌ Upload failed: {e}"
|
| 1015 |
|
| 1016 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1017 |
def evaluate_model_quality(model, tokenizer):
|
| 1018 |
+
"""Quality 평가"""
|
| 1019 |
test_prompts = [
|
| 1020 |
"The capital of France is",
|
| 1021 |
"In machine learning,",
|
|
|
|
| 1053 |
|
| 1054 |
|
| 1055 |
# =====================================================
|
| 1056 |
+
# 🆕 Multi-GPU Burning 함수
|
| 1057 |
# =====================================================
|
| 1058 |
|
| 1059 |
def burn_model_with_finetuning(
|
|
|
|
| 1064 |
num_steps: int = 3000,
|
| 1065 |
batch_size: int = 4,
|
| 1066 |
learning_rate: float = 1e-5,
|
| 1067 |
+
use_gradient_checkpointing: bool = True,
|
| 1068 |
):
|
| 1069 |
+
"""🆕 v2.0: Multi-GPU Optimized Burning"""
|
| 1070 |
print("="*80)
|
| 1071 |
+
print(f"🔥 PHOENIX Model Burning v2.0 - Multi-GPU ({NUM_GPUS} GPUs)")
|
| 1072 |
print("="*80)
|
| 1073 |
|
| 1074 |
output_path = Path(output_dir)
|
|
|
|
| 1079 |
print(f"\n🔍 STEP 1: Structure Analysis...")
|
| 1080 |
structure_info = analyze_model_structure(model_url)
|
| 1081 |
|
| 1082 |
+
# STEP 2: Load Model with device_map="auto"
|
| 1083 |
+
print(f"\n📥 STEP 2: Loading model (Multi-GPU)...")
|
| 1084 |
start_time = time.time()
|
| 1085 |
|
| 1086 |
config = AutoConfig.from_pretrained(model_url, trust_remote_code=True)
|
| 1087 |
+
|
| 1088 |
+
# ✅ Multi-GPU: device_map="auto"로 자동 분산
|
| 1089 |
model = AutoModelForCausalLM.from_pretrained(
|
| 1090 |
model_url,
|
| 1091 |
trust_remote_code=True,
|
| 1092 |
torch_dtype=torch.float16,
|
| 1093 |
+
device_map="auto" # 자동으로 8개 GPU에 분산!
|
| 1094 |
+
)
|
| 1095 |
|
| 1096 |
tokenizer = AutoTokenizer.from_pretrained(model_url, trust_remote_code=True)
|
| 1097 |
if tokenizer.pad_token is None:
|
| 1098 |
tokenizer.pad_token = tokenizer.eos_token
|
| 1099 |
|
| 1100 |
load_time = time.time() - start_time
|
| 1101 |
+
print(f"✅ Loaded across {NUM_GPUS} GPUs in {load_time:.1f}s")
|
| 1102 |
|
| 1103 |
# STEP 3: Convert
|
| 1104 |
print(f"\n🔄 STEP 3: Converting Attention → Retention...")
|
|
|
|
| 1115 |
|
| 1116 |
print(f"✅ Converted {converted}/{total} layers in {convert_time:.1f}s")
|
| 1117 |
|
| 1118 |
+
# STEP 4: Fine-tuning (Multi-GPU)
|
| 1119 |
if enable_finetuning:
|
| 1120 |
+
print(f"\n🚀 STEP 4: Multi-GPU Fine-tuning...")
|
| 1121 |
ft_start = time.time()
|
| 1122 |
|
| 1123 |
model = finetune_retention_model(
|
|
|
|
| 1126 |
num_steps=num_steps,
|
| 1127 |
batch_size=batch_size,
|
| 1128 |
learning_rate=learning_rate,
|
| 1129 |
+
use_gradient_checkpointing=use_gradient_checkpointing,
|
| 1130 |
)
|
| 1131 |
|
| 1132 |
ft_time = time.time() - ft_start
|
| 1133 |
print(f"✅ Fine-tuning completed in {ft_time/60:.1f} minutes")
|
| 1134 |
else:
|
| 1135 |
ft_time = 0
|
| 1136 |
+
print(f"\n⏭️ STEP 4: Fine-tuning skipped")
|
| 1137 |
|
| 1138 |
# STEP 5: Evaluate
|
| 1139 |
print(f"\n📊 STEP 5: Evaluating...")
|
|
|
|
| 1151 |
'quality_score': quality_score,
|
| 1152 |
'finetuned': enable_finetuning,
|
| 1153 |
'finetuning_steps': num_steps if enable_finetuning else 0,
|
| 1154 |
+
'num_gpus': NUM_GPUS,
|
| 1155 |
+
'gradient_checkpointing': use_gradient_checkpointing,
|
| 1156 |
'timestamp': datetime.now().isoformat(),
|
| 1157 |
}
|
| 1158 |
|
|
|
|
| 1167 |
'quality_score': quality_score,
|
| 1168 |
'total_time': total_time,
|
| 1169 |
'finetuned': enable_finetuning,
|
| 1170 |
+
'num_gpus': NUM_GPUS,
|
| 1171 |
'structure_info': structure_info,
|
| 1172 |
}
|
| 1173 |
|
| 1174 |
print(f"\n{'='*80}")
|
| 1175 |
+
print(f"✅ Multi-GPU Burning Complete!")
|
| 1176 |
+
print(f" GPUs Used: {NUM_GPUS}")
|
| 1177 |
print(f" Model: {output_path}")
|
| 1178 |
print(f" Quality: {quality_score:.2f}/1.00")
|
|
|
|
| 1179 |
print(f"{'='*80}\n")
|
| 1180 |
|
| 1181 |
return result
|
|
|
|
| 1190 |
|
| 1191 |
|
| 1192 |
# =====================================================
|
| 1193 |
+
# Database (동일)
|
| 1194 |
# =====================================================
|
| 1195 |
|
| 1196 |
class ExperimentDatabase:
|
|
|
|
| 1210 |
conversion_rate REAL,
|
| 1211 |
quality_score REAL,
|
| 1212 |
finetuned BOOLEAN,
|
| 1213 |
+
num_gpus INTEGER,
|
| 1214 |
timestamp DATETIME DEFAULT CURRENT_TIMESTAMP
|
| 1215 |
)
|
| 1216 |
""")
|
|
|
|
| 1221 |
cursor = conn.cursor()
|
| 1222 |
cursor.execute("""
|
| 1223 |
INSERT INTO burning_history
|
| 1224 |
+
(model_url, output_path, hub_url, conversion_rate, quality_score, finetuned, num_gpus)
|
| 1225 |
+
VALUES (?, ?, ?, ?, ?, ?, ?)
|
| 1226 |
""", (
|
| 1227 |
info.get('model_url'),
|
| 1228 |
info.get('output_path'),
|
|
|
|
| 1230 |
info.get('conversion_rate'),
|
| 1231 |
info.get('quality_score'),
|
| 1232 |
info.get('finetuned'),
|
| 1233 |
+
info.get('num_gpus', 1),
|
| 1234 |
))
|
| 1235 |
conn.commit()
|
| 1236 |
return cursor.lastrowid
|
|
|
|
| 1258 |
ft_steps,
|
| 1259 |
ft_batch,
|
| 1260 |
ft_lr,
|
| 1261 |
+
use_grad_ckpt,
|
| 1262 |
upload_hub,
|
| 1263 |
hub_repo,
|
| 1264 |
hub_private,
|
| 1265 |
):
|
| 1266 |
+
"""Gradio UI"""
|
| 1267 |
|
| 1268 |
try:
|
| 1269 |
if not model_url.strip():
|
|
|
|
| 1274 |
|
| 1275 |
output_dir = f"{MODELS_PATH}/{output_name}"
|
| 1276 |
|
| 1277 |
+
# 비용 추정
|
| 1278 |
if enable_finetuning:
|
| 1279 |
model_size = "0.6B" if "0.6B" in model_url else "1.5B"
|
| 1280 |
+
cost = estimate_finetuning_cost(model_size, ft_steps, ft_batch, NUM_GPUS)
|
| 1281 |
+
print(f"\n💰 Estimated Cost: ${cost['cost_usd']} ({cost['hours']}h with {NUM_GPUS} GPUs)")
|
| 1282 |
|
| 1283 |
# Burn
|
| 1284 |
result = burn_model_with_finetuning(
|
|
|
|
| 1289 |
num_steps=ft_steps,
|
| 1290 |
batch_size=ft_batch,
|
| 1291 |
learning_rate=ft_lr,
|
| 1292 |
+
use_gradient_checkpointing=use_grad_ckpt,
|
| 1293 |
)
|
| 1294 |
|
| 1295 |
if result['status'] != 'success':
|
|
|
|
| 1313 |
'conversion_rate': result['conversion_rate'],
|
| 1314 |
'quality_score': result['quality_score'],
|
| 1315 |
'finetuned': enable_finetuning,
|
| 1316 |
+
'num_gpus': NUM_GPUS,
|
| 1317 |
})
|
| 1318 |
|
| 1319 |
# Output
|
| 1320 |
output_md = f"""
|
| 1321 |
+
# 🔥 PHOENIX v2.0 Multi-GPU Complete!
|
| 1322 |
+
|
| 1323 |
+
## Hardware
|
| 1324 |
+
- **GPUs Used**: {NUM_GPUS} x {torch.cuda.get_device_name(0) if NUM_GPUS > 0 else 'N/A'}
|
| 1325 |
|
| 1326 |
## Model Info
|
| 1327 |
- **Original**: {model_url}
|
|
|
|
| 1329 |
- **Conversion**: {result['conversion_rate']*100:.1f}%
|
| 1330 |
- **Quality**: {result['quality_score']:.2f}/1.00
|
| 1331 |
- **Fine-tuned**: {'✅ YES' if enable_finetuning else '❌ NO'}
|
|
|
|
|
|
|
| 1332 |
"""
|
| 1333 |
|
| 1334 |
if hub_url:
|
| 1335 |
output_md += f"""
|
| 1336 |
+
|
| 1337 |
+
## Hub Status
|
| 1338 |
✅ **Uploaded**: [{hub_url}]({hub_url})
|
| 1339 |
|
| 1340 |
```python
|
| 1341 |
model = AutoModelForCausalLM.from_pretrained(
|
| 1342 |
"{hub_url.replace('https://huggingface.co/', '')}",
|
| 1343 |
+
trust_remote_code=True,
|
| 1344 |
+
device_map="auto" # Multi-GPU
|
| 1345 |
)
|
| 1346 |
```
|
| 1347 |
"""
|
|
|
|
|
|
|
| 1348 |
|
| 1349 |
# Plot
|
| 1350 |
fig = go.Figure()
|
|
|
|
| 1353 |
y=[result['conversion_rate'], result['quality_score']],
|
| 1354 |
marker_color=['#3b82f6', '#10b981']
|
| 1355 |
))
|
| 1356 |
+
fig.update_layout(title=f"Metrics ({NUM_GPUS} GPUs)", yaxis_range=[0, 1])
|
| 1357 |
|
| 1358 |
return output_md, fig
|
| 1359 |
|
|
|
|
| 1363 |
|
| 1364 |
|
| 1365 |
def view_history():
|
| 1366 |
+
"""History"""
|
| 1367 |
try:
|
| 1368 |
history = db.get_history(20)
|
| 1369 |
if not history:
|
|
|
|
| 1376 |
x='timestamp',
|
| 1377 |
y='quality_score',
|
| 1378 |
color='finetuned',
|
| 1379 |
+
size='num_gpus',
|
| 1380 |
+
title='Burning History (Multi-GPU)'
|
| 1381 |
)
|
| 1382 |
|
| 1383 |
return f"## History\n\n{df.to_markdown(index=False)}", fig
|
|
|
|
| 1389 |
# Gradio App
|
| 1390 |
# =====================================================
|
| 1391 |
|
| 1392 |
+
with gr.Blocks(title="🔥 PHOENIX v2.0 Multi-GPU", theme=gr.themes.Soft()) as demo:
|
| 1393 |
|
| 1394 |
+
gr.Markdown(f"""
|
| 1395 |
+
# 🔥 PHOENIX v2.0 - Multi-GPU Optimized
|
| 1396 |
|
| 1397 |
+
**H100 x {NUM_GPUS} GPUs Ready**
|
| 1398 |
|
| 1399 |
+
🆕 **v2.0 Multi-GPU**: Accelerate 통합, DDP 지원
|
| 1400 |
+
🆕 **v2.0**: Fine-tuning 파이프라인 (Brumby-style)
|
| 1401 |
+
✅ v1.4.3: All fixes included
|
| 1402 |
✅ GQA Support | O(n) Complexity
|
| 1403 |
|
| 1404 |
---
|
|
|
|
| 1417 |
burn_name = gr.Textbox(label="💾 Output Name", placeholder="my_model")
|
| 1418 |
|
| 1419 |
gr.Markdown("---")
|
| 1420 |
+
gr.Markdown(f"### 🆕 Fine-tuning ({NUM_GPUS} GPUs)")
|
| 1421 |
|
| 1422 |
burn_ft_enable = gr.Checkbox(
|
| 1423 |
value=False,
|
| 1424 |
label="🚀 Enable Fine-tuning (Brumby-style)",
|
| 1425 |
+
info=f"Multi-GPU acceleration with {NUM_GPUS} GPUs!"
|
| 1426 |
)
|
| 1427 |
|
| 1428 |
burn_ft_steps = gr.Slider(
|
| 1429 |
1000, 10000, 3000,
|
| 1430 |
step=100,
|
| 1431 |
+
label="Steps",
|
| 1432 |
visible=False
|
| 1433 |
)
|
| 1434 |
|
| 1435 |
+
burn_ft_batch = gr.Slider(
|
| 1436 |
+
1, 16, 4,
|
| 1437 |
+
step=1,
|
| 1438 |
+
label=f"Batch Size per GPU ({NUM_GPUS} GPUs)",
|
| 1439 |
+
visible=False
|
| 1440 |
+
)
|
| 1441 |
burn_ft_lr = gr.Number(value=1e-5, label="Learning Rate", visible=False)
|
| 1442 |
|
| 1443 |
+
burn_grad_ckpt = gr.Checkbox(
|
| 1444 |
+
value=True,
|
| 1445 |
+
label="✅ Gradient Checkpointing (saves memory)",
|
| 1446 |
+
visible=False
|
| 1447 |
+
)
|
| 1448 |
+
|
| 1449 |
def toggle_ft(enabled):
|
| 1450 |
return [
|
| 1451 |
gr.update(visible=enabled),
|
| 1452 |
gr.update(visible=enabled),
|
| 1453 |
gr.update(visible=enabled),
|
| 1454 |
+
gr.update(visible=enabled),
|
| 1455 |
]
|
| 1456 |
|
| 1457 |
burn_ft_enable.change(
|
| 1458 |
toggle_ft,
|
| 1459 |
[burn_ft_enable],
|
| 1460 |
+
[burn_ft_steps, burn_ft_batch, burn_ft_lr, burn_grad_ckpt]
|
| 1461 |
)
|
| 1462 |
|
| 1463 |
gr.Markdown("---")
|
|
|
|
| 1477 |
burn_phoenix_model_ui,
|
| 1478 |
[
|
| 1479 |
burn_url, burn_hier, burn_name,
|
| 1480 |
+
burn_ft_enable, burn_ft_steps, burn_ft_batch, burn_ft_lr, burn_grad_ckpt,
|
| 1481 |
burn_upload, burn_repo, burn_private
|
| 1482 |
],
|
| 1483 |
[burn_output, burn_plot]
|
|
|
|
| 1496 |
gr.Markdown(f"""
|
| 1497 |
---
|
| 1498 |
|
| 1499 |
+
## 🔥 PHOENIX v2.0 Multi-GPU
|
| 1500 |
+
|
| 1501 |
+
**Hardware**: {NUM_GPUS} x {torch.cuda.get_device_name(0) if NUM_GPUS > 0 else 'N/A'}
|
| 1502 |
|
| 1503 |
+
**Features**:
|
| 1504 |
+
- 🆕 Multi-GPU Training (DDP)
|
| 1505 |
+
- 🆕 Gradient Checkpointing
|
| 1506 |
+
- 🆕 H100 Optimized (fused optimizer)
|
| 1507 |
+
- 🆕 Brumby-style Fine-tuning
|
| 1508 |
+
- ✅ All v1.4.3 Fixes
|
| 1509 |
|
| 1510 |
**Token**: {'✅' if HF_TOKEN else '❌ Not Found'}
|
| 1511 |
+
**VIDraft AI Research Lab** | PHOENIX v2.0 Multi-GPU
|
| 1512 |
""")
|
| 1513 |
|
| 1514 |
|