"""
πŸ”₯ PHOENIX Retention Research Platform v2.0 - MULTI-GPU OPTIMIZED
H100 x 8 GPU μ΅œμ ν™” 버전
βœ… v2.0 NEW: Multi-GPU (8x H100) μ΅œμ ν™”
βœ… v2.0 NEW: Accelerate 톡합
βœ… v2.0 NEW: DeepSpeed ZeRO-3 지원
βœ… v2.0 NEW: Gradient Checkpointing
βœ… Fine-tuning νŒŒμ΄ν”„λΌμΈ (Brumby-style)
βœ… λͺ¨λ“  v1.4.3 μˆ˜μ •μ‚¬ν•­ 포함
VIDraft AI Research Lab - Multi-GPU Version v2.0
"""
import gradio as gr
import torch
import torch.nn as nn
import torch.nn.functional as F
import sqlite3
import json
import time
import numpy as np
from datetime import datetime
from pathlib import Path
import plotly.graph_objects as go
import plotly.express as px
import pandas as pd
from typing import Dict, List, Any, Tuple, Optional
from transformers import (
AutoModel, AutoTokenizer, AutoConfig, AutoModelForCausalLM,
get_cosine_schedule_with_warmup, TrainingArguments, Trainer,
DataCollatorForLanguageModeling
)
from datasets import load_dataset, concatenate_datasets
from torch.utils.data import Dataset, DataLoader
from accelerate import Accelerator
from tqdm import tqdm
import copy
import shutil
import os
from huggingface_hub import HfApi, create_repo
# =====================================================
# Global settings - MULTI-GPU
# =====================================================
# GPU setup
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
NUM_GPUS = torch.cuda.device_count()
# βœ… Ubuntu-compatible: use the home directory or an environment variable
STORAGE_PATH = os.getenv("PHOENIX_STORAGE_PATH", str(Path.home() / "phoenix_data"))
DB_PATH = f"{STORAGE_PATH}/phoenix_experiments.db"
MODELS_PATH = f"{STORAGE_PATH}/phoenix_models"
DEFAULT_MODEL = "Qwen/Qwen3-0.6B"
# HuggingFace Token
HF_TOKEN = os.getenv("HF_TOKEN")
# Create directories (handle permission errors)
try:
Path(STORAGE_PATH).mkdir(parents=True, exist_ok=True)
Path(MODELS_PATH).mkdir(parents=True, exist_ok=True)
print(f"βœ… Storage initialized: {STORAGE_PATH}")
except PermissionError:
print(f"⚠️ Permission denied for {STORAGE_PATH}")
print(f" Using current directory instead")
STORAGE_PATH = "./phoenix_data"
DB_PATH = f"{STORAGE_PATH}/phoenix_experiments.db"
MODELS_PATH = f"{STORAGE_PATH}/phoenix_models"
Path(STORAGE_PATH).mkdir(parents=True, exist_ok=True)
Path(MODELS_PATH).mkdir(parents=True, exist_ok=True)
print(f"πŸ”₯ PHOENIX Platform v2.0 - Multi-GPU Optimized")
print(f"πŸ’Ύ Storage: {STORAGE_PATH}")
print(f"🎯 Default Base Model: {DEFAULT_MODEL}")
print(f"πŸš€ GPUs Available: {NUM_GPUS}")
if NUM_GPUS > 0:
for i in range(NUM_GPUS):
print(f" GPU {i}: {torch.cuda.get_device_name(i)}")
if HF_TOKEN:
print(f"πŸ”‘ HuggingFace Token: {'*' * 10}{HF_TOKEN[-4:]}")
# =====================================================
# λͺ¨λΈ ꡬ쑰 뢄석 ν•¨μˆ˜
# =====================================================
def analyze_model_structure(model_url: str) -> Dict[str, Any]:
"""πŸ” λͺ¨λΈ ꡬ쑰 사전 뢄석"""
print("\n" + "="*80)
print("πŸ” MODEL STRUCTURE ANALYSIS")
print("="*80)
try:
print(f"\nπŸ“₯ Loading model config: {model_url}")
config = AutoConfig.from_pretrained(model_url, trust_remote_code=True)
print(f"βœ… Config loaded")
        # βœ… Multi-GPU: load on CPU only (for analysis)
print(f"\nπŸ“¦ Loading model structure (CPU only)...")
model = AutoModelForCausalLM.from_pretrained(
model_url,
trust_remote_code=True,
torch_dtype=torch.float16,
device_map="cpu" # Analysis만 CPUμ—μ„œ
)
analysis = {
'model_url': model_url,
'model_type': config.model_type if hasattr(config, 'model_type') else 'unknown',
'architectures': config.architectures[0] if hasattr(config, 'architectures') else 'unknown',
'hidden_size': config.hidden_size if hasattr(config, 'hidden_size') else 0,
'num_attention_heads': config.num_attention_heads if hasattr(config, 'num_attention_heads') else 0,
'num_hidden_layers': config.num_hidden_layers if hasattr(config, 'num_hidden_layers') else 0,
'num_key_value_heads': config.num_key_value_heads if hasattr(config, 'num_key_value_heads') else None,
'total_layers': 0,
'has_self_attn': False,
'layer_path': None,
}
        # Layer analysis
layers = None
layer_path = None
possible_paths = [
('model.layers', lambda m: m.model.layers if hasattr(m, 'model') and hasattr(m.model, 'layers') else None),
('transformer.h', lambda m: m.transformer.h if hasattr(m, 'transformer') and hasattr(m.transformer, 'h') else None),
]
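        # 'model.layers' covers LLaMA/Qwen-style models; 'transformer.h' covers GPT-2-style models.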
for path_name, path_fn in possible_paths:
result = path_fn(model)
if result is not None:
layers = result
layer_path = path_name
break
if layers:
analysis['total_layers'] = len(layers)
analysis['layer_path'] = layer_path
if len(layers) > 0:
first_layer = layers[0]
if hasattr(first_layer, 'self_attn'):
analysis['has_self_attn'] = True
attn = first_layer.self_attn
if hasattr(attn, 'q_proj'):
q_shape = attn.q_proj.weight.shape
k_shape = attn.k_proj.weight.shape
if hasattr(config, 'num_attention_heads') and config.num_attention_heads > 0:
head_dim = q_shape[0] // config.num_attention_heads
analysis['head_dim'] = head_dim
analysis['gqa_detected'] = (k_shape[0] != q_shape[0])
analysis['q_dim'] = q_shape[0]
analysis['k_dim'] = k_shape[0]
print(f"\n{'='*80}\n")
del model
torch.cuda.empty_cache()
return analysis
    except Exception as e:
        import traceback
        print(f"\n❌ Structure analysis failed: {e}")
        traceback.print_exc()
return {
'model_url': model_url,
'error': str(e),
'total_layers': 0,
}
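# Illustrative example (values depend on the checkpoint's config): for a Qwen-style model the
# returned dict looks roughly like
#   {'model_type': 'qwen3', 'layer_path': 'model.layers', 'has_self_attn': True,
#    'gqa_detected': True, 'total_layers': 28, ...}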
# =====================================================
# PHOENIX Retention (unchanged)
# =====================================================
class MultiScaleRetention(nn.Module):
"""μ§„μ§œ Retention Attention with GQA Support"""
def __init__(self, config, layer_idx=0):
super().__init__()
self.config = config
self.layer_idx = layer_idx
self.hidden_size = config.hidden_size
self.num_heads = config.num_attention_heads
if hasattr(config, 'head_dim'):
self.head_dim = config.head_dim
else:
self.head_dim = self.hidden_size // self.num_heads
if hasattr(config, 'num_key_value_heads'):
self.num_key_value_heads = config.num_key_value_heads
else:
self.num_key_value_heads = self.num_heads
self.num_key_value_groups = self.num_heads // self.num_key_value_heads
self.kv_head_dim = self.head_dim
self.q_dim = self.num_heads * self.head_dim
self.kv_dim = self.num_key_value_heads * self.kv_head_dim
self.q_proj = nn.Linear(self.hidden_size, self.q_dim, bias=False)
self.k_proj = nn.Linear(self.hidden_size, self.kv_dim, bias=False)
self.v_proj = nn.Linear(self.hidden_size, self.kv_dim, bias=False)
self.o_proj = nn.Linear(self.q_dim, self.hidden_size, bias=False)
decay_values = torch.linspace(0.95, 0.99, self.num_heads)
self.decay = nn.Parameter(decay_values, requires_grad=True)
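        # Learnable per-head decay; note that _compute_retention passes it through sigmoid(),
        # so the effective decay starts near sigmoid(0.95..0.99) β‰ˆ 0.72-0.73, not 0.95..0.99.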
self.group_norm = nn.GroupNorm(
num_groups=self.num_heads,
num_channels=self.q_dim
)
def _repeat_kv(self, hidden_states: torch.Tensor, n_rep: int) -> torch.Tensor:
"""Repeat K/V heads (GQA)"""
batch, num_key_value_heads, slen, head_dim = hidden_states.shape
if n_rep == 1:
return hidden_states
hidden_states = hidden_states[:, :, None, :, :].expand(
batch, num_key_value_heads, n_rep, slen, head_dim
)
return hidden_states.reshape(batch, num_key_value_heads * n_rep, slen, head_dim)
def forward(
self,
hidden_states: torch.Tensor,
attention_mask: Optional[torch.Tensor] = None,
position_ids: Optional[torch.Tensor] = None,
past_key_value: Optional[Tuple[torch.Tensor]] = None,
output_attentions: bool = False,
use_cache: bool = False,
cache_position: Optional[torch.Tensor] = None,
past_key_values: Optional[Tuple[torch.Tensor]] = None,
**kwargs
):
"""O(n) Retention"""
batch_size, seq_len, _ = hidden_states.shape
target_device = hidden_states.device
target_dtype = hidden_states.dtype
if self.q_proj.weight.device != target_device or self.q_proj.weight.dtype != target_dtype:
self.to(device=target_device, dtype=target_dtype)
query_states = self.q_proj(hidden_states)
key_states = self.k_proj(hidden_states)
value_states = self.v_proj(hidden_states)
query_states = query_states.view(
batch_size, seq_len, self.num_heads, self.head_dim
).transpose(1, 2)
key_states = key_states.view(
batch_size, seq_len, self.num_key_value_heads, self.kv_head_dim
).transpose(1, 2)
value_states = value_states.view(
batch_size, seq_len, self.num_key_value_heads, self.kv_head_dim
).transpose(1, 2)
key_states = self._repeat_kv(key_states, self.num_key_value_groups)
value_states = self._repeat_kv(value_states, self.num_key_value_groups)
retention_states = self._compute_retention(
query_states, key_states, value_states
)
retention_states = retention_states.transpose(1, 2).contiguous()
retention_states = retention_states.reshape(
batch_size, seq_len, self.q_dim
)
if self.group_norm.weight.device != retention_states.device or self.group_norm.weight.dtype != retention_states.dtype:
self.group_norm = self.group_norm.to(device=retention_states.device, dtype=retention_states.dtype)
retention_states = self.group_norm(
retention_states.transpose(1, 2)
).transpose(1, 2)
retention_states = torch.clamp(retention_states, min=-10.0, max=10.0)
attn_output = self.o_proj(retention_states)
return (attn_output, None)
def _compute_retention(
self,
queries: torch.Tensor,
keys: torch.Tensor,
values: torch.Tensor,
):
"""O(n) Retention computation"""
batch_size, num_heads, seq_len, head_dim = queries.shape
state = torch.zeros(
batch_size, num_heads, head_dim, head_dim,
dtype=queries.dtype,
device=queries.device
) + 1e-6
outputs = []
decay = torch.sigmoid(self.decay).view(1, -1, 1, 1).to(
device=queries.device,
dtype=queries.dtype
)
for t in range(seq_len):
q_t = queries[:, :, t, :]
k_t = keys[:, :, t, :]
v_t = values[:, :, t, :]
state = decay * state
kv_update = torch.einsum('bhd,bhe->bhde', k_t, v_t)
kv_update = torch.clamp(kv_update, min=-5.0, max=5.0)
state = state + kv_update
state = torch.clamp(state, min=-10.0, max=10.0)
output_t = torch.einsum('bhd,bhde->bhe', q_t, state)
outputs.append(output_t)
output = torch.stack(outputs, dim=2)
return output
class HierarchicalRetention(nn.Module):
"""PHOENIX Hierarchical Retention"""
def __init__(self, config, layer_idx=0):
super().__init__()
self.base_retention = MultiScaleRetention(config, layer_idx)
hidden_size = config.hidden_size
self.d_state = hidden_size // 2
self.short_proj = nn.Linear(hidden_size, self.d_state)
self.medium_proj = nn.Linear(self.d_state, self.d_state)
self.long_proj = nn.Linear(self.d_state, self.d_state * 2)
self.fusion = nn.Linear(self.d_state * 4, hidden_size)
self.short_decay = 0.5
self.medium_decay = 0.8
self.long_decay = 0.95
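        # Three timescales: the short state is updated every token (decay 0.5), the medium state
        # every 8 tokens (decay 0.8), and the long state every 64 tokens (decay 0.95); the forward
        # pass concatenates all three and fuses them back to hidden_size.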
self.norm = nn.LayerNorm(hidden_size)
def forward(
self,
hidden_states: torch.Tensor,
attention_mask: Optional[torch.Tensor] = None,
position_ids: Optional[torch.Tensor] = None,
past_key_value: Optional[Tuple[torch.Tensor]] = None,
output_attentions: bool = False,
use_cache: bool = False,
cache_position: Optional[torch.Tensor] = None,
past_key_values: Optional[Tuple[torch.Tensor]] = None,
**kwargs
):
"""Hierarchical forward pass"""
batch_size, seq_len, hidden_size = hidden_states.shape
target_device = hidden_states.device
target_dtype = hidden_states.dtype
if self.short_proj.weight.device != target_device or self.short_proj.weight.dtype != target_dtype:
self.to(device=target_device, dtype=target_dtype)
base_result = self.base_retention(
hidden_states, attention_mask, position_ids,
past_key_value, output_attentions, use_cache
)
retention_output = base_result[0]
short_state = torch.zeros(batch_size, self.d_state, dtype=target_dtype, device=target_device)
medium_state = torch.zeros(batch_size, self.d_state, dtype=target_dtype, device=target_device)
long_state = torch.zeros(batch_size, self.d_state * 2, dtype=target_dtype, device=target_device)
hierarchical_outputs = []
for t in range(seq_len):
x_t = retention_output[:, t, :]
short_input = self.short_proj(x_t)
short_state = self.short_decay * short_state + short_input
if t % 8 == 0:
medium_state = self.medium_decay * medium_state + \
self.medium_proj(short_state)
if t % 64 == 0:
long_state = self.long_decay * long_state + \
self.long_proj(medium_state)
combined = torch.cat([short_state, medium_state, long_state], dim=-1)
output_t = self.fusion(combined)
hierarchical_outputs.append(output_t)
output = torch.stack(hierarchical_outputs, dim=1)
output = self.norm(output)
return (output, None)
# =====================================================
# λͺ¨λΈ λ³€ν™˜ ν•¨μˆ˜
# =====================================================
def replace_attention_with_retention(model, use_hierarchical=True, structure_info=None):
"""Transformer Attention β†’ PHOENIX Retention"""
print("πŸ”„ Starting Attention β†’ Retention conversion...")
replaced_count = 0
total_layers = 0
layers = None
if structure_info and structure_info.get('layer_path'):
layer_path = structure_info['layer_path']
if layer_path == 'model.layers':
if hasattr(model, 'model') and hasattr(model.model, 'layers'):
layers = model.model.layers
elif layer_path == 'transformer.h':
if hasattr(model, 'transformer') and hasattr(model.transformer, 'h'):
layers = model.transformer.h
if layers is None:
possible_paths = [
('model.layers', lambda m: m.model.layers if hasattr(m, 'model') and hasattr(m.model, 'layers') else None),
('transformer.h', lambda m: m.transformer.h if hasattr(m, 'transformer') and hasattr(m.transformer, 'h') else None),
]
for path_name, path_fn in possible_paths:
result = path_fn(model)
if result is not None:
layers = result
break
if layers is None:
print("❌ Cannot find layers")
return model, 0, 0
total_layers = len(layers)
print(f" Found {total_layers} layers")
if structure_info and structure_info.get('head_dim'):
model.config.head_dim = structure_info['head_dim']
for layer_idx, layer in enumerate(layers):
try:
if hasattr(layer, 'self_attn'):
old_attn = layer.self_attn
if use_hierarchical:
new_retention = HierarchicalRetention(model.config, layer_idx)
else:
new_retention = MultiScaleRetention(model.config, layer_idx)
if hasattr(old_attn, 'q_proj'):
try:
target = new_retention.base_retention if use_hierarchical else new_retention
target.q_proj.weight.data = old_attn.q_proj.weight.data.clone()
target.k_proj.weight.data = old_attn.k_proj.weight.data.clone()
target.v_proj.weight.data = old_attn.v_proj.weight.data.clone()
target.o_proj.weight.data = old_attn.o_proj.weight.data.clone()
except:
pass
layer.self_attn = new_retention
replaced_count += 1
except Exception as e:
continue
print(f"\nβœ… Conversion complete: {replaced_count}/{total_layers} layers")
return model, replaced_count, total_layers
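# Usage sketch (assumes a causal LM that AutoModelForCausalLM can load):
#   info = analyze_model_structure("Qwen/Qwen3-0.6B")
#   model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen3-0.6B", trust_remote_code=True)
#   model, converted, total = replace_attention_with_retention(model, use_hierarchical=True,
#                                                              structure_info=info)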
# =====================================================
# πŸ†• MULTI-GPU fine-tuning pipeline
# =====================================================
def finetune_retention_model(
model,
tokenizer,
num_steps: int = 3000,
batch_size: int = 4,
learning_rate: float = 1e-5,
output_dir: str = None,
use_gradient_checkpointing: bool = True,
):
"""
πŸ†• v2.0: Brumby-style Retraining with Multi-GPU Support
"""
    # Default output_dir
if output_dir is None:
output_dir = f"{STORAGE_PATH}/finetuning_temp"
print("\n" + "="*80)
print("πŸ”₯ PHOENIX RETRAINING - Multi-GPU (v2.0)")
print("="*80)
print(f" GPUs: {NUM_GPUS}")
print(f" Target Steps: {num_steps}")
print(f" Batch Size per GPU: {batch_size}")
print(f" Global Batch Size: {batch_size * NUM_GPUS}")
print(f" Learning Rate: {learning_rate}")
print(f" Gradient Checkpointing: {use_gradient_checkpointing}")
start_time = time.time()
    # βœ… Gradient checkpointing (saves memory)
if use_gradient_checkpointing:
if hasattr(model, 'gradient_checkpointing_enable'):
model.gradient_checkpointing_enable()
print(f" βœ… Gradient Checkpointing enabled")
    # Prepare the dataset
train_dataset = prepare_simple_dataset(
tokenizer=tokenizer,
num_steps=num_steps,
        batch_size=batch_size * NUM_GPUS  # account for multi-GPU
)
# βœ… Multi-GPU Training Arguments
training_args = TrainingArguments(
output_dir=output_dir,
        # πŸš€ Multi-GPU settings
        per_device_train_batch_size=batch_size,  # batch size per GPU
        gradient_accumulation_steps=max(1, 8 // max(1, NUM_GPUS)),  # scale with GPU count
        # Training settings
num_train_epochs=1,
max_steps=num_steps,
learning_rate=learning_rate,
warmup_steps=100,
# Optimization
fp16=True, # Mixed precision
optim="adamw_torch_fused", # H100 μ΅œμ ν™”
# Logging
logging_steps=50,
logging_first_step=True,
save_steps=1000,
save_total_limit=2,
# Performance
        dataloader_num_workers=4 * NUM_GPUS,  # 4 workers per GPU
dataloader_pin_memory=True,
        # Multi-GPU options
        ddp_find_unused_parameters=False,
        ddp_backend="nccl",  # optimal backend for H100
# Misc
remove_unused_columns=False,
report_to="none",
        # βœ… DeepSpeed (optional)
        # deepspeed="ds_config.json",  # uncomment to enable DeepSpeed
)
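    # A minimal ZeRO-3 config sketch for the optional `deepspeed=` argument above; the file name
    # "ds_config.json" is only an example, and the "auto" values are filled in by the Trainer:
    #   {
    #     "zero_optimization": {"stage": 3, "overlap_comm": true,
    #                           "stage3_gather_16bit_weights_on_model_save": true},
    #     "fp16": {"enabled": "auto"},
    #     "train_micro_batch_size_per_gpu": "auto",
    #     "gradient_accumulation_steps": "auto"
    #   }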
# Data collator
data_collator = DataCollatorForLanguageModeling(
tokenizer=tokenizer,
mlm=False
)
    # βœ… Trainer (handles multi-GPU automatically)
trainer = Trainer(
model=model,
args=training_args,
train_dataset=train_dataset,
tokenizer=tokenizer,
data_collator=data_collator,
)
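    # Note (assumption, based on standard Trainer behavior): multi-process DDP only engages when
    # this script is launched through a distributed launcher, e.g.
    #   torchrun --nproc_per_node=8 app.py    or    accelerate launch --num_processes 8 app.py
    # A plain `python app.py` runs a single process; since the model is loaded with
    # device_map="auto", it is sharded across the GPUs (model parallelism) rather than replicated.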
# Train!
print(f"\nπŸš€ Starting Multi-GPU Fine-tuning...")
print(f" Using {NUM_GPUS} GPUs")
trainer.train()
elapsed = time.time() - start_time
print(f"\nβœ… Fine-tuning Complete!")
print(f" Time: {elapsed/60:.1f} minutes")
print(f" Effective samples/sec: {(num_steps * batch_size * NUM_GPUS) / elapsed:.2f}")
print(f"="*80 + "\n")
return model
def prepare_simple_dataset(
tokenizer,
num_steps: int,
batch_size: int,
max_length: int = 2048,
):
"""Dataset μ€€λΉ„"""
print(f"\nπŸ“Š Preparing Dataset...")
num_samples = num_steps * batch_size
print(f" Target samples: {num_samples}")
try:
dataset = load_dataset(
"wikitext",
"wikitext-2-raw-v1",
split=f"train[:{num_samples}]"
)
print(f" βœ… Loaded: {len(dataset)} samples")
except Exception as e:
print(f" ❌ Failed: {e}")
raise
def tokenize_function(examples):
return tokenizer(
examples['text'],
truncation=True,
max_length=max_length,
padding="max_length",
)
tokenized = dataset.map(
tokenize_function,
batched=True,
remove_columns=dataset.column_names,
num_proc=4 # Parallel processing
)
print(f" βœ… Tokenized: {len(tokenized)} samples")
return tokenized
def estimate_finetuning_cost(
model_size: str,
num_steps: int,
batch_size: int,
num_gpus: int = NUM_GPUS,
gpu_type: str = "H100",
) -> Dict:
"""λΉ„μš© 계산기 - Multi-GPU"""
gpu_costs = {
"H100": 3.0,
"A100": 2.0,
"A10G": 1.0,
}
model_step_times = {
"0.6B": 0.5,
"1.5B": 1.0,
"3B": 2.0,
"7B": 3.5,
"14B": 6.0,
}
    # Time reduction from multi-GPU (assumes linear scaling)
    step_time = model_step_times.get(model_size, 1.0) * (batch_size / 4)
    step_time_per_gpu = step_time / max(1, num_gpus)  # GPU parallelism
total_seconds = num_steps * step_time_per_gpu
total_hours = total_seconds / 3600
# λΉ„μš©μ€ GPU 수만큼 곱함
total_cost_usd = total_hours * gpu_costs.get(gpu_type, 2.0) * num_gpus
return {
'hours': round(total_hours, 2),
'cost_usd': round(total_cost_usd, 2),
'cost_krw': round(total_cost_usd * 1300, 0),
'num_gpus': num_gpus,
'gpu_type': gpu_type,
}
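# Worked example (illustrative): model_size="0.6B", num_steps=3000, batch_size=4, 8x H100:
#   step_time         = 0.5 s * (4 / 4)      = 0.5 s
#   step_time_per_gpu = 0.5 s / 8            = 0.0625 s
#   total             = 3000 * 0.0625 s      = 187.5 s  β‰ˆ 0.05 h
#   cost              β‰ˆ 0.052 h * $3.0 * 8   β‰ˆ $1.25    (β‰ˆ β‚©1,625 at 1,300 KRW/USD)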
# =====================================================
# Custom modeling code (unchanged)
# =====================================================
def generate_modeling_phoenix_code():
"""PHOENIX Custom Modeling Code v2.0"""
return '''"""
PHOENIX Retention Model v2.0
βœ… v2.0: Brumby-style Retraining support
βœ… v1.4.3: forward() signature compatible with Transformers
βœ… v1.4.3: dtype mismatch fix
"""
import torch
import torch.nn as nn
from typing import Optional, Tuple
from transformers.modeling_utils import PreTrainedModel
from transformers.configuration_utils import PretrainedConfig
from transformers import AutoConfig, AutoModelForCausalLM
import os
class PhoenixConfig(PretrainedConfig):
model_type = "phoenix"
def __init__(self, use_phoenix_retention=True, phoenix_version="2.0",
original_model=None, use_hierarchical=True, **kwargs):
super().__init__(**kwargs)
self.use_phoenix_retention = use_phoenix_retention
self.phoenix_version = phoenix_version
self.original_model = original_model
self.use_hierarchical = use_hierarchical
class MultiScaleRetention(nn.Module):
def __init__(self, config, layer_idx=0):
super().__init__()
self.hidden_size = config.hidden_size
self.num_heads = config.num_attention_heads
self.head_dim = getattr(config, 'head_dim', self.hidden_size // self.num_heads)
self.num_key_value_heads = getattr(config, 'num_key_value_heads', self.num_heads)
self.num_key_value_groups = self.num_heads // self.num_key_value_heads
self.q_dim = self.num_heads * self.head_dim
self.kv_dim = self.num_key_value_heads * self.head_dim
self.q_proj = nn.Linear(self.hidden_size, self.q_dim, bias=False)
self.k_proj = nn.Linear(self.hidden_size, self.kv_dim, bias=False)
self.v_proj = nn.Linear(self.hidden_size, self.kv_dim, bias=False)
self.o_proj = nn.Linear(self.q_dim, self.hidden_size, bias=False)
self.decay = nn.Parameter(torch.linspace(0.95, 0.99, self.num_heads))
self.group_norm = nn.GroupNorm(self.num_heads, self.q_dim)
def _repeat_kv(self, x, n):
b, h, s, d = x.shape
if n == 1: return x
return x[:, :, None, :, :].expand(b, h, n, s, d).reshape(b, h*n, s, d)
def forward(self, hidden_states, **kwargs):
b, s, _ = hidden_states.shape
device, dtype = hidden_states.device, hidden_states.dtype
if self.q_proj.weight.device != device or self.q_proj.weight.dtype != dtype:
self.to(device=device, dtype=dtype)
q = self.q_proj(hidden_states).view(b, s, self.num_heads, self.head_dim).transpose(1, 2)
k = self.k_proj(hidden_states).view(b, s, self.num_key_value_heads, self.head_dim).transpose(1, 2)
v = self.v_proj(hidden_states).view(b, s, self.num_key_value_heads, self.head_dim).transpose(1, 2)
k = self._repeat_kv(k, self.num_key_value_groups)
v = self._repeat_kv(v, self.num_key_value_groups)
out = self._retention(q, k, v)
out = out.transpose(1, 2).reshape(b, s, self.q_dim)
out = self.group_norm(out.transpose(1, 2)).transpose(1, 2)
return (self.o_proj(torch.clamp(out, -10, 10)), None)
def _retention(self, q, k, v):
b, h, s, d = q.shape
state = torch.zeros(b, h, d, d, dtype=q.dtype, device=q.device) + 1e-6
decay = torch.sigmoid(self.decay).view(1, -1, 1, 1).to(q)
outs = []
for t in range(s):
state = decay * state + torch.clamp(torch.einsum('bhd,bhe->bhde', k[:,:,t], v[:,:,t]), -5, 5)
state = torch.clamp(state, -10, 10)
outs.append(torch.einsum('bhd,bhde->bhe', q[:,:,t], state))
return torch.stack(outs, dim=2)
class HierarchicalRetention(nn.Module):
def __init__(self, config, layer_idx=0):
super().__init__()
self.base_retention = MultiScaleRetention(config, layer_idx)
h = config.hidden_size
self.d_state = h // 2
self.short_proj = nn.Linear(h, self.d_state)
self.medium_proj = nn.Linear(self.d_state, self.d_state)
self.long_proj = nn.Linear(self.d_state, self.d_state*2)
self.fusion = nn.Linear(self.d_state*4, h)
self.norm = nn.LayerNorm(h)
self.decays = [0.5, 0.8, 0.95]
def forward(self, hidden_states, **kwargs):
b, s, h = hidden_states.shape
device, dtype = hidden_states.device, hidden_states.dtype
if self.short_proj.weight.device != device or self.short_proj.weight.dtype != dtype:
self.to(device=device, dtype=dtype)
ret_out = self.base_retention(hidden_states)[0]
short = torch.zeros(b, self.d_state, dtype=dtype, device=device)
med = torch.zeros(b, self.d_state, dtype=dtype, device=device)
long = torch.zeros(b, self.d_state*2, dtype=dtype, device=device)
outs = []
for t in range(s):
short = self.decays[0]*short + self.short_proj(ret_out[:,t])
if t % 8 == 0: med = self.decays[1]*med + self.medium_proj(short)
if t % 64 == 0: long = self.decays[2]*long + self.long_proj(med)
outs.append(self.fusion(torch.cat([short, med, long], -1)))
return (self.norm(torch.stack(outs, 1)), None)
def replace_attention_with_retention_for_loading(model, use_hierarchical=True):
layers = getattr(model, 'model', model)
layers = getattr(layers, 'layers', getattr(layers, 'h', None))
if layers is None: return model, 0, 0
original_dtype = None
for param in model.parameters():
original_dtype = param.dtype
break
cnt = 0
for i, layer in enumerate(layers):
if hasattr(layer, 'self_attn'):
new_ret = HierarchicalRetention(model.config, i) if use_hierarchical else MultiScaleRetention(model.config, i)
if original_dtype: new_ret = new_ret.to(dtype=original_dtype)
layer.self_attn = new_ret
cnt += 1
return model, cnt, len(layers)
class PhoenixPreTrainedModel(PreTrainedModel):
config_class = PhoenixConfig
base_model_prefix = "phoenix"
class PhoenixModelForCausalLM(PhoenixPreTrainedModel):
def __init__(self, config):
super().__init__(config)
self._model = None
self._ready = False
@classmethod
def from_pretrained(cls, path, *args, **kwargs):
print(f"πŸ”₯ PHOENIX v2.0 loading from {path}")
config = AutoConfig.from_pretrained(path, trust_remote_code=True)
orig = getattr(config, 'original_model', 'Qwen/Qwen3-0.6B')
hier = getattr(config, 'use_hierarchical', True)
try:
base_cfg = AutoConfig.from_pretrained(orig, trust_remote_code=True)
except:
base_cfg = config
model = AutoModelForCausalLM.from_config(base_cfg)
model, conv, tot = replace_attention_with_retention_for_loading(model, hier)
print(f" βœ… Converted {conv}/{tot} layers")
sd = None
if os.path.exists(path):
for fname in ["model.safetensors", "pytorch_model.bin"]:
fpath = os.path.join(path, fname)
if os.path.exists(fpath):
if fname.endswith('.safetensors'):
from safetensors.torch import load_file
sd = load_file(fpath)
else:
sd = torch.load(fpath, map_location='cpu')
break
else:
from huggingface_hub import hf_hub_download
for fname in ["model.safetensors", "pytorch_model.bin"]:
try:
fpath = hf_hub_download(path, fname)
if fname.endswith('.safetensors'):
from safetensors.torch import load_file
sd = load_file(fpath)
else:
sd = torch.load(fpath, map_location='cpu')
break
except: pass
if sd:
miss, unex = model.load_state_dict(sd, strict=False)
print(f" πŸ“¦ Weights: {len(miss)} missing, {len(unex)} unexpected")
if 'lm_head.weight' in miss and getattr(config, 'tie_word_embeddings', False):
if hasattr(model, 'lm_head') and hasattr(model.model, 'embed_tokens'):
model.lm_head.weight = model.model.embed_tokens.weight
print(f" πŸ”— Tied embeddings")
inst = cls(config)
inst._model = model
inst._ready = True
print(f"βœ… PHOENIX v2.0 ready!")
return inst
def forward(self, *a, **k):
if not self._ready: raise ValueError("Not initialized")
return self._model(*a, **k)
def generate(self, *a, **k):
if not self._ready: raise ValueError("Not initialized")
return self._model.generate(*a, **k)
AutoConfig.register("phoenix", PhoenixConfig)
'''
# =====================================================
# Save / Upload / Evaluate (unchanged)
# =====================================================
def save_phoenix_model_with_code(model, tokenizer, output_path, original_model_url, metadata):
"""PHOENIX λͺ¨λΈ μ €μž₯"""
output_path = Path(output_path)
output_path.mkdir(parents=True, exist_ok=True)
print(f"\nπŸ’Ύ Saving PHOENIX model...")
if hasattr(model.config, 'tie_word_embeddings') and model.config.tie_word_embeddings:
if hasattr(model, 'lm_head') and hasattr(model, 'model') and hasattr(model.model, 'embed_tokens'):
model.lm_head.weight = model.model.embed_tokens.weight
model.save_pretrained(output_path)
tokenizer.save_pretrained(output_path)
modeling_code = generate_modeling_phoenix_code()
with open(output_path / "modeling_phoenix.py", "w") as f:
f.write(modeling_code)
config_path = output_path / "config.json"
if config_path.exists():
with open(config_path, "r") as f:
config_dict = json.load(f)
config_dict["use_phoenix_retention"] = True
config_dict["phoenix_version"] = "2.0"
config_dict["original_model"] = original_model_url
config_dict["auto_map"] = {
"AutoModelForCausalLM": "modeling_phoenix.PhoenixModelForCausalLM",
}
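        # auto_map + trust_remote_code=True lets AutoModelForCausalLM.from_pretrained() import
        # modeling_phoenix.py from the saved repo and instantiate PhoenixModelForCausalLM.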
with open(config_path, "w") as f:
json.dump(config_dict, f, indent=2)
with open(output_path / 'phoenix_metadata.json', 'w') as f:
json.dump(metadata, f, indent=2)
readme = f"""# πŸ”₯ PHOENIX v2.0 - {original_model_url}
**Multi-GPU Trained** with {metadata.get('num_gpus', 1)} GPUs
## Features
- βœ… Brumby-style Retraining
- βœ… O(n) Complexity
- βœ… GQA Support
## Usage
```python
from transformers import AutoModelForCausalLM, AutoTokenizer
model = AutoModelForCausalLM.from_pretrained(
"{output_path.name}",
trust_remote_code=True,
torch_dtype="auto",
device_map="auto"
)
```
**VIDraft AI Research Lab** | PHOENIX v2.0 Multi-GPU
"""
with open(output_path / "README.md", "w") as f:
f.write(readme)
print(f" βœ… Model saved")
def upload_to_huggingface_hub(
model_path: str,
original_model_url: str,
repo_name: str = None,
private: bool = True,
token: str = None,
) -> Tuple[bool, str, str]:
"""Upload to Hub"""
if token is None:
token = HF_TOKEN
if not token:
return False, "", "❌ No HF_TOKEN"
try:
api = HfApi(token=token)
user_info = api.whoami(token=token)
username = user_info['name']
if not repo_name:
base_name = original_model_url.split('/')[-1]
repo_name = f"phoenix-{base_name}"
repo_id = f"{username}/{repo_name}"
create_repo(
repo_id=repo_id,
token=token,
private=private,
repo_type="model",
exist_ok=True
)
api.upload_folder(
folder_path=str(model_path),
repo_id=repo_id,
repo_type="model",
token=token,
)
hub_url = f"https://huggingface.co/{repo_id}"
return True, hub_url, f"βœ… Uploaded to {hub_url}"
except Exception as e:
return False, "", f"❌ Upload failed: {e}"
def evaluate_model_quality(model, tokenizer):
"""Quality 평가"""
test_prompts = [
"The capital of France is",
"In machine learning,",
"2 + 2 =",
]
model.eval()
scores = []
with torch.no_grad():
for prompt in test_prompts:
try:
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
outputs = model.generate(
**inputs,
max_new_tokens=20,
do_sample=False,
pad_token_id=tokenizer.eos_token_id,
)
generated = tokenizer.decode(outputs[0], skip_special_tokens=True)
score = 0.0
if len(generated) > len(prompt):
score += 0.3
if not any(c in generated[len(prompt):] for c in ['οΏ½', '[UNK]']):
score += 0.3
if len(generated.split()) > len(prompt.split()) + 2:
score += 0.4
scores.append(score)
except:
scores.append(0.0)
return sum(scores) / len(scores) if scores else 0.0
# =====================================================
# πŸ†• Multi-GPU burning function
# =====================================================
def burn_model_with_finetuning(
model_url: str,
output_dir: str,
use_hierarchical: bool = True,
enable_finetuning: bool = False,
num_steps: int = 3000,
batch_size: int = 4,
learning_rate: float = 1e-5,
use_gradient_checkpointing: bool = True,
):
"""πŸ†• v2.0: Multi-GPU Optimized Burning"""
print("="*80)
print(f"πŸ”₯ PHOENIX Model Burning v2.0 - Multi-GPU ({NUM_GPUS} GPUs)")
print("="*80)
output_path = Path(output_dir)
output_path.mkdir(parents=True, exist_ok=True)
try:
# STEP 1: Structure Analysis
print(f"\nπŸ” STEP 1: Structure Analysis...")
structure_info = analyze_model_structure(model_url)
# STEP 2: Load Model with device_map="auto"
print(f"\nπŸ“₯ STEP 2: Loading model (Multi-GPU)...")
start_time = time.time()
config = AutoConfig.from_pretrained(model_url, trust_remote_code=True)
        # βœ… Multi-GPU: shard automatically with device_map="auto"
model = AutoModelForCausalLM.from_pretrained(
model_url,
trust_remote_code=True,
torch_dtype=torch.float16,
device_map="auto" # μžλ™μœΌλ‘œ 8개 GPU에 λΆ„μ‚°!
)
tokenizer = AutoTokenizer.from_pretrained(model_url, trust_remote_code=True)
if tokenizer.pad_token is None:
tokenizer.pad_token = tokenizer.eos_token
load_time = time.time() - start_time
print(f"βœ… Loaded across {NUM_GPUS} GPUs in {load_time:.1f}s")
# STEP 3: Convert
print(f"\nπŸ”„ STEP 3: Converting Attention β†’ Retention...")
convert_start = time.time()
model, converted, total = replace_attention_with_retention(
model,
use_hierarchical=use_hierarchical,
structure_info=structure_info
)
convert_time = time.time() - convert_start
conversion_rate = converted / total if total > 0 else 0
print(f"βœ… Converted {converted}/{total} layers in {convert_time:.1f}s")
# STEP 4: Fine-tuning (Multi-GPU)
if enable_finetuning:
print(f"\nπŸš€ STEP 4: Multi-GPU Fine-tuning...")
ft_start = time.time()
model = finetune_retention_model(
model=model,
tokenizer=tokenizer,
num_steps=num_steps,
batch_size=batch_size,
learning_rate=learning_rate,
use_gradient_checkpointing=use_gradient_checkpointing,
)
ft_time = time.time() - ft_start
print(f"βœ… Fine-tuning completed in {ft_time/60:.1f} minutes")
else:
ft_time = 0
print(f"\n⏭️ STEP 4: Fine-tuning skipped")
# STEP 5: Evaluate
print(f"\nπŸ“Š STEP 5: Evaluating...")
quality_score = evaluate_model_quality(model, tokenizer)
print(f"βœ… Quality: {quality_score:.2f}/1.00")
# STEP 6: Save
print(f"\nπŸ’Ύ STEP 6: Saving...")
metadata = {
'phoenix_version': '2.0',
'original_model': model_url,
'use_hierarchical': use_hierarchical,
'conversion_rate': conversion_rate,
'quality_score': quality_score,
'finetuned': enable_finetuning,
'finetuning_steps': num_steps if enable_finetuning else 0,
'num_gpus': NUM_GPUS,
'gradient_checkpointing': use_gradient_checkpointing,
'timestamp': datetime.now().isoformat(),
}
save_phoenix_model_with_code(model, tokenizer, output_path, model_url, metadata)
total_time = time.time() - start_time
result = {
'status': 'success',
'model_path': str(output_path),
'conversion_rate': conversion_rate,
'quality_score': quality_score,
'total_time': total_time,
'finetuned': enable_finetuning,
'num_gpus': NUM_GPUS,
'structure_info': structure_info,
}
print(f"\n{'='*80}")
print(f"βœ… Multi-GPU Burning Complete!")
print(f" GPUs Used: {NUM_GPUS}")
print(f" Model: {output_path}")
print(f" Quality: {quality_score:.2f}/1.00")
print(f"{'='*80}\n")
return result
except Exception as e:
import traceback
return {
'status': 'failed',
'error': str(e),
'traceback': traceback.format_exc()
}
# =====================================================
# Database (unchanged)
# =====================================================
class ExperimentDatabase:
def __init__(self, db_path: str):
self.db_path = db_path
self.init_database()
def init_database(self):
with sqlite3.connect(self.db_path) as conn:
cursor = conn.cursor()
cursor.execute("""
CREATE TABLE IF NOT EXISTS burning_history (
id INTEGER PRIMARY KEY AUTOINCREMENT,
model_url TEXT,
output_path TEXT,
hub_url TEXT,
conversion_rate REAL,
quality_score REAL,
finetuned BOOLEAN,
num_gpus INTEGER,
timestamp DATETIME DEFAULT CURRENT_TIMESTAMP
)
""")
conn.commit()
def save_burning(self, info: Dict) -> int:
with sqlite3.connect(self.db_path) as conn:
cursor = conn.cursor()
cursor.execute("""
INSERT INTO burning_history
(model_url, output_path, hub_url, conversion_rate, quality_score, finetuned, num_gpus)
VALUES (?, ?, ?, ?, ?, ?, ?)
""", (
info.get('model_url'),
info.get('output_path'),
info.get('hub_url'),
info.get('conversion_rate'),
info.get('quality_score'),
info.get('finetuned'),
info.get('num_gpus', 1),
))
conn.commit()
return cursor.lastrowid
def get_history(self, limit: int = 20) -> List[Dict]:
with sqlite3.connect(self.db_path) as conn:
conn.row_factory = sqlite3.Row
cursor = conn.cursor()
cursor.execute("SELECT * FROM burning_history ORDER BY timestamp DESC LIMIT ?", (limit,))
return [dict(row) for row in cursor.fetchall()]
db = ExperimentDatabase(DB_PATH)
# =====================================================
# Gradio UI
# =====================================================
def burn_phoenix_model_ui(
model_url,
use_hierarchical,
output_name,
enable_finetuning,
ft_steps,
ft_batch,
ft_lr,
use_grad_ckpt,
upload_hub,
hub_repo,
hub_private,
):
"""Gradio UI"""
try:
if not model_url.strip():
return "⚠️ Model URL required", None
if not output_name.strip():
output_name = f"phoenix_{model_url.split('/')[-1]}_{int(time.time())}"
output_dir = f"{MODELS_PATH}/{output_name}"
        # Cost estimate
if enable_finetuning:
model_size = "0.6B" if "0.6B" in model_url else "1.5B"
cost = estimate_finetuning_cost(model_size, ft_steps, ft_batch, NUM_GPUS)
print(f"\nπŸ’° Estimated Cost: ${cost['cost_usd']} ({cost['hours']}h with {NUM_GPUS} GPUs)")
# Burn
result = burn_model_with_finetuning(
model_url=model_url,
output_dir=output_dir,
use_hierarchical=use_hierarchical,
enable_finetuning=enable_finetuning,
num_steps=ft_steps,
batch_size=ft_batch,
learning_rate=ft_lr,
use_gradient_checkpointing=use_grad_ckpt,
)
if result['status'] != 'success':
return f"❌ Failed\n```\n{result.get('error')}\n```", None
# Upload
hub_url = None
if upload_hub and HF_TOKEN:
success, hub_url, msg = upload_to_huggingface_hub(
model_path=result['model_path'],
original_model_url=model_url,
repo_name=hub_repo if hub_repo.strip() else None,
private=hub_private,
)
# DB
db.save_burning({
'model_url': model_url,
'output_path': result['model_path'],
'hub_url': hub_url,
'conversion_rate': result['conversion_rate'],
'quality_score': result['quality_score'],
'finetuned': enable_finetuning,
'num_gpus': NUM_GPUS,
})
# Output
output_md = f"""
# πŸ”₯ PHOENIX v2.0 Multi-GPU Complete!
## Hardware
- **GPUs Used**: {NUM_GPUS} x {torch.cuda.get_device_name(0) if NUM_GPUS > 0 else 'N/A'}
## Model Info
- **Original**: {model_url}
- **Output**: `{result['model_path']}`
- **Conversion**: {result['conversion_rate']*100:.1f}%
- **Quality**: {result['quality_score']:.2f}/1.00
- **Fine-tuned**: {'βœ… YES' if enable_finetuning else '❌ NO'}
"""
if hub_url:
output_md += f"""
## Hub Status
βœ… **Uploaded**: [{hub_url}]({hub_url})
```python
model = AutoModelForCausalLM.from_pretrained(
"{hub_url.replace('https://huggingface.co/', '')}",
trust_remote_code=True,
device_map="auto" # Multi-GPU
)
```
"""
# Plot
fig = go.Figure()
fig.add_trace(go.Bar(
x=['Conversion', 'Quality'],
y=[result['conversion_rate'], result['quality_score']],
marker_color=['#3b82f6', '#10b981']
))
fig.update_layout(title=f"Metrics ({NUM_GPUS} GPUs)", yaxis_range=[0, 1])
return output_md, fig
except Exception as e:
import traceback
return f"❌ Error:\n```\n{traceback.format_exc()}\n```", None
def view_history():
"""History"""
try:
history = db.get_history(20)
if not history:
return "πŸ“­ No history", None
df = pd.DataFrame(history)
fig = px.scatter(
df,
x='timestamp',
y='quality_score',
color='finetuned',
size='num_gpus',
title='Burning History (Multi-GPU)'
)
return f"## History\n\n{df.to_markdown(index=False)}", fig
except Exception as e:
return f"❌ Error: {e}", None
# =====================================================
# Gradio App
# =====================================================
with gr.Blocks(title="πŸ”₯ PHOENIX v2.0 Multi-GPU", theme=gr.themes.Soft()) as demo:
gr.Markdown(f"""
# πŸ”₯ PHOENIX v2.0 - Multi-GPU Optimized
**H100 x {NUM_GPUS} GPUs Ready**
    πŸ†• **v2.0 Multi-GPU**: Accelerate integration, DDP support
    πŸ†• **v2.0**: Fine-tuning pipeline (Brumby-style)
βœ… v1.4.3: All fixes included
βœ… GQA Support | O(n) Complexity
---
""")
with gr.Tabs():
with gr.Tab("πŸ”₯ Model Burning"):
with gr.Row():
with gr.Column(scale=1):
burn_url = gr.Textbox(
label="πŸ”— Model URL",
value=DEFAULT_MODEL,
placeholder="Qwen/Qwen3-0.6B"
)
burn_hier = gr.Checkbox(value=True, label="Hierarchical Retention")
burn_name = gr.Textbox(label="πŸ’Ύ Output Name", placeholder="my_model")
gr.Markdown("---")
gr.Markdown(f"### πŸ†• Fine-tuning ({NUM_GPUS} GPUs)")
burn_ft_enable = gr.Checkbox(
value=False,
label="πŸš€ Enable Fine-tuning (Brumby-style)",
info=f"Multi-GPU acceleration with {NUM_GPUS} GPUs!"
)
burn_ft_steps = gr.Slider(
1000, 10000, 3000,
step=100,
label="Steps",
visible=False
)
burn_ft_batch = gr.Slider(
1, 16, 4,
step=1,
label=f"Batch Size per GPU ({NUM_GPUS} GPUs)",
visible=False
)
burn_ft_lr = gr.Number(value=1e-5, label="Learning Rate", visible=False)
burn_grad_ckpt = gr.Checkbox(
value=True,
label="βœ… Gradient Checkpointing (saves memory)",
visible=False
)
def toggle_ft(enabled):
return [
gr.update(visible=enabled),
gr.update(visible=enabled),
gr.update(visible=enabled),
gr.update(visible=enabled),
]
burn_ft_enable.change(
toggle_ft,
[burn_ft_enable],
[burn_ft_steps, burn_ft_batch, burn_ft_lr, burn_grad_ckpt]
)
gr.Markdown("---")
gr.Markdown("### 🌐 Hub Upload")
burn_upload = gr.Checkbox(value=True, label="πŸ“€ Upload to Hub")
burn_repo = gr.Textbox(label="πŸ“¦ Repo Name (optional)")
burn_private = gr.Checkbox(value=True, label="πŸ”’ Private")
burn_btn = gr.Button("πŸ”₯ Burn Model", variant="primary", size="lg")
with gr.Column(scale=2):
burn_output = gr.Markdown()
burn_plot = gr.Plot()
burn_btn.click(
burn_phoenix_model_ui,
[
burn_url, burn_hier, burn_name,
burn_ft_enable, burn_ft_steps, burn_ft_batch, burn_ft_lr, burn_grad_ckpt,
burn_upload, burn_repo, burn_private
],
[burn_output, burn_plot]
)
with gr.Tab("πŸ“Š History"):
with gr.Row():
with gr.Column(scale=1):
hist_btn = gr.Button("πŸ“Š Load", variant="primary")
with gr.Column(scale=2):
hist_out = gr.Markdown()
hist_plot = gr.Plot()
hist_btn.click(view_history, outputs=[hist_out, hist_plot])
gr.Markdown(f"""
---
## πŸ”₯ PHOENIX v2.0 Multi-GPU
**Hardware**: {NUM_GPUS} x {torch.cuda.get_device_name(0) if NUM_GPUS > 0 else 'N/A'}
**Features**:
- πŸ†• Multi-GPU Training (DDP)
- πŸ†• Gradient Checkpointing
- πŸ†• H100 Optimized (fused optimizer)
- πŸ†• Brumby-style Fine-tuning
- βœ… All v1.4.3 Fixes
**Token**: {'βœ…' if HF_TOKEN else '❌ Not Found'}
**VIDraft AI Research Lab** | PHOENIX v2.0 Multi-GPU
""")
if __name__ == "__main__":
import argparse
parser = argparse.ArgumentParser(description='PHOENIX v2.0 Multi-GPU')
parser.add_argument('--port', type=int, default=None, help='Server port (default: auto find 7860-7960)')
parser.add_argument('--share', action='store_true', help='Create public Gradio link')
parser.add_argument('--host', type=str, default="0.0.0.0", help='Server host')
args = parser.parse_args()
demo.queue(max_size=20)
    # Find a free port automatically
    if args.port is None:
        # Try ports 7860 through 7959
for port in range(7860, 7960):
try:
demo.launch(
server_name=args.host,
server_port=port,
share=args.share,
show_error=True
)
break
except OSError:
continue
else:
demo.launch(
server_name=args.host,
server_port=args.port,
share=args.share,
show_error=True
)