Update app.py
Browse files
app.py
CHANGED
|
@@ -48,7 +48,8 @@ from huggingface_hub import HfApi, create_repo
|
|
| 48 |
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
|
| 49 |
NUM_GPUS = torch.cuda.device_count()
|
| 50 |
|
| 51 |
-
|
|
|
|
| 52 |
DB_PATH = f"{STORAGE_PATH}/phoenix_experiments.db"
|
| 53 |
MODELS_PATH = f"{STORAGE_PATH}/phoenix_models"
|
| 54 |
DEFAULT_MODEL = "Qwen/Qwen3-0.6B"
|
|
@@ -56,8 +57,19 @@ DEFAULT_MODEL = "Qwen/Qwen3-0.6B"
|
|
| 56 |
# HuggingFace Token
|
| 57 |
HF_TOKEN = os.getenv("HF_TOKEN")
|
| 58 |
|
| 59 |
-
|
| 60 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 61 |
|
| 62 |
print(f"🔥 PHOENIX Platform v2.0 - Multi-GPU Optimized")
|
| 63 |
print(f"💾 Storage: {STORAGE_PATH}")
|
|
@@ -486,12 +498,16 @@ def finetune_retention_model(
|
|
| 486 |
num_steps: int = 3000,
|
| 487 |
batch_size: int = 4,
|
| 488 |
learning_rate: float = 1e-5,
|
| 489 |
-
output_dir: str =
|
| 490 |
use_gradient_checkpointing: bool = True,
|
| 491 |
):
|
| 492 |
"""
|
| 493 |
🆕 v2.0: Brumby-style Retraining with Multi-GPU Support
|
| 494 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
| 495 |
print("\n" + "="*80)
|
| 496 |
print("🔥 PHOENIX RETRAINING - Multi-GPU (v2.0)")
|
| 497 |
print("="*80)
|
|
@@ -1513,5 +1529,34 @@ with gr.Blocks(title="🔥 PHOENIX v2.0 Multi-GPU", theme=gr.themes.Soft()) as d
|
|
| 1513 |
|
| 1514 |
|
| 1515 |
if __name__ == "__main__":
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1516 |
demo.queue(max_size=20)
|
| 1517 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 48 |
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
|
| 49 |
NUM_GPUS = torch.cuda.device_count()
|
| 50 |
|
| 51 |
+
# ✅ 우분투 호환: 홈 디렉토리 또는 환경 변수 사용
|
| 52 |
+
STORAGE_PATH = os.getenv("PHOENIX_STORAGE_PATH", str(Path.home() / "phoenix_data"))
|
| 53 |
DB_PATH = f"{STORAGE_PATH}/phoenix_experiments.db"
|
| 54 |
MODELS_PATH = f"{STORAGE_PATH}/phoenix_models"
|
| 55 |
DEFAULT_MODEL = "Qwen/Qwen3-0.6B"
|
|
|
|
| 57 |
# HuggingFace Token
|
| 58 |
HF_TOKEN = os.getenv("HF_TOKEN")
|
| 59 |
|
| 60 |
+
# 디렉토리 생성 (권한 오류 처리)
|
| 61 |
+
try:
|
| 62 |
+
Path(STORAGE_PATH).mkdir(parents=True, exist_ok=True)
|
| 63 |
+
Path(MODELS_PATH).mkdir(parents=True, exist_ok=True)
|
| 64 |
+
print(f"✅ Storage initialized: {STORAGE_PATH}")
|
| 65 |
+
except PermissionError:
|
| 66 |
+
print(f"⚠️ Permission denied for {STORAGE_PATH}")
|
| 67 |
+
print(f" Using current directory instead")
|
| 68 |
+
STORAGE_PATH = "./phoenix_data"
|
| 69 |
+
DB_PATH = f"{STORAGE_PATH}/phoenix_experiments.db"
|
| 70 |
+
MODELS_PATH = f"{STORAGE_PATH}/phoenix_models"
|
| 71 |
+
Path(STORAGE_PATH).mkdir(parents=True, exist_ok=True)
|
| 72 |
+
Path(MODELS_PATH).mkdir(parents=True, exist_ok=True)
|
| 73 |
|
| 74 |
print(f"🔥 PHOENIX Platform v2.0 - Multi-GPU Optimized")
|
| 75 |
print(f"💾 Storage: {STORAGE_PATH}")
|
|
|
|
| 498 |
num_steps: int = 3000,
|
| 499 |
batch_size: int = 4,
|
| 500 |
learning_rate: float = 1e-5,
|
| 501 |
+
output_dir: str = None,
|
| 502 |
use_gradient_checkpointing: bool = True,
|
| 503 |
):
|
| 504 |
"""
|
| 505 |
🆕 v2.0: Brumby-style Retraining with Multi-GPU Support
|
| 506 |
"""
|
| 507 |
+
# output_dir 기본값 설정
|
| 508 |
+
if output_dir is None:
|
| 509 |
+
output_dir = f"{STORAGE_PATH}/finetuning_temp"
|
| 510 |
+
|
| 511 |
print("\n" + "="*80)
|
| 512 |
print("🔥 PHOENIX RETRAINING - Multi-GPU (v2.0)")
|
| 513 |
print("="*80)
|
|
|
|
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(description='PHOENIX v2.0 Multi-GPU')
    parser.add_argument('--port', type=int, default=None, help='Server port (default: auto find 7860-7960)')
    parser.add_argument('--share', action='store_true', help='Create public Gradio link')
    parser.add_argument('--host', type=str, default="0.0.0.0", help='Server host')
    args = parser.parse_args()

    demo.queue(max_size=20)

    if args.port is None:
        # Auto-discover a free port: Gradio's launch() raises OSError when the
        # requested port is already bound, so try 7860..7959 in order.
        for port in range(7860, 7960):
            try:
                demo.launch(
                    server_name=args.host,
                    server_port=port,
                    share=args.share,
                    show_error=True,
                )
                break
            except OSError:
                continue
        else:
            # BUG FIX: previously the loop fell through silently when every
            # port in 7860-7959 was busy — the script exited with no server
            # and no error. Fail loudly instead.
            raise RuntimeError(
                "No free port found in range 7860-7959; pass --port explicitly."
            )
    else:
        demo.launch(
            server_name=args.host,
            server_port=args.port,
            share=args.share,
            show_error=True,
        )
|