seawolf2357 committed on
Commit 23f9fc2 · verified · 1 Parent(s): c381ead

Update app.py

Files changed (1)
  1. app.py +50 -5
app.py CHANGED
@@ -48,7 +48,8 @@ from huggingface_hub import HfApi, create_repo
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 NUM_GPUS = torch.cuda.device_count()
 
-STORAGE_PATH = "/data"
+# Ubuntu compatibility: use the home directory or an environment variable
+STORAGE_PATH = os.getenv("PHOENIX_STORAGE_PATH", str(Path.home() / "phoenix_data"))
 DB_PATH = f"{STORAGE_PATH}/phoenix_experiments.db"
 MODELS_PATH = f"{STORAGE_PATH}/phoenix_models"
 DEFAULT_MODEL = "Qwen/Qwen3-0.6B"
@@ -56,8 +57,19 @@ DEFAULT_MODEL = "Qwen/Qwen3-0.6B"
 # HuggingFace Token
 HF_TOKEN = os.getenv("HF_TOKEN")
 
-Path(STORAGE_PATH).mkdir(parents=True, exist_ok=True)
-Path(MODELS_PATH).mkdir(parents=True, exist_ok=True)
+# Create directories (handle permission errors)
+try:
+    Path(STORAGE_PATH).mkdir(parents=True, exist_ok=True)
+    Path(MODELS_PATH).mkdir(parents=True, exist_ok=True)
+    print(f"✅ Storage initialized: {STORAGE_PATH}")
+except PermissionError:
+    print(f"⚠️ Permission denied for {STORAGE_PATH}")
+    print(f"   Using current directory instead")
+    STORAGE_PATH = "./phoenix_data"
+    DB_PATH = f"{STORAGE_PATH}/phoenix_experiments.db"
+    MODELS_PATH = f"{STORAGE_PATH}/phoenix_models"
+    Path(STORAGE_PATH).mkdir(parents=True, exist_ok=True)
+    Path(MODELS_PATH).mkdir(parents=True, exist_ok=True)
 
 print(f"🔥 PHOENIX Platform v2.0 - Multi-GPU Optimized")
 print(f"💾 Storage: {STORAGE_PATH}")
@@ -486,12 +498,16 @@ def finetune_retention_model(
     num_steps: int = 3000,
     batch_size: int = 4,
     learning_rate: float = 1e-5,
-    output_dir: str = "/data/finetuning_temp",
+    output_dir: str = None,
     use_gradient_checkpointing: bool = True,
 ):
     """
     🆕 v2.0: Brumby-style Retraining with Multi-GPU Support
     """
+    # Set the default output_dir
+    if output_dir is None:
+        output_dir = f"{STORAGE_PATH}/finetuning_temp"
+
     print("\n" + "="*80)
     print("🔥 PHOENIX RETRAINING - Multi-GPU (v2.0)")
     print("="*80)
@@ -1513,5 +1529,34 @@ with gr.Blocks(title="🔥 PHOENIX v2.0 Multi-GPU", theme=gr.themes.Soft()) as d
 
 
 if __name__ == "__main__":
+    import argparse
+
+    parser = argparse.ArgumentParser(description='PHOENIX v2.0 Multi-GPU')
+    parser.add_argument('--port', type=int, default=None, help='Server port (default: auto find 7860-7960)')
+    parser.add_argument('--share', action='store_true', help='Create public Gradio link')
+    parser.add_argument('--host', type=str, default="0.0.0.0", help='Server host')
+    args = parser.parse_args()
+
     demo.queue(max_size=20)
-    demo.launch(server_name="0.0.0.0", server_port=7860, share=False)
+
+    # Find a free port automatically
+    if args.port is None:
+        # Try ports 7860 through 7959
+        for port in range(7860, 7960):
+            try:
+                demo.launch(
+                    server_name=args.host,
+                    server_port=port,
+                    share=args.share,
+                    show_error=True
+                )
+                break
+            except OSError:
+                continue
+    else:
+        demo.launch(
+            server_name=args.host,
+            server_port=args.port,
+            share=args.share,
+            show_error=True
+        )
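Usage note: with these flags the app can be launched as python app.py --port 7861 --share, or with no flags to scan 7860-7959 for a free port. The scan relies on launch() raising OSError when the requested port is busy; a standard-library sketch of the same probe (socket does not appear in app.py, this is an illustration only):

    import socket

    def first_free_port(start: int = 7860, end: int = 7960) -> int:
        # Mirrors the range(7860, 7960) scan above: end is exclusive
        for port in range(start, end):
            with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
                try:
                    s.bind(("0.0.0.0", port))
                    return port  # bind succeeded, so the port is free
                except OSError:
                    continue  # port is busy, try the next one
        raise OSError(f"no free port in {start}-{end - 1}")

    print(first_free_port())

Since demo.launch() normally blocks when run as a script, the break in the loop above is only reached after the server shuts down; the loop's real work happens in the except OSError: continue path.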