seawolf2357 committed on
Commit
35b8c28
·
verified ·
1 Parent(s): ba6990e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +170 -15
app.py CHANGED
@@ -1,11 +1,11 @@
1
  """
2
  🔮 PHOENIX Retention Research Platform - FINAL INTEGRATED VERSION
3
- Zero-shot Model Burning + Optional Fine-tuning
4
 
5
  ✅ Zero-shot Conversion (No Dataset Required)
6
  ✅ Optional Fine-tuning (Dataset-based)
7
  ✅ GQA Support
8
- ✅ HuggingFace Hub Integration
9
  ✅ Comprehensive Evaluation
10
 
11
  VIDraft AI Research Lab
@@ -37,6 +37,8 @@ from accelerate import Accelerator
37
  from tqdm import tqdm
38
  import copy
39
  import shutil
 
 
40
 
41
  # =====================================================
42
  # 전역 설정
@@ -49,6 +51,9 @@ VECTOR_DB_PATH = f"{STORAGE_PATH}/vector_store"
49
  MODELS_PATH = f"{STORAGE_PATH}/phoenix_models"
50
  DEFAULT_MODEL = "ibm-granite/granite-4.0-h-350m"
51
 
 
 
 
52
  Path(STORAGE_PATH).mkdir(parents=True, exist_ok=True)
53
  Path(VECTOR_DB_PATH).mkdir(parents=True, exist_ok=True)
54
  Path(MODELS_PATH).mkdir(parents=True, exist_ok=True)
@@ -56,6 +61,10 @@ Path(MODELS_PATH).mkdir(parents=True, exist_ok=True)
56
  print(f"🚀 PHOENIX Platform initialized on {DEVICE}")
57
  print(f"💾 Storage: {STORAGE_PATH}")
58
  print(f"🎯 Default Base Model: {DEFAULT_MODEL}")
 
 
 
 
59
 
60
  # =====================================================
61
  # PHOENIX Retention with GQA Support
@@ -481,6 +490,7 @@ class ExperimentDatabase:
481
  id INTEGER PRIMARY KEY AUTOINCREMENT,
482
  model_url TEXT NOT NULL,
483
  output_path TEXT NOT NULL,
 
484
  use_hierarchical BOOLEAN,
485
  dataset_used BOOLEAN,
486
  conversion_rate REAL,
@@ -523,13 +533,14 @@ class ExperimentDatabase:
523
  cursor = conn.cursor()
524
  cursor.execute("""
525
  INSERT INTO burning_history (
526
- model_url, output_path, use_hierarchical,
527
  dataset_used, conversion_rate, training_steps,
528
  final_loss, evaluation_score
529
- ) VALUES (?, ?, ?, ?, ?, ?, ?, ?)
530
  """, (
531
  burning_info.get('model_url'),
532
  burning_info.get('output_path'),
 
533
  burning_info.get('use_hierarchical'),
534
  burning_info.get('dataset_used'),
535
  burning_info.get('conversion_rate'),
@@ -548,6 +559,83 @@ class ExperimentDatabase:
548
  return [dict(row) for row in cursor.fetchall()]
549
 
550
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
551
  # =====================================================
552
  # 모델 버닝 (Zero-shot + Optional Fine-tuning)
553
  # =====================================================
@@ -995,7 +1083,6 @@ def generate_text_phoenix(
995
 
996
  output_md = f"""
997
  ## 📝 Generated Text
998
-
999
  ```
1000
  {generated}
1001
  ```
@@ -1026,9 +1113,12 @@ def burn_phoenix_model_ui(
1026
  batch_size,
1027
  learning_rate,
1028
  max_steps,
 
 
 
1029
  ):
1030
  """
1031
- Gradio UI용 모델 버닝 함수
1032
  """
1033
  try:
1034
  if not model_url.strip():
@@ -1065,10 +1155,25 @@ def burn_phoenix_model_ui(
1065
  )
1066
 
1067
  if result['status'] == 'success':
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1068
  # Save to database
1069
  burning_info = {
1070
  'model_url': model_url,
1071
  'output_path': result['model_path'],
 
1072
  'use_hierarchical': use_hierarchical,
1073
  'dataset_used': has_dataset,
1074
  'conversion_rate': result.get('conversion_rate', 0.0),
@@ -1087,7 +1192,22 @@ def burn_phoenix_model_ui(
1087
  - **Original**: {model_url}
1088
  - **Output**: `{result['model_path']}`
1089
  - **Type**: {'Fine-tuning' if has_dataset else 'Zero-shot'}
1090
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1091
  ## 📊 Metrics
1092
  - **Conversion Rate**: {result['conversion_rate']*100:.1f}%
1093
  - **Quality Score**: {result.get('quality_score', 0.0):.2f}/1.00
@@ -1113,13 +1233,22 @@ def burn_phoenix_model_ui(
1113
 
1114
  output_md += f"""
1115
  ## 🎯 Usage
1116
-
1117
  ```python
1118
  from transformers import AutoModelForCausalLM, AutoTokenizer
1119
 
 
1120
  model = AutoModelForCausalLM.from_pretrained("{result['model_path']}")
1121
  tokenizer = AutoTokenizer.from_pretrained("{result['model_path']}")
1122
-
 
 
 
 
 
 
 
 
 
1123
  inputs = tokenizer("Your prompt", return_tensors="pt")
1124
  outputs = model.generate(**inputs, max_new_tokens=50)
1125
  print(tokenizer.decode(outputs[0]))
@@ -1168,11 +1297,11 @@ def view_burning_history():
1168
  y='evaluation_score',
1169
  size='conversion_rate',
1170
  color='dataset_used',
1171
- hover_data=['model_url', 'output_path'],
1172
  title='Burning History'
1173
  )
1174
 
1175
- cols = ['id', 'model_url', 'output_path', 'conversion_rate',
1176
  'evaluation_score', 'training_steps', 'timestamp']
1177
  available = [c for c in cols if c in df.columns]
1178
 
@@ -1199,12 +1328,13 @@ with gr.Blocks(
1199
  gr.Markdown("""
1200
  # 🔮 PHOENIX Retention Platform
1201
 
1202
- **Zero-shot Model Burning + Optional Fine-tuning**
1203
 
1204
  ✅ Zero-shot Conversion (데이터셋 불필요!)
1205
  ✅ Optional Fine-tuning (데이터셋 기반)
1206
  ✅ GQA Support
1207
  ✅ O(n) Complexity
 
1208
 
1209
  ---
1210
  """)
@@ -1244,6 +1374,7 @@ with gr.Blocks(
1244
 
1245
  - **Zero-shot**: 데이터셋 없이 변환만 수행 (빠름!)
1246
  - **Fine-tuning**: 데이터셋으로 추가 학습 (성능 향상)
 
1247
  """)
1248
 
1249
  with gr.Row():
@@ -1260,6 +1391,24 @@ with gr.Blocks(
1260
  placeholder="phoenix_my_model (auto-generated if empty)"
1261
  )
1262
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1263
  gr.Markdown("---")
1264
  gr.Markdown("### 📊 Dataset (Optional)")
1265
 
@@ -1298,6 +1447,9 @@ with gr.Blocks(
1298
  burn_batch,
1299
  burn_lr,
1300
  burn_max_steps,
 
 
 
1301
  ],
1302
  [burn_output, burn_plot]
1303
  )
@@ -1352,21 +1504,24 @@ with gr.Blocks(
1352
 
1353
  hist_btn.click(view_burning_history, outputs=[hist_output, hist_plot])
1354
 
1355
- gr.Markdown("""
1356
  ---
1357
 
1358
  ## 🔥 PHOENIX Model Burning
1359
 
1360
  ### Zero-shot (데이터셋 불필요!)
1361
  1. 모델 URL 입력
1362
- 2. "Burn Model" 클릭
1363
- 3. 완료! `/data/phoenix_models/` 에 저장
 
1364
 
1365
  ### Fine-tuning (선택사항)
1366
  1. Dataset Path 입력
1367
  2. "Enable Fine-tuning" 체크
1368
  3. "Burn Model" 클릭
1369
 
 
 
1370
  **VIDraft AI Research Lab** | PHOENIX v1.0
1371
  """)
1372
 
 
1
  """
2
  🔮 PHOENIX Retention Research Platform - FINAL INTEGRATED VERSION
3
+ Zero-shot Model Burning + Optional Fine-tuning + HuggingFace Hub Auto-Upload
4
 
5
  ✅ Zero-shot Conversion (No Dataset Required)
6
  ✅ Optional Fine-tuning (Dataset-based)
7
  ✅ GQA Support
8
+ ✅ HuggingFace Hub Integration (Auto Upload)
9
  ✅ Comprehensive Evaluation
10
 
11
  VIDraft AI Research Lab
 
37
  from tqdm import tqdm
38
  import copy
39
  import shutil
40
+ import os
41
+ from huggingface_hub import HfApi, create_repo
42
 
43
  # =====================================================
44
  # 전역 설정
 
51
  MODELS_PATH = f"{STORAGE_PATH}/phoenix_models"
52
  DEFAULT_MODEL = "ibm-granite/granite-4.0-h-350m"
53
 
54
+ # HuggingFace Token
55
+ HF_TOKEN = os.getenv("HF_TOKEN")
56
+
57
  Path(STORAGE_PATH).mkdir(parents=True, exist_ok=True)
58
  Path(VECTOR_DB_PATH).mkdir(parents=True, exist_ok=True)
59
  Path(MODELS_PATH).mkdir(parents=True, exist_ok=True)
 
61
  print(f"🚀 PHOENIX Platform initialized on {DEVICE}")
62
  print(f"💾 Storage: {STORAGE_PATH}")
63
  print(f"🎯 Default Base Model: {DEFAULT_MODEL}")
64
+ if HF_TOKEN:
65
+ print(f"🔑 HuggingFace Token: {'*' * 10}{HF_TOKEN[-4:]}")
66
+ else:
67
+ print(f"⚠️ HuggingFace Token not found (upload disabled)")
68
 
69
  # =====================================================
70
  # PHOENIX Retention with GQA Support
 
490
  id INTEGER PRIMARY KEY AUTOINCREMENT,
491
  model_url TEXT NOT NULL,
492
  output_path TEXT NOT NULL,
493
+ hub_url TEXT,
494
  use_hierarchical BOOLEAN,
495
  dataset_used BOOLEAN,
496
  conversion_rate REAL,
 
533
  cursor = conn.cursor()
534
  cursor.execute("""
535
  INSERT INTO burning_history (
536
+ model_url, output_path, hub_url, use_hierarchical,
537
  dataset_used, conversion_rate, training_steps,
538
  final_loss, evaluation_score
539
+ ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
540
  """, (
541
  burning_info.get('model_url'),
542
  burning_info.get('output_path'),
543
+ burning_info.get('hub_url'),
544
  burning_info.get('use_hierarchical'),
545
  burning_info.get('dataset_used'),
546
  burning_info.get('conversion_rate'),
 
559
  return [dict(row) for row in cursor.fetchall()]
560
 
561
 
562
+ # =====================================================
563
+ # HuggingFace Hub Upload
564
+ # =====================================================
565
+
566
def upload_to_huggingface_hub(
    model_path: str,
    original_model_url: str,
    repo_name: str = None,
    private: bool = True,
    token: str = None
) -> Tuple[bool, str, str]:
    """
    Upload a PHOENIX model folder to the HuggingFace Hub.

    Args:
        model_path: Local directory whose contents are uploaded.
        original_model_url: Identifier/URL of the source model; its last
            path segment is used to auto-generate a repo name.
        repo_name: Repository name (without the user prefix). When empty,
            ``phoenix-<last segment of original_model_url>`` is used.
        private: Passed to ``create_repo`` as the repository visibility.
        token: HuggingFace access token. Falls back to the module-level
            ``HF_TOKEN`` when ``None``.

    Returns:
        (success, hub_url, message). On any failure ``success`` is False,
        ``hub_url`` is an empty string and ``message`` describes the error;
        this function does not raise.
    """
    import os
    import traceback

    if token is None:
        token = HF_TOKEN

    if not token:
        return False, "", "❌ HF_TOKEN not found. Set HF_TOKEN environment variable."

    # Validate the local folder before touching the Hub, so a bad path does
    # not leave an empty repository behind.
    if not model_path or not os.path.isdir(model_path):
        return False, "", f"❌ Upload failed: model folder not found: {model_path}"

    try:
        api = HfApi(token=token)

        # Get username
        user_info = api.whoami(token=token)
        username = user_info['name']

        # Auto-generate repo name
        if not repo_name:
            # Strip whitespace and trailing slashes so "org/model/" still
            # yields "model" instead of an empty segment.
            base_name = original_model_url.strip().rstrip('/').split('/')[-1]
            if not base_name:
                raise ValueError(
                    f"cannot derive a repository name from {original_model_url!r}"
                )
            repo_name = f"phoenix-{base_name}"

        repo_id = f"{username}/{repo_name}"

        print("\n📤 Uploading to HuggingFace Hub...")
        print(f"   Repo: {repo_id}")
        print(f"   Private: {private}")

        # Create repo (best effort: a failure here is logged, and the
        # upload below reports the definitive error if the repo is unusable)
        try:
            create_repo(
                repo_id=repo_id,
                token=token,
                private=private,
                repo_type="model",
                exist_ok=True
            )
            print("   ✅ Repository created/verified")
        except Exception as e:
            print(f"   ⚠️ Repository creation: {e}")

        # Upload folder
        print("   📦 Uploading files...")
        api.upload_folder(
            folder_path=model_path,
            repo_id=repo_id,
            repo_type="model",
            token=token,
        )

        hub_url = f"https://huggingface.co/{repo_id}"

        print("   ✅ Upload complete!")
        print(f"   🔗 {hub_url}")

        return True, hub_url, f"✅ Successfully uploaded to {hub_url}"

    except Exception as e:
        # Top-level boundary for the UI: log the full traceback, return a
        # short message instead of propagating.
        error_msg = traceback.format_exc()
        print(f"\n❌ Upload failed:\n{error_msg}")
        return False, "", f"❌ Upload failed: {str(e)}"
637
+
638
+
639
  # =====================================================
640
  # 모델 버닝 (Zero-shot + Optional Fine-tuning)
641
  # =====================================================
 
1083
 
1084
  output_md = f"""
1085
  ## 📝 Generated Text
 
1086
  ```
1087
  {generated}
1088
  ```
 
1113
  batch_size,
1114
  learning_rate,
1115
  max_steps,
1116
+ upload_to_hub,
1117
+ hub_repo_name,
1118
+ hub_private,
1119
  ):
1120
  """
1121
+ Gradio UI용 모델 버닝 함수 (HuggingFace Hub Upload 포함)
1122
  """
1123
  try:
1124
  if not model_url.strip():
 
1155
  )
1156
 
1157
  if result['status'] == 'success':
1158
+ hub_url = None
1159
+
1160
+ # Upload to HuggingFace Hub (if enabled)
1161
+ if upload_to_hub:
1162
+ success, hub_url, upload_msg = upload_to_huggingface_hub(
1163
+ model_path=result['model_path'],
1164
+ original_model_url=model_url,
1165
+ repo_name=hub_repo_name if hub_repo_name.strip() else None,
1166
+ private=hub_private,
1167
+ )
1168
+
1169
+ if not success:
1170
+ print(f"\n{upload_msg}")
1171
+
1172
  # Save to database
1173
  burning_info = {
1174
  'model_url': model_url,
1175
  'output_path': result['model_path'],
1176
+ 'hub_url': hub_url,
1177
  'use_hierarchical': use_hierarchical,
1178
  'dataset_used': has_dataset,
1179
  'conversion_rate': result.get('conversion_rate', 0.0),
 
1192
  - **Original**: {model_url}
1193
  - **Output**: `{result['model_path']}`
1194
  - **Type**: {'Fine-tuning' if has_dataset else 'Zero-shot'}
1195
+ """
1196
+
1197
+ if hub_url:
1198
+ output_md += f"""
1199
+ ## 🌐 HuggingFace Hub
1200
+ - **URL**: [{hub_url}]({hub_url})
1201
+ - **Private**: {hub_private}
1202
+ - **Status**: ✅ Uploaded
1203
+ """
1204
+ elif upload_to_hub:
1205
+ output_md += f"""
1206
+ ## 🌐 HuggingFace Hub
1207
+ - **Status**: ❌ Upload failed (check logs)
1208
+ """
1209
+
1210
+ output_md += f"""
1211
  ## 📊 Metrics
1212
  - **Conversion Rate**: {result['conversion_rate']*100:.1f}%
1213
  - **Quality Score**: {result.get('quality_score', 0.0):.2f}/1.00
 
1233
 
1234
  output_md += f"""
1235
  ## 🎯 Usage
 
1236
  ```python
1237
  from transformers import AutoModelForCausalLM, AutoTokenizer
1238
 
1239
+ # Local
1240
  model = AutoModelForCausalLM.from_pretrained("{result['model_path']}")
1241
  tokenizer = AutoTokenizer.from_pretrained("{result['model_path']}")
1242
+ """
1243
+
1244
+ if hub_url:
1245
+ output_md += f"""
1246
+ # From HuggingFace Hub
1247
+ model = AutoModelForCausalLM.from_pretrained("{hub_url.replace('https://huggingface.co/', '')}")
1248
+ tokenizer = AutoTokenizer.from_pretrained("{hub_url.replace('https://huggingface.co/', '')}")
1249
+ """
1250
+
1251
+ output_md += f"""
1252
  inputs = tokenizer("Your prompt", return_tensors="pt")
1253
  outputs = model.generate(**inputs, max_new_tokens=50)
1254
  print(tokenizer.decode(outputs[0]))
 
1297
  y='evaluation_score',
1298
  size='conversion_rate',
1299
  color='dataset_used',
1300
+ hover_data=['model_url', 'output_path', 'hub_url'],
1301
  title='Burning History'
1302
  )
1303
 
1304
+ cols = ['id', 'model_url', 'hub_url', 'conversion_rate',
1305
  'evaluation_score', 'training_steps', 'timestamp']
1306
  available = [c for c in cols if c in df.columns]
1307
 
 
1328
  gr.Markdown("""
1329
  # 🔮 PHOENIX Retention Platform
1330
 
1331
+ **Zero-shot Model Burning + Optional Fine-tuning + HuggingFace Hub Auto-Upload**
1332
 
1333
  ✅ Zero-shot Conversion (데이터셋 불필요!)
1334
  ✅ Optional Fine-tuning (데이터셋 기반)
1335
  ✅ GQA Support
1336
  ✅ O(n) Complexity
1337
+ ✅ Auto Upload to HuggingFace Hub
1338
 
1339
  ---
1340
  """)
 
1374
 
1375
  - **Zero-shot**: 데이터셋 없이 변환만 수행 (빠름!)
1376
  - **Fine-tuning**: 데이터셋으로 추가 학습 (성능 향상)
1377
+ - **HuggingFace Hub**: 자동으로 Hub에 업로드 (Private 기본)
1378
  """)
1379
 
1380
  with gr.Row():
 
1391
  placeholder="phoenix_my_model (auto-generated if empty)"
1392
  )
1393
 
1394
+ gr.Markdown("---")
1395
+ gr.Markdown("### 🌐 HuggingFace Hub Upload")
1396
+
1397
+ burn_upload_hub = gr.Checkbox(
1398
+ value=True,
1399
+ label="📤 Upload to HuggingFace Hub"
1400
+ )
1401
+
1402
+ burn_hub_repo = gr.Textbox(
1403
+ label="📦 Hub Repository Name (optional)",
1404
+ placeholder="phoenix-granite-350m (auto-generated if empty)"
1405
+ )
1406
+
1407
+ burn_hub_private = gr.Checkbox(
1408
+ value=True,
1409
+ label="🔒 Private Repository"
1410
+ )
1411
+
1412
  gr.Markdown("---")
1413
  gr.Markdown("### 📊 Dataset (Optional)")
1414
 
 
1447
  burn_batch,
1448
  burn_lr,
1449
  burn_max_steps,
1450
+ burn_upload_hub,
1451
+ burn_hub_repo,
1452
+ burn_hub_private,
1453
  ],
1454
  [burn_output, burn_plot]
1455
  )
 
1504
 
1505
  hist_btn.click(view_burning_history, outputs=[hist_output, hist_plot])
1506
 
1507
+ gr.Markdown(f"""
1508
  ---
1509
 
1510
  ## 🔥 PHOENIX Model Burning
1511
 
1512
  ### Zero-shot (데이터셋 불필요!)
1513
  1. 모델 URL 입력
1514
+ 2. "Upload to HuggingFace Hub" 체크 (기본 Private)
1515
+ 3. "Burn Model" 클릭
1516
+ 4. 완료! → 로컬 + Hub에 자동 업로드
1517
 
1518
  ### Fine-tuning (선택사항)
1519
  1. Dataset Path 입력
1520
  2. "Enable Fine-tuning" 체크
1521
  3. "Burn Model" 클릭
1522
 
1523
+ **HuggingFace Token Status**: {'✅ Connected' if HF_TOKEN else '❌ Not Found (set HF_TOKEN env)'}
1524
+
1525
  **VIDraft AI Research Lab** | PHOENIX v1.0
1526
  """)
1527