Spaces:

Heartsync
/

phoenix

Paused

App Files Files Community

seawolf2357 committed 29 days ago

Commit

35b8c28

verified ·

1 Parent(s): ba6990e

Update app.py

Browse files

Files changed (1) hide show

app.py +170 -15

app.py CHANGED Viewed

@@ -1,11 +1,11 @@
 """
 🔮 PHOENIX Retention Research Platform - FINAL INTEGRATED VERSION
-Zero-shot Model Burning + Optional Fine-tuning
 ✅ Zero-shot Conversion (No Dataset Required)
 ✅ Optional Fine-tuning (Dataset-based)
 ✅ GQA Support
-✅ HuggingFace Hub Integration
 ✅ Comprehensive Evaluation
 VIDraft AI Research Lab
@@ -37,6 +37,8 @@ from accelerate import Accelerator
 from tqdm import tqdm
 import copy
 import shutil
 # =====================================================
 # 전역 설정
@@ -49,6 +51,9 @@ VECTOR_DB_PATH = f"{STORAGE_PATH}/vector_store"
 MODELS_PATH = f"{STORAGE_PATH}/phoenix_models"
 DEFAULT_MODEL = "ibm-granite/granite-4.0-h-350m"
 Path(STORAGE_PATH).mkdir(parents=True, exist_ok=True)
 Path(VECTOR_DB_PATH).mkdir(parents=True, exist_ok=True)
 Path(MODELS_PATH).mkdir(parents=True, exist_ok=True)
@@ -56,6 +61,10 @@ Path(MODELS_PATH).mkdir(parents=True, exist_ok=True)
 print(f"🚀 PHOENIX Platform initialized on {DEVICE}")
 print(f"💾 Storage: {STORAGE_PATH}")
 print(f"🎯 Default Base Model: {DEFAULT_MODEL}")
 # =====================================================
 # PHOENIX Retention with GQA Support
@@ -481,6 +490,7 @@ class ExperimentDatabase:
                     id INTEGER PRIMARY KEY AUTOINCREMENT,
                     model_url TEXT NOT NULL,
                     output_path TEXT NOT NULL,
                     use_hierarchical BOOLEAN,
                     dataset_used BOOLEAN,
                     conversion_rate REAL,
@@ -523,13 +533,14 @@ class ExperimentDatabase:
             cursor = conn.cursor()
             cursor.execute("""
                 INSERT INTO burning_history (
-                    model_url, output_path, use_hierarchical,
                     dataset_used, conversion_rate, training_steps,
                     final_loss, evaluation_score
-                ) VALUES (?, ?, ?, ?, ?, ?, ?, ?)
             """, (
                 burning_info.get('model_url'),
                 burning_info.get('output_path'),
                 burning_info.get('use_hierarchical'),
                 burning_info.get('dataset_used'),
                 burning_info.get('conversion_rate'),
@@ -548,6 +559,83 @@ class ExperimentDatabase:
             return [dict(row) for row in cursor.fetchall()]
 # =====================================================
 # 모델 버닝 (Zero-shot + Optional Fine-tuning)
 # =====================================================
@@ -995,7 +1083,6 @@ def generate_text_phoenix(
         output_md = f"""
 ## 📝 Generated Text
 ```
 {generated}
 ```
@@ -1026,9 +1113,12 @@ def burn_phoenix_model_ui(
     batch_size,
     learning_rate,
     max_steps,
 ):
     """
-    Gradio UI용 모델 버닝 함수
     """
     try:
         if not model_url.strip():
@@ -1065,10 +1155,25 @@ def burn_phoenix_model_ui(
             )
         if result['status'] == 'success':
             # Save to database
             burning_info = {
                 'model_url': model_url,
                 'output_path': result['model_path'],
                 'use_hierarchical': use_hierarchical,
                 'dataset_used': has_dataset,
                 'conversion_rate': result.get('conversion_rate', 0.0),
@@ -1087,7 +1192,22 @@ def burn_phoenix_model_ui(
 - **Original**: {model_url}
 - **Output**: `{result['model_path']}`
 - **Type**: {'Fine-tuning' if has_dataset else 'Zero-shot'}
 ## 📊 Metrics
 - **Conversion Rate**: {result['conversion_rate']*100:.1f}%
 - **Quality Score**: {result.get('quality_score', 0.0):.2f}/1.00
@@ -1113,13 +1233,22 @@ def burn_phoenix_model_ui(
             output_md += f"""
 ## 🎯 Usage
 ```python
 from transformers import AutoModelForCausalLM, AutoTokenizer
 model = AutoModelForCausalLM.from_pretrained("{result['model_path']}")
 tokenizer = AutoTokenizer.from_pretrained("{result['model_path']}")
 inputs = tokenizer("Your prompt", return_tensors="pt")
 outputs = model.generate(**inputs, max_new_tokens=50)
 print(tokenizer.decode(outputs[0]))
@@ -1168,11 +1297,11 @@ def view_burning_history():
             y='evaluation_score',
             size='conversion_rate',
             color='dataset_used',
-            hover_data=['model_url', 'output_path'],
             title='Burning History'
         )
-        cols = ['id', 'model_url', 'output_path', 'conversion_rate',
                 'evaluation_score', 'training_steps', 'timestamp']
         available = [c for c in cols if c in df.columns]
@@ -1199,12 +1328,13 @@ with gr.Blocks(
     gr.Markdown("""
     # 🔮 PHOENIX Retention Platform
-    **Zero-shot Model Burning + Optional Fine-tuning**
     ✅ Zero-shot Conversion (데이터셋 불필요!)
     ✅ Optional Fine-tuning (데이터셋 기반)
     ✅ GQA Support
     ✅ O(n) Complexity
     ---
     """)
@@ -1244,6 +1374,7 @@ with gr.Blocks(
             - **Zero-shot**: 데이터셋 없이 변환만 수행 (빠름!)
             - **Fine-tuning**: 데이터셋으로 추가 학습 (성능 향상)
             """)
             with gr.Row():
@@ -1260,6 +1391,24 @@ with gr.Blocks(
                         placeholder="phoenix_my_model (auto-generated if empty)"
                     )
                     gr.Markdown("---")
                     gr.Markdown("### 📊 Dataset (Optional)")
@@ -1298,6 +1447,9 @@ with gr.Blocks(
                     burn_batch,
                     burn_lr,
                     burn_max_steps,
                 ],
                 [burn_output, burn_plot]
             )
@@ -1352,21 +1504,24 @@ with gr.Blocks(
             hist_btn.click(view_burning_history, outputs=[hist_output, hist_plot])
-    gr.Markdown("""
     ---
     ## 🔥 PHOENIX Model Burning
     ### Zero-shot (데이터셋 불필요!)
     1. 모델 URL 입력
-    2. "Burn Model" 클릭
-    3. 완료! → `/data/phoenix_models/` 에 저장
     ### Fine-tuning (선택사항)
     1. Dataset Path 입력
     2. "Enable Fine-tuning" 체크
     3. "Burn Model" 클릭
     **VIDraft AI Research Lab** | PHOENIX v1.0
     """)

 """
 🔮 PHOENIX Retention Research Platform - FINAL INTEGRATED VERSION
+Zero-shot Model Burning + Optional Fine-tuning + HuggingFace Hub Auto-Upload
 ✅ Zero-shot Conversion (No Dataset Required)
 ✅ Optional Fine-tuning (Dataset-based)
 ✅ GQA Support
+✅ HuggingFace Hub Integration (Auto Upload)
 ✅ Comprehensive Evaluation
 VIDraft AI Research Lab
 from tqdm import tqdm
 import copy
 import shutil
+import os
+from huggingface_hub import HfApi, create_repo
 # =====================================================
 # 전역 설정
 MODELS_PATH = f"{STORAGE_PATH}/phoenix_models"
 DEFAULT_MODEL = "ibm-granite/granite-4.0-h-350m"
+# HuggingFace Token
+HF_TOKEN = os.getenv("HF_TOKEN")
 Path(STORAGE_PATH).mkdir(parents=True, exist_ok=True)
 Path(VECTOR_DB_PATH).mkdir(parents=True, exist_ok=True)
 Path(MODELS_PATH).mkdir(parents=True, exist_ok=True)
 print(f"🚀 PHOENIX Platform initialized on {DEVICE}")
 print(f"💾 Storage: {STORAGE_PATH}")
 print(f"🎯 Default Base Model: {DEFAULT_MODEL}")
+if HF_TOKEN:
+    print(f"🔑 HuggingFace Token: {'*' * 10}{HF_TOKEN[-4:]}")
+else:
+    print(f"⚠️ HuggingFace Token not found (upload disabled)")
 # =====================================================
 # PHOENIX Retention with GQA Support
                     id INTEGER PRIMARY KEY AUTOINCREMENT,
                     model_url TEXT NOT NULL,
                     output_path TEXT NOT NULL,
+                    hub_url TEXT,
                     use_hierarchical BOOLEAN,
                     dataset_used BOOLEAN,
                     conversion_rate REAL,
             cursor = conn.cursor()
             cursor.execute("""
                 INSERT INTO burning_history (
+                    model_url, output_path, hub_url, use_hierarchical,
                     dataset_used, conversion_rate, training_steps,
                     final_loss, evaluation_score
+                ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
             """, (
                 burning_info.get('model_url'),
                 burning_info.get('output_path'),
+                burning_info.get('hub_url'),
                 burning_info.get('use_hierarchical'),
                 burning_info.get('dataset_used'),
                 burning_info.get('conversion_rate'),
             return [dict(row) for row in cursor.fetchall()]
+# =====================================================
+# HuggingFace Hub Upload
+# =====================================================
def _derive_hub_repo_name(original_model_url: str) -> str:
    """Build the default ``phoenix-<model>`` repo name from a model URL or id."""
    # Strip trailing slashes first so "org/model/" does not yield "phoenix-".
    base_name = original_model_url.strip().rstrip('/').split('/')[-1]
    return f"phoenix-{base_name}" if base_name else "phoenix-model"


def upload_to_huggingface_hub(
    model_path: str,
    original_model_url: str,
    repo_name: str = None,
    private: bool = True,
    token: str = None
) -> Tuple[bool, str, str]:
    """
    Upload a PHOENIX model folder to the HuggingFace Hub.

    Args:
        model_path: Local directory whose contents are uploaded.
        original_model_url: Source model id/URL; its last path segment is
            used to auto-generate the repo name when ``repo_name`` is empty.
        repo_name: Target repository name. A bare name is placed under the
            authenticated user's namespace; a value that already contains
            ``/`` is used as the full repo id. Surrounding whitespace is
            ignored.
        private: Passed to ``create_repo`` as the repository visibility.
        token: HuggingFace token. When ``None``, the module-level
            ``HF_TOKEN`` is used.

    Returns:
        (success, hub_url, message). On failure ``hub_url`` is ``""`` and
        ``message`` describes the problem; this function does not raise.
    """
    import os
    import traceback

    if token is None:
        token = HF_TOKEN
    if not token:
        return False, "", "❌ HF_TOKEN not found. Set HF_TOKEN environment variable."

    # Validate the local folder BEFORE any network call: otherwise a bad path
    # would still create an empty repository on the Hub and only then fail.
    if not model_path or not os.path.isdir(model_path):
        return False, "", f"❌ Upload failed: model path not found: {model_path}"

    try:
        api = HfApi(token=token)

        # Normalise the requested name; fall back to an auto-generated one.
        repo_name = (repo_name or "").strip() or _derive_hub_repo_name(original_model_url)

        if '/' in repo_name:
            # Caller already supplied "namespace/name"; do not prefix again.
            repo_id = repo_name
        else:
            user_info = api.whoami(token=token)
            repo_id = f"{user_info['name']}/{repo_name}"

        print("\n📤 Uploading to HuggingFace Hub...")
        print(f"   Repo: {repo_id}")
        print(f"   Private: {private}")

        # Repository creation is best-effort: if it fails for a reason that
        # actually matters, upload_folder below fails too and is reported by
        # the outer handler.
        try:
            create_repo(
                repo_id=repo_id,
                token=token,
                private=private,
                repo_type="model",
                exist_ok=True
            )
            print("   ✅ Repository created/verified")
        except Exception as e:
            print(f"   ⚠️ Repository creation: {e}")

        print("   📦 Uploading files...")
        api.upload_folder(
            folder_path=model_path,
            repo_id=repo_id,
            repo_type="model",
            token=token,
        )

        hub_url = f"https://huggingface.co/{repo_id}"
        print("   ✅ Upload complete!")
        print(f"   🔗 {hub_url}")
        return True, hub_url, f"✅ Successfully uploaded to {hub_url}"
    except Exception as e:
        # Top-level boundary for the UI: log the full traceback, return a
        # short message instead of propagating.
        error_msg = traceback.format_exc()
        print(f"\n❌ Upload failed:\n{error_msg}")
        return False, "", f"❌ Upload failed: {str(e)}"
 # =====================================================
 # 모델 버닝 (Zero-shot + Optional Fine-tuning)
 # =====================================================
         output_md = f"""
 ## 📝 Generated Text
 ```
 {generated}
 ```
     batch_size,
     learning_rate,
     max_steps,
+    upload_to_hub,
+    hub_repo_name,
+    hub_private,
 ):
     """
+    Gradio UI용 모델 버닝 함수 (HuggingFace Hub Upload 포함)
     """
     try:
         if not model_url.strip():
             )
         if result['status'] == 'success':
+            hub_url = None
+            # Upload to HuggingFace Hub (if enabled)
+            if upload_to_hub:
+                success, hub_url, upload_msg = upload_to_huggingface_hub(
+                    model_path=result['model_path'],
+                    original_model_url=model_url,
+                    repo_name=hub_repo_name if hub_repo_name.strip() else None,
+                    private=hub_private,
+                )
+                if not success:
+                    print(f"\n{upload_msg}")
             # Save to database
             burning_info = {
                 'model_url': model_url,
                 'output_path': result['model_path'],
+                'hub_url': hub_url,
                 'use_hierarchical': use_hierarchical,
                 'dataset_used': has_dataset,
                 'conversion_rate': result.get('conversion_rate', 0.0),
 - **Original**: {model_url}
 - **Output**: `{result['model_path']}`
 - **Type**: {'Fine-tuning' if has_dataset else 'Zero-shot'}
+"""
+            if hub_url:
+                output_md += f"""
+## 🌐 HuggingFace Hub
+- **URL**: [{hub_url}]({hub_url})
+- **Private**: {hub_private}
+- **Status**: ✅ Uploaded
+"""
+            elif upload_to_hub:
+                output_md += f"""
+## 🌐 HuggingFace Hub
+- **Status**: ❌ Upload failed (check logs)
+"""
+            output_md += f"""
 ## 📊 Metrics
 - **Conversion Rate**: {result['conversion_rate']*100:.1f}%
 - **Quality Score**: {result.get('quality_score', 0.0):.2f}/1.00
             output_md += f"""
 ## 🎯 Usage
 ```python
 from transformers import AutoModelForCausalLM, AutoTokenizer
+# Local
 model = AutoModelForCausalLM.from_pretrained("{result['model_path']}")
 tokenizer = AutoTokenizer.from_pretrained("{result['model_path']}")
+"""
+            if hub_url:
+                output_md += f"""
+# From HuggingFace Hub
+model = AutoModelForCausalLM.from_pretrained("{hub_url.replace('https://huggingface.co/', '')}")
+tokenizer = AutoTokenizer.from_pretrained("{hub_url.replace('https://huggingface.co/', '')}")
+"""
+            output_md += f"""
 inputs = tokenizer("Your prompt", return_tensors="pt")
 outputs = model.generate(**inputs, max_new_tokens=50)
 print(tokenizer.decode(outputs[0]))
             y='evaluation_score',
             size='conversion_rate',
             color='dataset_used',
+            hover_data=['model_url', 'output_path', 'hub_url'],
             title='Burning History'
         )
+        cols = ['id', 'model_url', 'hub_url', 'conversion_rate',
                 'evaluation_score', 'training_steps', 'timestamp']
         available = [c for c in cols if c in df.columns]
     gr.Markdown("""
     # 🔮 PHOENIX Retention Platform
+    **Zero-shot Model Burning + Optional Fine-tuning + HuggingFace Hub Auto-Upload**
     ✅ Zero-shot Conversion (데이터셋 불필요!)
     ✅ Optional Fine-tuning (데이터셋 기반)
     ✅ GQA Support
     ✅ O(n) Complexity
+    ✅ Auto Upload to HuggingFace Hub
     ---
     """)
             - **Zero-shot**: 데이터셋 없이 변환만 수행 (빠름!)
             - **Fine-tuning**: 데이터셋으로 추가 학습 (성능 향상)
+            - **HuggingFace Hub**: 자동으로 Hub에 업로드 (Private 기본)
             """)
             with gr.Row():
                         placeholder="phoenix_my_model (auto-generated if empty)"
                     )
+                    gr.Markdown("---")
+                    gr.Markdown("### 🌐 HuggingFace Hub Upload")
+                    burn_upload_hub = gr.Checkbox(
+                        value=True,
+                        label="📤 Upload to HuggingFace Hub"
+                    )
+                    burn_hub_repo = gr.Textbox(
+                        label="📦 Hub Repository Name (optional)",
+                        placeholder="phoenix-granite-350m (auto-generated if empty)"
+                    )
+                    burn_hub_private = gr.Checkbox(
+                        value=True,
+                        label="🔒 Private Repository"
+                    )
                     gr.Markdown("---")
                     gr.Markdown("### 📊 Dataset (Optional)")
                     burn_batch,
                     burn_lr,
                     burn_max_steps,
+                    burn_upload_hub,
+                    burn_hub_repo,
+                    burn_hub_private,
                 ],
                 [burn_output, burn_plot]
             )
             hist_btn.click(view_burning_history, outputs=[hist_output, hist_plot])
+    gr.Markdown(f"""
     ---
     ## 🔥 PHOENIX Model Burning
     ### Zero-shot (데이터셋 불필요!)
     1. 모델 URL 입력
+    2. "Upload to HuggingFace Hub" 체크 (기본 Private)
+    3. "Burn Model" 클릭
+    4. 완료! → 로컬 + Hub에 자동 업로드
     ### Fine-tuning (선택사항)
     1. Dataset Path 입력
     2. "Enable Fine-tuning" 체크
     3. "Burn Model" 클릭
+    **HuggingFace Token Status**: {'✅ Connected' if HF_TOKEN else '❌ Not Found (set HF_TOKEN env)'}
     **VIDraft AI Research Lab** | PHOENIX v1.0
     """)