Update app.py
Browse files
app.py
CHANGED
|
@@ -54,18 +54,18 @@ class ExperimentDatabase:
|
|
| 54 |
def __init__(self, db_path: str):
|
| 55 |
self.db_path = db_path
|
| 56 |
self.init_database()
|
|
|
|
| 57 |
|
| 58 |
def init_database(self):
|
| 59 |
"""데이터베이스 초기화"""
|
| 60 |
with sqlite3.connect(self.db_path) as conn:
|
| 61 |
cursor = conn.cursor()
|
| 62 |
|
| 63 |
-
# 실험 테이블
|
| 64 |
cursor.execute("""
|
| 65 |
CREATE TABLE IF NOT EXISTS experiments (
|
| 66 |
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
| 67 |
model_type TEXT NOT NULL,
|
| 68 |
-
base_model_url TEXT,
|
| 69 |
sequence_length INTEGER,
|
| 70 |
power_mode TEXT,
|
| 71 |
compression_level REAL,
|
|
@@ -92,14 +92,40 @@ class ExperimentDatabase:
|
|
| 92 |
ON experiments(timestamp DESC)
|
| 93 |
""")
|
| 94 |
|
| 95 |
-
cursor.execute("""
|
| 96 |
-
CREATE INDEX IF NOT EXISTS idx_base_model
|
| 97 |
-
ON experiments(base_model_url)
|
| 98 |
-
""")
|
| 99 |
-
|
| 100 |
conn.commit()
|
| 101 |
print("✅ Database initialized")
|
| 102 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 103 |
def save_experiment(self, config: Dict, metrics: Dict) -> int:
|
| 104 |
"""실험 저장"""
|
| 105 |
with sqlite3.connect(self.db_path) as conn:
|
|
@@ -171,13 +197,17 @@ class ExperimentDatabase:
|
|
| 171 |
""")
|
| 172 |
by_model = dict(cursor.fetchall())
|
| 173 |
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 181 |
|
| 182 |
return {
|
| 183 |
'total_experiments': total,
|
|
@@ -189,48 +219,66 @@ class RetentionVectorStore:
|
|
| 189 |
"""ChromaDB 벡터 저장소"""
|
| 190 |
|
| 191 |
def __init__(self, persist_directory: str):
|
| 192 |
-
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
|
| 199 |
-
|
| 200 |
-
|
| 201 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 202 |
|
| 203 |
def add_retention_state(self, experiment_id: int, states: Dict, metadata: Dict):
|
| 204 |
"""Retention state 저장"""
|
| 205 |
-
|
|
|
|
| 206 |
|
| 207 |
-
|
| 208 |
-
|
| 209 |
-
|
| 210 |
-
|
| 211 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 212 |
|
| 213 |
def search(self, query: str, top_k: int = 10) -> List[Dict]:
|
| 214 |
"""실험 검색"""
|
| 215 |
-
|
| 216 |
-
|
| 217 |
-
results = self.collection.query(
|
| 218 |
-
query_embeddings=[query_vector.tolist()],
|
| 219 |
-
n_results=top_k
|
| 220 |
-
)
|
| 221 |
-
|
| 222 |
-
if not results['ids'][0]:
|
| 223 |
return []
|
| 224 |
|
| 225 |
-
|
| 226 |
-
|
| 227 |
-
|
| 228 |
-
|
| 229 |
-
|
| 230 |
-
|
| 231 |
-
|
| 232 |
-
|
| 233 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 234 |
|
| 235 |
def _states_to_vector(self, states: Dict) -> np.ndarray:
|
| 236 |
"""States를 고정 크기 벡터로 변환"""
|
|
@@ -648,14 +696,18 @@ def initialize_default_models():
|
|
| 648 |
models = {}
|
| 649 |
|
| 650 |
try:
|
| 651 |
-
# PHOENIX with Granite
|
| 652 |
-
|
| 653 |
-
|
| 654 |
-
|
| 655 |
-
|
| 656 |
-
|
| 657 |
-
|
| 658 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 659 |
|
| 660 |
# PHOENIX without base
|
| 661 |
models['phoenix_standalone'] = PHOENIXRetention(
|
|
@@ -665,21 +717,32 @@ def initialize_default_models():
|
|
| 665 |
device=DEVICE,
|
| 666 |
base_model_url=None
|
| 667 |
)
|
|
|
|
| 668 |
|
| 669 |
-
# Transformer baseline
|
| 670 |
-
|
| 671 |
-
|
| 672 |
-
|
| 673 |
-
|
| 674 |
-
|
| 675 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 676 |
|
| 677 |
-
print("✅
|
| 678 |
return models
|
| 679 |
|
| 680 |
except Exception as e:
|
| 681 |
print(f"❌ Model initialization failed: {e}")
|
| 682 |
-
return {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 683 |
|
| 684 |
# 데이터베이스 및 모델 초기화
|
| 685 |
db = ExperimentDatabase(DB_PATH)
|
|
@@ -713,7 +776,7 @@ def run_retention_experiment(
|
|
| 713 |
# 실험 설정
|
| 714 |
config = {
|
| 715 |
'model_type': model_name,
|
| 716 |
-
'base_model_url': custom_model_url if custom_model_url else model.base_model_url,
|
| 717 |
'sequence_length': sequence_length,
|
| 718 |
'power_mode': power_mode,
|
| 719 |
'compression_level': compression_level,
|
|
@@ -890,10 +953,16 @@ def view_experiment_history(limit=20):
|
|
| 890 |
title='모델별 실행 시간 추이'
|
| 891 |
)
|
| 892 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 893 |
history_text = f"""
|
| 894 |
## 📊 실험 이력 ({len(df)}개)
|
| 895 |
|
| 896 |
-
{df[
|
| 897 |
"""
|
| 898 |
|
| 899 |
return history_text, fig
|
|
@@ -917,9 +986,10 @@ def get_database_statistics():
|
|
| 917 |
for model, count in stats['by_model'].items():
|
| 918 |
stats_text += f"- **{model}**: {count}개\n"
|
| 919 |
|
| 920 |
-
|
| 921 |
-
|
| 922 |
-
|
|
|
|
| 923 |
|
| 924 |
return stats_text
|
| 925 |
|
|
@@ -954,7 +1024,7 @@ with gr.Blocks(
|
|
| 954 |
with gr.Column(scale=1):
|
| 955 |
model_select = gr.Dropdown(
|
| 956 |
choices=list(MODELS.keys()),
|
| 957 |
-
value=
|
| 958 |
label="기본 모델 선택"
|
| 959 |
)
|
| 960 |
|
|
|
|
| 54 |
def __init__(self, db_path: str):
    """Remember the SQLite file location and prepare its schema.

    Args:
        db_path: Path handed to ``sqlite3.connect`` by the other methods.
    """
    self.db_path = db_path

    # Create the base table first ...
    self.init_database()
    # ... then run the migration that adds newer columns to it.
    self.migrate_database()
|
| 58 |
|
| 59 |
def init_database(self):
|
| 60 |
"""데이터베이스 초기화"""
|
| 61 |
with sqlite3.connect(self.db_path) as conn:
|
| 62 |
cursor = conn.cursor()
|
| 63 |
|
| 64 |
+
# 실험 테이블 (기본 버전)
|
| 65 |
cursor.execute("""
|
| 66 |
CREATE TABLE IF NOT EXISTS experiments (
|
| 67 |
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
| 68 |
model_type TEXT NOT NULL,
|
|
|
|
| 69 |
sequence_length INTEGER,
|
| 70 |
power_mode TEXT,
|
| 71 |
compression_level REAL,
|
|
|
|
| 92 |
ON experiments(timestamp DESC)
|
| 93 |
""")
|
| 94 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 95 |
conn.commit()
|
| 96 |
print("✅ Database initialized")
|
| 97 |
|
| 98 |
+
def migrate_database(self):
    """Bring an existing ``experiments`` table up to the current schema.

    Adds the ``base_model_url`` column when it is missing and makes sure the
    ``idx_base_model`` index on that column exists. SQLite operational
    errors are reported on stdout instead of being raised, so a failed
    migration does not abort start-up.
    """
    from contextlib import closing

    # ``with sqlite3.connect(...)`` on its own only commits or rolls back;
    # ``closing`` makes sure the connection is actually released afterwards.
    with closing(sqlite3.connect(self.db_path)) as conn:
        cursor = conn.cursor()

        # Each PRAGMA table_info row carries the column name at index 1.
        cursor.execute("PRAGMA table_info(experiments)")
        columns = {row[1] for row in cursor.fetchall()}

        # Add the base_model_url column if the table predates it.
        if 'base_model_url' not in columns:
            try:
                cursor.execute("""
                    ALTER TABLE experiments
                    ADD COLUMN base_model_url TEXT
                """)
                print("✅ Database migrated: base_model_url column added")
            except sqlite3.OperationalError as e:
                print(f"⚠️ Migration warning: {e}")

        # Create the index; report a failure rather than hiding it, because
        # it means the column (or the whole table) is still missing.
        try:
            cursor.execute("""
                CREATE INDEX IF NOT EXISTS idx_base_model
                ON experiments(base_model_url)
            """)
        except sqlite3.OperationalError as e:
            print(f"⚠️ Index creation warning: {e}")

        conn.commit()
|
| 128 |
+
|
| 129 |
def save_experiment(self, config: Dict, metrics: Dict) -> int:
|
| 130 |
"""실험 저장"""
|
| 131 |
with sqlite3.connect(self.db_path) as conn:
|
|
|
|
| 197 |
""")
|
| 198 |
by_model = dict(cursor.fetchall())
|
| 199 |
|
| 200 |
+
# base_model_url 컬럼이 있는 경우에만 조회
|
| 201 |
+
try:
|
| 202 |
+
cursor.execute("""
|
| 203 |
+
SELECT base_model_url, COUNT(*) as count
|
| 204 |
+
FROM experiments
|
| 205 |
+
WHERE base_model_url IS NOT NULL
|
| 206 |
+
GROUP BY base_model_url
|
| 207 |
+
""")
|
| 208 |
+
by_base_model = dict(cursor.fetchall())
|
| 209 |
+
except sqlite3.OperationalError:
|
| 210 |
+
by_base_model = {}
|
| 211 |
|
| 212 |
return {
|
| 213 |
'total_experiments': total,
|
|
|
|
| 219 |
"""ChromaDB 벡터 저장소"""
|
| 220 |
|
| 221 |
def __init__(self, persist_directory: str):
    """Open the ChromaDB client and the ``retention_states`` collection.

    Initialization is best-effort: if anything raises, a warning is printed
    and both ``self.client`` and ``self.collection`` are set to ``None``.

    Args:
        persist_directory: Directory passed to the ChromaDB ``Settings``.
    """
    try:
        settings = Settings(
            persist_directory=persist_directory,
            anonymized_telemetry=False,
        )
        self.client = chromadb.Client(settings)
        self.collection = self.client.get_or_create_collection(
            name="retention_states",
            metadata={"description": "PHOENIX Retention states"},
        )
        print("✅ Vector store initialized")
    except Exception as e:
        print(f"⚠️ Vector store initialization warning: {e}")
        # Leave the store in its disabled state.
        self.client = self.collection = None
|
| 237 |
|
| 238 |
def add_retention_state(self, experiment_id: int, states: Dict, metadata: Dict):
    """Store one experiment's retention state in the vector collection.

    Does nothing when the collection is unavailable. Any error raised while
    converting or saving is printed as a warning and not propagated.

    Args:
        experiment_id: Used for the record id (``exp_<id>``) and copied into
            the stored metadata under ``experiment_id``.
        states: Passed to ``self._states_to_vector`` to build the embedding.
        metadata: Extra key/value pairs stored alongside the embedding.
    """
    if self.collection is None:
        return

    try:
        embedding = self._states_to_vector(states).tolist()
        # The explicit experiment_id wins over any same-named key in metadata.
        record_metadata = {**metadata, 'experiment_id': experiment_id}
        record_id = f"exp_{experiment_id}"

        self.collection.add(
            embeddings=[embedding],
            metadatas=[record_metadata],
            ids=[record_id],
        )
    except Exception as e:
        print(f"⚠️ Vector store save warning: {e}")
|
| 253 |
|
| 254 |
def search(self, query: str, top_k: int = 10) -> List[Dict]:
    """Return stored experiments ranked against a text query.

    Args:
        query: Text converted to an embedding via ``self._text_to_vector``.
        top_k: Maximum number of results requested from the collection.

    Returns:
        One dict per hit with ``experiment_id`` (taken from the stored
        metadata, ``None`` if absent), ``score`` (``1.0 - distance``) and the
        full ``metadata``. An empty list when the collection is unavailable,
        when there are no hits, or when the lookup raises (a warning is
        printed in that case).
    """
    if self.collection is None:
        return []

    try:
        query_vector = self._text_to_vector(query)

        results = self.collection.query(
            query_embeddings=[query_vector.tolist()],
            n_results=top_k,
        )

        # Only one query embedding is sent, so only the first result list
        # of each field is relevant.
        ids = results['ids'][0]
        if not ids:
            return []

        hits = zip(ids, results['metadatas'][0], results['distances'][0])
        return [
            {
                'experiment_id': meta.get('experiment_id'),
                'score': 1.0 - distance,
                'metadata': meta,
            }
            for _, meta, distance in hits
        ]
    except Exception as e:
        print(f"⚠️ Vector store search warning: {e}")
        return []
|
| 282 |
|
| 283 |
def _states_to_vector(self, states: Dict) -> np.ndarray:
|
| 284 |
"""States를 고정 크기 벡터로 변환"""
|
|
|
|
| 696 |
models = {}
|
| 697 |
|
| 698 |
try:
|
| 699 |
+
# PHOENIX with Granite (옵션)
|
| 700 |
+
try:
|
| 701 |
+
models['phoenix_granite'] = PHOENIXRetention(
|
| 702 |
+
d_model=512,
|
| 703 |
+
d_state=256,
|
| 704 |
+
num_layers=12,
|
| 705 |
+
device=DEVICE,
|
| 706 |
+
base_model_url=DEFAULT_MODEL
|
| 707 |
+
)
|
| 708 |
+
print("✅ phoenix_granite initialized")
|
| 709 |
+
except Exception as e:
|
| 710 |
+
print(f"⚠️ phoenix_granite initialization skipped: {e}")
|
| 711 |
|
| 712 |
# PHOENIX without base
|
| 713 |
models['phoenix_standalone'] = PHOENIXRetention(
|
|
|
|
| 717 |
device=DEVICE,
|
| 718 |
base_model_url=None
|
| 719 |
)
|
| 720 |
+
print("✅ phoenix_standalone initialized")
|
| 721 |
|
| 722 |
+
# Transformer baseline (옵션)
|
| 723 |
+
try:
|
| 724 |
+
models['transformer_granite'] = TransformerBaseline(
|
| 725 |
+
d_model=512,
|
| 726 |
+
d_state=256,
|
| 727 |
+
device=DEVICE,
|
| 728 |
+
base_model_url=DEFAULT_MODEL
|
| 729 |
+
)
|
| 730 |
+
print("✅ transformer_granite initialized")
|
| 731 |
+
except Exception as e:
|
| 732 |
+
print(f"⚠️ transformer_granite initialization skipped: {e}")
|
| 733 |
|
| 734 |
+
print(f"✅ {len(models)} models initialized successfully")
|
| 735 |
return models
|
| 736 |
|
| 737 |
except Exception as e:
|
| 738 |
print(f"❌ Model initialization failed: {e}")
|
| 739 |
+
return {'phoenix_standalone': PHOENIXRetention(
|
| 740 |
+
d_model=512,
|
| 741 |
+
d_state=256,
|
| 742 |
+
num_layers=12,
|
| 743 |
+
device=DEVICE,
|
| 744 |
+
base_model_url=None
|
| 745 |
+
)}
|
| 746 |
|
| 747 |
# 데이터베이스 및 모델 초기화
|
| 748 |
db = ExperimentDatabase(DB_PATH)
|
|
|
|
| 776 |
# 실험 설정
|
| 777 |
config = {
|
| 778 |
'model_type': model_name,
|
| 779 |
+
'base_model_url': custom_model_url if custom_model_url else (model.base_model_url if hasattr(model, 'base_model_url') else None),
|
| 780 |
'sequence_length': sequence_length,
|
| 781 |
'power_mode': power_mode,
|
| 782 |
'compression_level': compression_level,
|
|
|
|
| 953 |
title='모델별 실행 시간 추이'
|
| 954 |
)
|
| 955 |
|
| 956 |
+
# base_model_url 컬럼이 있는지 확인
|
| 957 |
+
if 'base_model_url' in df.columns:
|
| 958 |
+
display_cols = ['id', 'model_type', 'base_model_url', 'sequence_length', 'elapsed_time', 'throughput', 'timestamp']
|
| 959 |
+
else:
|
| 960 |
+
display_cols = ['id', 'model_type', 'sequence_length', 'elapsed_time', 'throughput', 'timestamp']
|
| 961 |
+
|
| 962 |
history_text = f"""
|
| 963 |
## 📊 실험 이력 ({len(df)}개)
|
| 964 |
|
| 965 |
+
{df[display_cols].to_markdown(index=False)}
|
| 966 |
"""
|
| 967 |
|
| 968 |
return history_text, fig
|
|
|
|
| 986 |
for model, count in stats['by_model'].items():
|
| 987 |
stats_text += f"- **{model}**: {count}개\n"
|
| 988 |
|
| 989 |
+
if stats['by_base_model']:
|
| 990 |
+
stats_text += "\n### Base Model별 실험 수\n"
|
| 991 |
+
for base_model, count in stats['by_base_model'].items():
|
| 992 |
+
stats_text += f"- **{base_model}**: {count}개\n"
|
| 993 |
|
| 994 |
return stats_text
|
| 995 |
|
|
|
|
| 1024 |
with gr.Column(scale=1):
|
| 1025 |
model_select = gr.Dropdown(
|
| 1026 |
choices=list(MODELS.keys()),
|
| 1027 |
+
value=list(MODELS.keys())[0] if MODELS else None,
|
| 1028 |
label="기본 모델 선택"
|
| 1029 |
)
|
| 1030 |
|