seawolf2357 committed on
Commit d5c58c2 · verified · 1 Parent(s): e6bcdb0

Update app.py

Files changed (1): app.py +140 -70
app.py CHANGED
@@ -54,18 +54,18 @@ class ExperimentDatabase:
     def __init__(self, db_path: str):
         self.db_path = db_path
         self.init_database()
+        self.migrate_database()  # run schema migration
 
     def init_database(self):
         """Initialize the database"""
         with sqlite3.connect(self.db_path) as conn:
             cursor = conn.cursor()
 
-            # Experiments table
+            # Experiments table (base version)
             cursor.execute("""
                 CREATE TABLE IF NOT EXISTS experiments (
                     id INTEGER PRIMARY KEY AUTOINCREMENT,
                     model_type TEXT NOT NULL,
-                    base_model_url TEXT,
                     sequence_length INTEGER,
                     power_mode TEXT,
                     compression_level REAL,
@@ -92,14 +92,40 @@ class ExperimentDatabase:
                 ON experiments(timestamp DESC)
             """)
 
-            cursor.execute("""
-                CREATE INDEX IF NOT EXISTS idx_base_model
-                ON experiments(base_model_url)
-            """)
-
             conn.commit()
             print("✅ Database initialized")
 
+    def migrate_database(self):
+        """Database migration - add new columns"""
+        with sqlite3.connect(self.db_path) as conn:
+            cursor = conn.cursor()
+
+            # Check which columns exist
+            cursor.execute("PRAGMA table_info(experiments)")
+            columns = [column[1] for column in cursor.fetchall()]
+
+            # Add the base_model_url column if it is missing
+            if 'base_model_url' not in columns:
+                try:
+                    cursor.execute("""
+                        ALTER TABLE experiments
+                        ADD COLUMN base_model_url TEXT
+                    """)
+                    print("✅ Database migrated: base_model_url column added")
+                except sqlite3.OperationalError as e:
+                    print(f"⚠️ Migration warning: {e}")
+
+            # Add index
+            try:
+                cursor.execute("""
+                    CREATE INDEX IF NOT EXISTS idx_base_model
+                    ON experiments(base_model_url)
+                """)
+            except sqlite3.OperationalError:
+                pass
+
+            conn.commit()
+
     def save_experiment(self, config: Dict, metrics: Dict) -> int:
         """Save an experiment"""
         with sqlite3.connect(self.db_path) as conn:
@@ -171,13 +197,17 @@ class ExperimentDatabase:
             """)
             by_model = dict(cursor.fetchall())
 
-            cursor.execute("""
-                SELECT base_model_url, COUNT(*) as count
-                FROM experiments
-                WHERE base_model_url IS NOT NULL
-                GROUP BY base_model_url
-            """)
-            by_base_model = dict(cursor.fetchall())
+            # Query only if the base_model_url column exists
+            try:
+                cursor.execute("""
+                    SELECT base_model_url, COUNT(*) as count
+                    FROM experiments
+                    WHERE base_model_url IS NOT NULL
+                    GROUP BY base_model_url
+                """)
+                by_base_model = dict(cursor.fetchall())
+            except sqlite3.OperationalError:
+                by_base_model = {}
 
             return {
                 'total_experiments': total,
@@ -189,48 +219,66 @@ class RetentionVectorStore:
     """ChromaDB vector store"""
 
     def __init__(self, persist_directory: str):
-        self.client = chromadb.Client(Settings(
-            persist_directory=persist_directory,
-            anonymized_telemetry=False
-        ))
-
-        self.collection = self.client.get_or_create_collection(
-            name="retention_states",
-            metadata={"description": "PHOENIX Retention states"}
-        )
-        print("✅ Vector store initialized")
+        try:
+            self.client = chromadb.Client(Settings(
+                persist_directory=persist_directory,
+                anonymized_telemetry=False
+            ))
+
+            self.collection = self.client.get_or_create_collection(
+                name="retention_states",
+                metadata={"description": "PHOENIX Retention states"}
+            )
+            print("✅ Vector store initialized")
+        except Exception as e:
+            print(f"⚠️ Vector store initialization warning: {e}")
+            self.client = None
+            self.collection = None
 
     def add_retention_state(self, experiment_id: int, states: Dict, metadata: Dict):
         """Store a retention state"""
-        state_vector = self._states_to_vector(states)
+        if self.collection is None:
+            return
 
-        self.collection.add(
-            embeddings=[state_vector.tolist()],
-            metadatas=[{**metadata, 'experiment_id': experiment_id}],
-            ids=[f"exp_{experiment_id}"]
-        )
+        try:
+            state_vector = self._states_to_vector(states)
+
+            self.collection.add(
+                embeddings=[state_vector.tolist()],
+                metadatas=[{**metadata, 'experiment_id': experiment_id}],
+                ids=[f"exp_{experiment_id}"]
+            )
+        except Exception as e:
+            print(f"⚠️ Vector store save warning: {e}")
 
     def search(self, query: str, top_k: int = 10) -> List[Dict]:
         """Search experiments"""
-        query_vector = self._text_to_vector(query)
-
-        results = self.collection.query(
-            query_embeddings=[query_vector.tolist()],
-            n_results=top_k
-        )
-
-        if not results['ids'][0]:
+        if self.collection is None:
             return []
 
-        formatted_results = []
-        for i in range(len(results['ids'][0])):
-            formatted_results.append({
-                'experiment_id': results['metadatas'][0][i].get('experiment_id'),
-                'score': 1.0 - results['distances'][0][i],
-                'metadata': results['metadatas'][0][i]
-            })
-
-        return formatted_results
+        try:
+            query_vector = self._text_to_vector(query)
+
+            results = self.collection.query(
+                query_embeddings=[query_vector.tolist()],
+                n_results=top_k
+            )
+
+            if not results['ids'][0]:
+                return []
+
+            formatted_results = []
+            for i in range(len(results['ids'][0])):
+                formatted_results.append({
+                    'experiment_id': results['metadatas'][0][i].get('experiment_id'),
+                    'score': 1.0 - results['distances'][0][i],
+                    'metadata': results['metadatas'][0][i]
+                })
+
+            return formatted_results
+        except Exception as e:
+            print(f"⚠️ Vector store search warning: {e}")
+            return []
 
     def _states_to_vector(self, states: Dict) -> np.ndarray:
         """Convert states to a fixed-size vector"""
@@ -648,14 +696,18 @@ def initialize_default_models():
     models = {}
 
     try:
-        # PHOENIX with Granite
-        models['phoenix_granite'] = PHOENIXRetention(
-            d_model=512,
-            d_state=256,
-            num_layers=12,
-            device=DEVICE,
-            base_model_url=DEFAULT_MODEL
-        )
+        # PHOENIX with Granite (optional)
+        try:
+            models['phoenix_granite'] = PHOENIXRetention(
+                d_model=512,
+                d_state=256,
+                num_layers=12,
+                device=DEVICE,
+                base_model_url=DEFAULT_MODEL
+            )
+            print("✅ phoenix_granite initialized")
+        except Exception as e:
+            print(f"⚠️ phoenix_granite initialization skipped: {e}")
 
         # PHOENIX without base
         models['phoenix_standalone'] = PHOENIXRetention(
@@ -665,21 +717,32 @@ def initialize_default_models():
             device=DEVICE,
             base_model_url=None
         )
+        print("✅ phoenix_standalone initialized")
 
-        # Transformer baseline
-        models['transformer_granite'] = TransformerBaseline(
-            d_model=512,
-            d_state=256,
-            device=DEVICE,
-            base_model_url=DEFAULT_MODEL
-        )
+        # Transformer baseline (optional)
+        try:
+            models['transformer_granite'] = TransformerBaseline(
+                d_model=512,
+                d_state=256,
+                device=DEVICE,
+                base_model_url=DEFAULT_MODEL
+            )
+            print("✅ transformer_granite initialized")
+        except Exception as e:
+            print(f"⚠️ transformer_granite initialization skipped: {e}")
 
-        print("✅ Default models initialized")
+        print(f"✅ {len(models)} models initialized successfully")
         return models
 
     except Exception as e:
         print(f"❌ Model initialization failed: {e}")
-        return {}
+        return {'phoenix_standalone': PHOENIXRetention(
+            d_model=512,
+            d_state=256,
+            num_layers=12,
+            device=DEVICE,
+            base_model_url=None
+        )}
 
 # Initialize database and models
 db = ExperimentDatabase(DB_PATH)
@@ -713,7 +776,7 @@ def run_retention_experiment(
     # Experiment configuration
     config = {
         'model_type': model_name,
-        'base_model_url': custom_model_url if custom_model_url else model.base_model_url,
+        'base_model_url': custom_model_url if custom_model_url else (model.base_model_url if hasattr(model, 'base_model_url') else None),
         'sequence_length': sequence_length,
        'power_mode': power_mode,
         'compression_level': compression_level,
@@ -890,10 +953,16 @@ def view_experiment_history(limit=20):
         title='모델별 실행 시간 추이'
     )
 
+    # Check whether the base_model_url column exists
+    if 'base_model_url' in df.columns:
+        display_cols = ['id', 'model_type', 'base_model_url', 'sequence_length', 'elapsed_time', 'throughput', 'timestamp']
+    else:
+        display_cols = ['id', 'model_type', 'sequence_length', 'elapsed_time', 'throughput', 'timestamp']
+
     history_text = f"""
     ## 📊 실험 이력 ({len(df)}개)
 
-    {df[['id', 'model_type', 'base_model_url', 'sequence_length', 'elapsed_time', 'throughput', 'timestamp']].to_markdown(index=False)}
+    {df[display_cols].to_markdown(index=False)}
     """
 
     return history_text, fig
@@ -917,9 +986,10 @@ def get_database_statistics():
     for model, count in stats['by_model'].items():
         stats_text += f"- **{model}**: {count}개\n"
 
-    stats_text += "\n### Base Model별 실험 수\n"
-    for base_model, count in stats['by_base_model'].items():
-        stats_text += f"- **{base_model}**: {count}개\n"
+    if stats['by_base_model']:
+        stats_text += "\n### Base Model별 실험 수\n"
+        for base_model, count in stats['by_base_model'].items():
+            stats_text += f"- **{base_model}**: {count}개\n"
 
     return stats_text
 
@@ -954,7 +1024,7 @@ with gr.Blocks(
         with gr.Column(scale=1):
             model_select = gr.Dropdown(
                 choices=list(MODELS.keys()),
-                value='phoenix_granite',
+                value=list(MODELS.keys())[0] if MODELS else None,
                 label="기본 모델 선택"
            )
 
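Note: the migrate_database change in this commit follows SQLite's standard additive-migration idiom (inspect PRAGMA table_info, then ALTER TABLE ... ADD COLUMN only when the column is absent). Below is a minimal standalone sketch of that idiom for reference; it reuses the experiments table and base_model_url column names from app.py, but the helper name add_column_if_missing and the throwaway demo.db file are illustrative assumptions, not part of the commit.

import sqlite3

def add_column_if_missing(db_path: str, table: str, column: str, col_type: str = "TEXT") -> bool:
    """Additive SQLite migration: add `column` to `table` only if it is not already there."""
    with sqlite3.connect(db_path) as conn:
        cursor = conn.cursor()
        cursor.execute(f"PRAGMA table_info({table})")
        existing = [row[1] for row in cursor.fetchall()]  # row[1] is the column name
        if column in existing:
            return False  # already migrated; safe to call on every startup
        cursor.execute(f"ALTER TABLE {table} ADD COLUMN {column} {col_type}")
        conn.commit()
        return True

if __name__ == "__main__":
    # Create an old-style schema without base_model_url, then migrate it twice.
    with sqlite3.connect("demo.db") as conn:
        conn.execute("CREATE TABLE IF NOT EXISTS experiments (id INTEGER PRIMARY KEY, model_type TEXT)")
    print(add_column_if_missing("demo.db", "experiments", "base_model_url"))  # True on first run
    print(add_column_if_missing("demo.db", "experiments", "base_model_url"))  # False afterwards

Because the check makes the helper idempotent, calling it on every startup (as __init__ now does with migrate_database) is safe for both fresh databases and ones created before this commit.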