# ==============================================================================
# 1. SETUP AND IMPORT LIBRARIES (adapted for Hugging Face Spaces)
# ==============================================================================
# The pip install step is handled by requirements.txt on Hugging Face Spaces.
# All Google Colab-specific code and training code has been removed.
import os
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.preprocessing import StandardScaler
import gradio as gr
import warnings
import pickle
import joblib
import re

warnings.filterwarnings('ignore')
print("Libraries imported for inference.")

np.random.seed(42)
tf.random.set_seed(42)
os.environ['PYTHONHASHSEED'] = str(42)

gpus = tf.config.experimental.list_physical_devices('GPU')
print("Num GPUs Available: ", len(gpus))
if gpus:
    print("GPU is available and being used.")
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        print("GPU memory growth set to True.")
    except RuntimeError as e:
        print(e)
else:
    print("No GPU available. TensorFlow will run on CPU.")

# ==============================================================================
# 2. GLOBAL CONSTANTS AND LOAD TRAINED OBJECTS
# ==============================================================================
MAX_VOCAB_SIZE = 15000
MAX_SEQUENCE_LENGTH = 400
MODEL_PATH = 'my_multimodal_model.keras'
TOKENIZER_PATH = 'tokenizer.pkl'
SCALER_PATH = 'scaler.pkl'
METRICS_PATH = 'metrics.pkl'

model = None
tokenizer = None
scaler = None

# Default performance metrics (all initialized as floats)
val_accuracy = 0.0
val_precision = 0.0
val_recall = 0.0
val_auc = 0.0
test_precision = 0.0
test_recall = 0.0
test_f1 = 0.0
test_auc = 0.0
PREDICTION_THRESHOLD = 0.5  # default

print("\n--- Loading trained model and preprocessor objects ---")
try:
    model = load_model(MODEL_PATH)
    print(f"Model loaded successfully from {MODEL_PATH}")

    with open(TOKENIZER_PATH, 'rb') as f:
        tokenizer = pickle.load(f)
    print(f"Tokenizer loaded successfully from {TOKENIZER_PATH}")

    scaler = joblib.load(SCALER_PATH)
    print(f"Scaler loaded successfully from {SCALER_PATH}")

    # Load performance metrics (assumed to be stored as floats in metrics.pkl)
    if os.path.exists(METRICS_PATH):
        with open(METRICS_PATH, 'rb') as f:
            metrics_dict = pickle.load(f)
        val_accuracy = metrics_dict.get('val_accuracy', 0.0)
        val_precision = metrics_dict.get('val_precision', 0.0)
        val_recall = metrics_dict.get('val_recall', 0.0)
        val_auc = metrics_dict.get('val_auc', 0.0)
        test_precision = metrics_dict.get('test_precision', 0.0)
        test_recall = metrics_dict.get('test_recall', 0.0)
        test_f1 = metrics_dict.get('test_f1', 0.0)
        test_auc = metrics_dict.get('test_auc', 0.0)
        PREDICTION_THRESHOLD = metrics_dict.get('prediction_threshold', 0.5)
        print(f"Metrics loaded successfully from {METRICS_PATH}")
    else:
        print(f"Warning: Metrics file not found at {METRICS_PATH}. Performance metrics will be displayed as 0.0.")
except Exception as e:
    print(f"ERROR: Failed to load model or preprocessor files. Ensure all files are uploaded correctly. Error: {e}")
    model = None
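
# ------------------------------------------------------------------------------
# Illustrative sketch only (never called at runtime): how the artifacts loaded
# above are assumed to have been produced by the training script. The arguments
# `train_texts`, `train_numerical_features`, and `metrics_dict` are hypothetical
# placeholders; the actual training code may differ.
# ------------------------------------------------------------------------------
def _sketch_export_training_artifacts(train_texts, train_numerical_features, metrics_dict):
    """Hedged example of how tokenizer.pkl, scaler.pkl, and metrics.pkl could be exported."""
    example_tokenizer = Tokenizer(num_words=MAX_VOCAB_SIZE)
    example_tokenizer.fit_on_texts(train_texts)
    with open(TOKENIZER_PATH, 'wb') as f:
        pickle.dump(example_tokenizer, f)

    example_scaler = StandardScaler().fit(train_numerical_features)
    joblib.dump(example_scaler, SCALER_PATH)

    # metrics_dict is expected to contain the float keys read above, e.g.
    # {'val_accuracy': ..., 'test_f1': ..., 'prediction_threshold': ...}
    with open(METRICS_PATH, 'wb') as f:
        pickle.dump(metrics_dict, f)
    # The trained Keras model itself is assumed to be saved separately with
    # model.save(MODEL_PATH).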
# ==============================================================================
# 3. DATA PREPROCESSING FUNCTIONS FOR INFERENCE
# ==============================================================================

def process_transcripts_for_inference(file_path):
    """
    Parses a transcript file for the participant's speech, applying the same
    preprocessing as at training time. Handles both comma- and tab-separated files.
    """
    data_rows = []
    with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
        lines = f.readlines()

    if not lines:
        raise ValueError("The transcript file is empty.")

    separator = '\t'
    if ',' in lines[0] and '\t' not in lines[0]:
        separator = ','

    header = [h.strip().lower() for h in lines[0].split(separator)]
    try:
        speaker_idx = header.index('speaker')
        value_idx = header.index('value')
    except ValueError:
        raise ValueError(f"Transcript file header is malformed. Must contain 'speaker' and 'value'. Header found: {header}")

    for line in lines[1:]:
        parts = line.strip().split(separator)
        if len(parts) > max(speaker_idx, value_idx):
            data_rows.append({'speaker': parts[speaker_idx], 'value': parts[value_idx]})

    if not data_rows:
        raise ValueError("No valid data rows found in the transcript file.")

    df_transcript = pd.DataFrame(data_rows)
    participant_mask = df_transcript['speaker'].astype(str).str.strip().str.lower() == 'participant'
    participant_speech_df = df_transcript.loc[participant_mask]
    if participant_speech_df.empty:
        unique_speakers = df_transcript['speaker'].unique()
        raise ValueError(f"Could not find 'Participant' speech. Speakers found in file: {list(unique_speakers)}. Please check the file content.")

    participant_speech = participant_speech_df['value'].astype(str)
    full_text = ' '.join(participant_speech).lower()
    # Strip bracketed annotations, emoji, punctuation, and extra whitespace
    full_text = re.sub(r'\[.*?\]', '', full_text)
    full_text = re.sub(r'[\U0001F600-\U0001F64F\U0001F300-\U0001F5FF\U0001F680-\U0001F6FF\U0001F1E0-\U0001F1FF]+', '', full_text)
    full_text = re.sub(r'[^\w\s]', '', full_text)
    full_text = re.sub(r'\s+', ' ', full_text).strip()
    return full_text


def get_feature_summary_from_path(file_path):
    """
    Calculates nine summary statistics for a numerical feature file.
    Tries comma-separated parsing first, then whitespace-separated parsing;
    returns a zero vector if the file is missing, empty, or unparseable.
    """
    if not file_path or not os.path.exists(file_path):
        return np.zeros(9)

    for sep in (',', r'\s+'):
        try:
            df = pd.read_csv(file_path, sep=sep, header=0)
            numeric_df = df.apply(pd.to_numeric, errors='coerce')
            data = numeric_df.dropna().values.flatten()
            if data.size == 0:
                continue
            return np.array([
                np.nanmean(data), np.nanstd(data), np.nanmin(data), np.nanmax(data),
                np.nanmedian(data), np.nanpercentile(data, 25), np.nanpercentile(data, 75),
                pd.Series(data).skew(), pd.Series(data).kurt()
            ])
        except Exception:
            continue
    return np.zeros(9)


def process_all_numerical_features_from_path(file_paths_dict):
    """Concatenates summary statistics from all available feature files for a participant."""
    feature_keys = ['covarep', 'formant', 'aus', 'gaze', 'pose', 'features', 'features3d']
    feature_vectors = [get_feature_summary_from_path(file_paths_dict.get(key)) for key in feature_keys]
    return np.concatenate(feature_vectors)
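
# ------------------------------------------------------------------------------
# Comment-only example of the inputs the functions above expect.
#
# process_transcripts_for_inference() accepts a tab- or comma-separated file
# whose header contains at least 'speaker' and 'value' columns, e.g.
#
#     speaker,value
#     Ellie,how are you doing today
#     Participant,i have been feeling pretty tired lately
#
# Only rows spoken by 'Participant' are kept; bracketed annotations, emoji,
# and punctuation are removed before tokenization.
#
# process_all_numerical_features_from_path() returns a fixed-length vector of
# 7 feature files x 9 summary statistics = 63 values, in the key order
# ['covarep', 'formant', 'aus', 'gaze', 'pose', 'features', 'features3d'].
# ------------------------------------------------------------------------------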
# ==============================================================================
# 4. GRADIO PREDICTION FUNCTION
# ==============================================================================

def predict_depression(*files):
    if model is None or tokenizer is None or scaler is None:
        return "**Error:** The model or preprocessor objects are not loaded. Please contact the administrator or check that the model files were uploaded.", ""

    if not all(files):
        return "Please upload all required files.", ""

    file_paths = {
        'transcript': files[0].name,
        'covarep': files[1].name,
        'formant': files[2].name,
        'aus': files[3].name,
        'gaze': files[4].name,
        'pose': files[5].name,
        'features': files[6].name,
        'features3d': files[7].name,
    }

    try:
        # Text branch: tokenize and pad the participant's transcript
        full_text = process_transcripts_for_inference(file_paths['transcript'])
        sequence = tokenizer.texts_to_sequences([full_text])
        padded_sequence = pad_sequences(sequence, maxlen=MAX_SEQUENCE_LENGTH, padding='post', truncating='post')

        # Numerical branch: summary statistics from the feature files, scaled as in training
        numerical_vector = process_all_numerical_features_from_path(file_paths)
        scaled_numerical_vector = scaler.transform(
            np.nan_to_num(numerical_vector.reshape(1, -1), nan=0.0, posinf=1e10, neginf=-1e10)
        )

        prediction = model.predict({"text_input": padded_sequence, "numerical_input": scaled_numerical_vector})

        if np.isnan(prediction).any():
            print("WARNING: NaN value found in model prediction. Returning default result.")
            probability = 0.5
        else:
            probability = prediction[0][0]

        result_label = "Depressed" if probability > PREDICTION_THRESHOLD else "Control (non-depressed)"
        probability_str = f"Probability of depression: {probability:.2%}"
        return result_label, probability_str

    except Exception as e:
        return f"An error occurred while processing the prediction: {e}", ""
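
# ------------------------------------------------------------------------------
# Illustrative local check (comment-only, not executed): predict_depression()
# expects eight uploaded-file objects exposing a `.name` path attribute, in the
# same order they are wired to the button below. The file names here are
# hypothetical examples of the DAIC-WOZ naming scheme.
#
#   from types import SimpleNamespace
#   fake_files = [SimpleNamespace(name=p) for p in (
#       '300_TRANSCRIPT.csv', '300_COVAREP.csv', '300_FORMANT.csv',
#       '300_CLNF_AUs.txt', '300_CLNF_gaze.txt', '300_CLNF_pose.txt',
#       '300_CLNF_features.txt', '300_CLNF_features3D.txt')]
#   label, prob = predict_depression(*fake_files)
# ------------------------------------------------------------------------------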
# ==============================================================================
# 5. GRADIO UI INTERFACE CREATION AND LAUNCH
# ==============================================================================

print("\nLaunching Gradio Interface...")

if model is not None and tokenizer is not None and scaler is not None:
    with gr.Blocks() as demo:
        gr.Markdown(
            """
            # AI-Powered Depression Screener (DAIC-WOZ)
            This tool uses a multimodal deep learning model to screen for signs of depression based on interview data.
            **Upload all eight feature files for a single participant to obtain a prediction.**

            **Model validation and test performance metrics:**
            """
        )
        # Format the float metric values directly into the displayed text
        gr.Markdown(f"## Validation Acc: {val_accuracy:.2f}, Prec: {val_precision:.2f}, Rec: {val_recall:.2f}, AUC: {val_auc:.2f}")
        gr.Markdown(f"**Test Set Performance (Threshold={PREDICTION_THRESHOLD:.2f}):** Prec: {test_precision:.2f}, Rec: {test_recall:.2f}, F1: {test_f1:.2f}, AUC: {test_auc:.2f}")

        with gr.Row():
            file_transcript = gr.File(label="Transcript (e.g., 300_TRANSCRIPT.csv)", file_count="single")
            file_covarep = gr.File(label="COVAREP features (.csv)", file_count="single")
            file_formant = gr.File(label="FORMANT features (.csv)", file_count="single")
            file_aus = gr.File(label="Action Units (AUs) features (.txt)", file_count="single")
        with gr.Row():
            file_gaze = gr.File(label="Gaze features (.txt)", file_count="single")
            file_pose = gr.File(label="Pose features (.txt)", file_count="single")
            file_features = gr.File(label="Video features (.txt)", file_count="single")
            file_features3d = gr.File(label="3D video features (.txt)", file_count="single")

        predict_button = gr.Button("Analyze")
        label_output = gr.Label(label="Prediction Result")
        prob_output = gr.Textbox(label="Confidence Score")

        predict_button.click(
            fn=predict_depression,
            inputs=[
                file_transcript, file_covarep, file_formant, file_aus,
                file_gaze, file_pose, file_features, file_features3d
            ],
            outputs=[label_output, prob_output]
        )

        gr.Markdown("---")
        gr.Markdown(
            "### Important Considerations:\n"
            "1. **This is a screening tool, not a diagnostic tool.** It is not a substitute for professional medical advice. "
            "A high probability score indicates that patterns in the data are similar to those of depressed individuals in the dataset.\n"
            "2. **Bias-Aware Model:** The model was intentionally trained **only on the participant's data**, excluding the interviewer's prompts. This prevents the model from relying on biased shortcuts and promotes better generalization to real-world data, as recommended by recent research.\n"
            "3. **Data Quality:** Ensure the uploaded feature files are in the correct format (CSV for COVAREP/FORMANT, space-separated TXT for AUs/gaze/pose/features/features3D) and contain valid numerical data. Missing or malformed files will lead to incorrect predictions.\n"
            f"4. **Prediction Threshold:** The current prediction threshold is **{PREDICTION_THRESHOLD:.2f}**: any probability above this value is classified as 'Depressed'. Adjusting it shifts the balance between correctly identifying depressed individuals (recall) and minimizing false alarms (precision).\n"
            "5. **Troubleshooting 'loss: nan' or 'recall: 0':** If the model still fails to identify depressed cases, try lowering the learning rate further (e.g., `0.0001` or `0.00005`), adjusting `class_weight_dict` (e.g., a higher weight for class 1), or reducing model complexity (fewer LSTM or Dense units, or removing some Dropout/BatchNormalization layers). Also check the raw data for extreme outliers."
        )

    demo.launch()  # share=True is not needed on Hugging Face Spaces
else:
    print("Gradio UI could not be launched because the model or preprocessor files failed to load. Please check the uploaded files and logs for details.")
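
# ------------------------------------------------------------------------------
# Deployment notes (assumptions, for reference only): on a Gradio-SDK Hugging
# Face Space this script is typically saved as app.py and run directly, with
# demo.launch() serving the UI. A minimal requirements.txt inferred from the
# imports above might look like the following (versions unpinned here; pin them
# to match the training environment if needed):
#
#   tensorflow
#   pandas
#   numpy
#   scikit-learn
#   gradio
#   joblib
# ------------------------------------------------------------------------------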