Spaces:

kkandull
/

Depression-Prediction-AI

Sleeping

App Files Files Community

saved_models

by kkandull - opened Jun 19

base: refs/heads/main

←

from: refs/pr/1

Discussion Files changed

-199

This PR is in draft mode

Files changed (3) hide show

app.py +0 -193
e_text_best_model.bin +0 -3
p_text_best_model.bin +0 -3

app.py DELETED Viewed

@@ -1,193 +0,0 @@
-# 필요한 라이브러리 임포트
-import os
-import pandas as pd # pandas는 현재 코드에서는 직접 사용되지 않지만, 데이터 처리 관련 유틸리티로 남겨둘 수 있습니다.
-import numpy as np
-import torch
-import torch.nn as nn
-import torch.nn.functional as F
-from torch.utils.data import Dataset, DataLoader # DataLoader와 Dataset은 추론 시 직접 사용되진 않지만, 모델 정의에 필요할 수 있어 남겨둠
-from transformers import LongformerForSequenceClassification, AutoTokenizer
-import gradio as gr
-# =======================================================
-# 1. 전역 설정 및 상수 정의
-# =======================================================
-MODEL_NAME = 'kiddothe2b/longformer-mini-1024' # HuggingFace 모델 이름
-MAX_LEN = 1024 # 모델 입력 최대 길이
-# GPU 사용 가능 여부 확인 및 디바이스 설정
-device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
-print(f"Using device: {device}")
-# 토크나이저 로드 (추론 시 필요)
-tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
-# =======================================================
-# 2. PyTorch 데이터셋 정의 (학습 시 사용되었던 클래스. 추론 시 직접 데이터 로더를 만들지는 않음)
-# =======================================================
-# 이 클래스는 모델이 학습될 때 사용되었던 데이터 구조를 정의합니다.
-# 추론 시에는 단일 텍스트 입력이 들어오므로 직접 DataLoader를 만들 필요는 없습니다.
-# 하지만 모델이 기대하는 입력 형태를 맞추기 위해 encoding 과정이 사용됩니다.
-class DepressionDataset(Dataset):
-    def __init__(self, texts, labels, tokenizer, max_len):
-        self.texts = texts
-        self.labels = labels
-        self.tokenizer = tokenizer
-        self.max_len = max_len
-    def __len__(self):
-        return len(self.texts)
-    def __getitem__(self, item):
-        text = str(self.texts[item])
-        label = self.labels[item]
-        encoding = self.tokenizer.encode_plus(
-            text,
-            add_special_tokens=True,
-            max_length=self.max_len,
-            return_token_type_ids=False,
-            padding='max_length',
-            truncation=True,
-            return_attention_mask=True,
-            return_tensors='pt',
-        )
-        return {
-            'input_ids': encoding['input_ids'].flatten(),
-            'attention_mask': encoding['attention_mask'].flatten(),
-            'labels': torch.tensor(label, dtype=torch.long)
-        }
-# =======================================================
-# 3. 모델 로딩 (학습된 가중치를 로드)
-# =======================================================
-print("\n--- Loading models for inference ---")
-# 모델 파일 경로 (saved_models 폴더가 없으므로 루트 디렉토리에 있다고 가정)
-# 이전에 있던 save_dir 변수는 이제 필요 없습니다.
-p_model_path = 'p_text_best_model.bin' # 파일명이 루트에 바로 있다고 가정
-e_model_path = 'e_text_best_model.bin' # 파일명이 루트에 바로 있다고 가정
-# 모델 로딩 및 평가 모드 설정
-p_model_for_inference = None
-e_model_for_inference = None
-try:
-    # 참가자 발화 모델 (P-model) 로드
-    if os.path.exists(p_model_path):
-        p_model_for_inference = LongformerForSequenceClassification.from_pretrained(MODEL_NAME, num_labels=2)
-        p_model_for_inference.load_state_dict(torch.load(p_model_path, map_location=device))
-        p_model_for_inference.to(device)
-        p_model_for_inference.eval() # 평가 모드 설정
-        print(f"P-model loaded successfully from {p_model_path}")
-    else:
-        print(f"Warning: P-model file not found at {p_model_path}. Please ensure it's uploaded to the root directory.")
-    # 엘리 발화 모델 (E-model) 로드
-    if os.path.exists(e_model_path):
-        e_model_for_inference = LongformerForSequenceClassification.from_pretrained(MODEL_NAME, num_labels=2)
-        e_model_for_inference.load_state_dict(torch.load(e_model_path, map_location=device))
-        e_model_for_inference.to(device)
-        e_model_for_inference.eval() # 평가 모드 설정
-        print(f"E-model loaded successfully from {e_model_path}")
-    else:
-        print(f"Warning: E-model file not found at {e_model_path}. Please ensure it's uploaded to the root directory.")
-except Exception as e:
-    print(f"Error loading models: {e}")
-    # 모델 로딩 실패 시, UI가 실행되지 않도록 설정
-    p_model_for_inference = None
-    e_model_for_inference = None
-# =======================================================
-# 4. Gradio 예측 함수 정의
-# =======================================================
-def predict_depression(participant_text, ellie_text):
-    # 모델이 제대로 로드되었는지 확인
-    if p_model_for_inference is None or e_model_for_inference is None:
-        return "**오류:** 모델이 로드되지 않았습니다. 관리자에게 문의하거나 모델 파일 업로드 여부를 확인해주세요."
-    # 엘리 발화 전처리 (학습 시와 동일한 로직 적용)
-    e_text_words = ellie_text.split()
-    if len(e_text_words) > 0:
-        ellie_text_processed = " ".join(e_text_words[len(e_text_words) // 2:])
-    else:
-        ellie_text_processed = ""
-    # P-model 예측
-    p_encoding = tokenizer.encode_plus(
-        participant_text,
-        add_special_tokens=True,
-        max_length=MAX_LEN,
-        return_token_type_ids=False,
-        padding='max_length',
-        truncation=True,
-        return_attention_mask=True,
-        return_tensors='pt',
-    )
-    p_input_ids = p_encoding['input_ids'].to(device)
-    p_attention_mask = p_encoding['attention_mask'].to(device)
-    with torch.no_grad(): # 추론 시에는 그라디언트 계산 불필요
-        p_outputs = p_model_for_inference(input_ids=p_input_ids, attention_mask=p_attention_mask)
-        p_probs = F.softmax(p_outputs.logits, dim=1).cpu().numpy().flatten()
-        p_pred_label = np.argmax(p_probs)
-    # E-model 예측
-    e_encoding = tokenizer.encode_plus(
-        ellie_text_processed,
-        add_special_tokens=True,
-        max_length=MAX_LEN,
-        return_token_type_ids=False,
-        padding='max_length',
-        truncation=True,
-        return_attention_mask=True,
-        return_tensors='pt',
-    )
-    e_input_ids = e_encoding['input_ids'].to(device)
-    e_attention_mask = e_encoding['attention_mask'].to(device)
-    with torch.no_grad(): # 추론 시에는 그라디언트 계산 불필요
-        e_outputs = e_model_for_inference(input_ids=e_input_ids, attention_mask=e_attention_mask)
-        e_probs = F.softmax(e_outputs.logits, dim=1).cpu().numpy().flatten()
-        e_pred_label = np.argmax(e_probs)
-    # 앙상블 (OR 전략): 둘 중 하나라도 우울증(1)으로 예측하면 우울증으로 간주
-    ensemble_pred_label = 1 if p_pred_label == 1 or e_pred_label == 1 else 0
-    labels = ['Control (비우울)', 'Depressed (우울)']
-    ensemble_result = labels[ensemble_pred_label]
-    p_model_result = labels[p_pred_label]
-    e_model_result = labels[e_pred_label]
-    return (f"**최종 앙상블 예측 (OR 전략): {ensemble_result}**\n\n"
-            f"  - 참가자 모델 (P-longBERT) 예측: {p_model_result} (확률: Control={p_probs[0]:.2f}, Depressed={p_probs[1]:.2f})\n"
-            f"  - 엘리 모델 (E-longBERT) 예측: {e_model_result} (확률: Control={e_probs[0]:.2f}, Depressed={e_probs[1]:.2f})\n\n"
-            f"**참고:**\n"
-            f"- 예측은 각 대화 내용에만 기반하며, 실제 진단은 전문가와 상담해야 합니다.\n"
-            f"- GPU 환경에서는 예측이 빠르게 수행됩니다."
-           )
-# =======================================================
-# 5. Gradio UI 인터페이스 생성 및 실행
-# =======================================================
-print("\n--- Setting up Gradio UI ---")
-# 모델이 성공적으로 로드되었을 경우에만 Gradio UI를 실행
-if p_model_for_inference is not None and e_model_for_inference is not None:
-    gr.Interface(
-        fn=predict_depression,
-        inputs=[
-            gr.Textbox(lines=10, label="참가자 발화 내용 (Participant's speech)", placeholder="여기에 참가자의 발화 내용을 입력하세요..."),
-            gr.Textbox(lines=10, label="엘리 발화 내용 (Ellie's speech)", placeholder="여기에 엘리(가상 에이전트)의 발화 내용을 입력하세요... (전체 내용 중 후반부만 사용됨)")
-        ],
-        outputs="markdown",
-        title="DAIC-WOZ 우울증 감지 앙상블 모델 (GPU 가속)",
-        description=f"""이 앱은 DAIC-WOZ 데이터셋을 기반으로 참가자와 가상 에이전트(엘리)의 대화 내용을 분석하여 우울증 여부를 예측합니다.
-                      P-longBERT (참가자 발화)와 E-longBERT (엘리 발화) 모델의 앙상블 (OR 전략) 결과를 제공합니다.
-                      **GPU 환경에서는 예측이 빠르게 수행됩니다.**
-                      **참고:** 이는 AI 모델의 예측일 뿐이며, **실제 의학적 진단은 반드시 전문가와 상담해야 합니다.**
-                      사용 중인 디바이스: {device}
-                      """
-    ).launch() # Hugging Face Spaces에서는 share=True가 필요 없음
-else:
-    print("\nGradio UI could not be launched because models failed to load. Please check model files.")

e_text_best_model.bin DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:accfd3aab9348aed05f32753969b48f9eae028f8f2139e1ec26d0c746cb0f0e4
-size 56324610

p_text_best_model.bin DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:a29208f799be73233fecbde5a3103c8df2de952d53dd071e4edc2a7eb73cdefe
-size 56324610