Upload 9 files

Browse files

Files changed (9) hide show

README.md +75 -0
added_tokens.json +3 -0
config.json +96 -0
label_info.json +93 -0
model.safetensors +3 -0
special_tokens_map.json +51 -0
spm.model +3 -0
thresholds.json +33 -0
tokenizer_config.json +59 -0

README.md ADDED Viewed

	@@ -0,0 +1,75 @@

+# GoEmotions Fine-tuned Model
+이 모델은 GoEmotions 데이터셋과 TRPG 문장 데이터로 파인튜닝된 다중 라벨(multi-label) 감정 분류 모델입니다.
+## 모델 정보
+- **Base Model**: Mango-Juice/trpg_mlm
+- **Task**: Multi-label Emotion Classification
+- **Labels**: 28개의 감정 라벨
+- **Training**: 2차 파인튜닝 완료 (GoEmotions 데이터 및 TRPG 문장 데이터)
+## 감정 라벨 목록
+- admiration
+- amusement
+- anger
+- annoyance
+- approval
+- caring
+- confusion
+- curiosity
+- desire
+- disappointment
+- disapproval
+- disgust
+- embarrassment
+- excitement
+- fear
+- gratitude
+- grief
+- joy
+- love
+- nervousness
+- optimism
+- pride
+- realization
+- relief
+- remorse
+- sadness
+- surprise
+- neutral
+## 사용 방법
+```python
+from transformers import AutoTokenizer, AutoModelForSequenceClassification
+import torch
+# 모델과 토크나이저 로드
+tokenizer = AutoTokenizer.from_pretrained("your-username/final_emotion_model")
+model = AutoModelForSequenceClassification.from_pretrained("your-username/final_emotion_model")
+# 추론
+def predict_emotions(text):
+    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=128)
+    with torch.no_grad():
+        logits = model(**inputs).logits
+        probs = torch.sigmoid(logits).cpu().numpy()[0]
+    emotion_labels = ['admiration', 'amusement', 'anger', 'annoyance', 'approval', 'caring', 'confusion', 'curiosity', 'desire', 'disappointment', 'disapproval', 'disgust', 'embarrassment', 'excitement', 'fear', 'gratitude', 'grief', 'joy', 'love', 'nervousness', 'optimism', 'pride', 'realization', 'relief', 'remorse', 'sadness', 'surprise', 'neutral']
+    return {emotion: float(prob) for emotion, prob in zip(emotion_labels, probs)}
+# 예시
+text = "I am so happy today!"
+emotions = predict_emotions(text)
+print(emotions)
+```
+## 성능
+- Fine-tuning 완료된 모델로 향상된 감정 분류 성능 제공
+- 희소 클래스에 대한 데이터 증강 적용
+## 훈련 세부사항
+- 데이터 증강: 파라프레이징 및 역번역 기반 오버샘플링
+- 손실 함수: Focal Loss with Label Smoothing
+- 옵티마이저: AdamW
+- 스케줄러: ReduceLROnPlateau

added_tokens.json ADDED Viewed

	@@ -0,0 +1,3 @@

+{
+  "[MASK]": 128000
+}

config.json ADDED Viewed

	@@ -0,0 +1,96 @@

+{
+  "architectures": [
+    "DebertaV2ForSequenceClassification"
+  ],
+  "attention_probs_dropout_prob": 0.2,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.2,
+  "hidden_size": 1024,
+  "id2label": {
+    "0": "admiration",
+    "1": "amusement",
+    "2": "anger",
+    "3": "annoyance",
+    "4": "approval",
+    "5": "caring",
+    "6": "confusion",
+    "7": "curiosity",
+    "8": "desire",
+    "9": "disappointment",
+    "10": "disapproval",
+    "11": "disgust",
+    "12": "embarrassment",
+    "13": "excitement",
+    "14": "fear",
+    "15": "gratitude",
+    "16": "grief",
+    "17": "joy",
+    "18": "love",
+    "19": "nervousness",
+    "20": "optimism",
+    "21": "pride",
+    "22": "realization",
+    "23": "relief",
+    "24": "remorse",
+    "25": "sadness",
+    "26": "surprise",
+    "27": "neutral"
+  },
+  "initializer_range": 0.02,
+  "intermediate_size": 4096,
+  "label2id": {
+    "admiration": 0,
+    "amusement": 1,
+    "anger": 2,
+    "annoyance": 3,
+    "approval": 4,
+    "caring": 5,
+    "confusion": 6,
+    "curiosity": 7,
+    "desire": 8,
+    "disappointment": 9,
+    "disapproval": 10,
+    "disgust": 11,
+    "embarrassment": 12,
+    "excitement": 13,
+    "fear": 14,
+    "gratitude": 15,
+    "grief": 16,
+    "joy": 17,
+    "love": 18,
+    "nervousness": 19,
+    "optimism": 20,
+    "pride": 21,
+    "realization": 22,
+    "relief": 23,
+    "remorse": 24,
+    "sadness": 25,
+    "surprise": 26,
+    "neutral": 27
+  },
+  "layer_norm_eps": 1e-07,
+  "legacy": true,
+  "max_position_embeddings": 512,
+  "max_relative_positions": -1,
+  "model_type": "deberta-v2",
+  "norm_rel_ebd": "layer_norm",
+  "num_attention_heads": 16,
+  "num_hidden_layers": 24,
+  "pad_token_id": 0,
+  "pooler_dropout": 0,
+  "pooler_hidden_act": "gelu",
+  "pooler_hidden_size": 1024,
+  "pos_att_type": [
+    "p2c",
+    "c2p"
+  ],
+  "position_biased_input": false,
+  "position_buckets": 256,
+  "problem_type": "multi_label_classification",
+  "relative_attention": true,
+  "share_att_key": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.52.3",
+  "type_vocab_size": 0,
+  "vocab_size": 128100
+}

label_info.json ADDED Viewed

	@@ -0,0 +1,93 @@

+{
+  "emotion_labels": [
+    "admiration",
+    "amusement",
+    "anger",
+    "annoyance",
+    "approval",
+    "caring",
+    "confusion",
+    "curiosity",
+    "desire",
+    "disappointment",
+    "disapproval",
+    "disgust",
+    "embarrassment",
+    "excitement",
+    "fear",
+    "gratitude",
+    "grief",
+    "joy",
+    "love",
+    "nervousness",
+    "optimism",
+    "pride",
+    "realization",
+    "relief",
+    "remorse",
+    "sadness",
+    "surprise",
+    "neutral"
+  ],
+  "num_labels": 28,
+  "label2id": {
+    "admiration": 0,
+    "amusement": 1,
+    "anger": 2,
+    "annoyance": 3,
+    "approval": 4,
+    "caring": 5,
+    "confusion": 6,
+    "curiosity": 7,
+    "desire": 8,
+    "disappointment": 9,
+    "disapproval": 10,
+    "disgust": 11,
+    "embarrassment": 12,
+    "excitement": 13,
+    "fear": 14,
+    "gratitude": 15,
+    "grief": 16,
+    "joy": 17,
+    "love": 18,
+    "nervousness": 19,
+    "optimism": 20,
+    "pride": 21,
+    "realization": 22,
+    "relief": 23,
+    "remorse": 24,
+    "sadness": 25,
+    "surprise": 26,
+    "neutral": 27
+  },
+  "id2label": {
+    "0": "admiration",
+    "1": "amusement",
+    "2": "anger",
+    "3": "annoyance",
+    "4": "approval",
+    "5": "caring",
+    "6": "confusion",
+    "7": "curiosity",
+    "8": "desire",
+    "9": "disappointment",
+    "10": "disapproval",
+    "11": "disgust",
+    "12": "embarrassment",
+    "13": "excitement",
+    "14": "fear",
+    "15": "gratitude",
+    "16": "grief",
+    "17": "joy",
+    "18": "love",
+    "19": "nervousness",
+    "20": "optimism",
+    "21": "pride",
+    "22": "realization",
+    "23": "relief",
+    "24": "remorse",
+    "25": "sadness",
+    "26": "surprise",
+    "27": "neutral"
+  }
+}

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:da0a4283b913863fc4d52afa9e40ed487c8a9d7ecda984ec0b2ec4e2b3eae590
+size 1740411056

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,51 @@

+{
+  "bos_token": {
+    "content": "[CLS]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "cls_token": {
+    "content": "[CLS]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "[SEP]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "mask_token": {
+    "content": "[MASK]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "[PAD]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "sep_token": {
+    "content": "[SEP]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "[UNK]",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  }
+}

spm.model ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c679fbf93643d19aab7ee10c0b99e460bdbc02fedf34b92b05af343b4af586fd
+size 2464616

thresholds.json ADDED Viewed

	@@ -0,0 +1,33 @@

+{
+  "optimal_thresholds": [
+    0.5,
+    0.4,
+    0.3,
+    0.3,
+    0.45,
+    0.25,
+    0.3,
+    0.45,
+    0.5,
+    0.25,
+    0.45,
+    0.55,
+    0.4,
+    0.6000000000000001,
+    0.25,
+    0.45,
+    0.05,
+    0.55,
+    0.5,
+    0.3,
+    0.55,
+    0.5,
+    0.35000000000000003,
+    0.35000000000000003,
+    0.4,
+    0.5,
+    0.3,
+    0.4
+  ],
+  "threshold_description": "Optimized thresholds for each emotion label for best F1 score"
+}

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,59 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "3": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128000": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "[CLS]",
+  "clean_up_tokenization_spaces": false,
+  "cls_token": "[CLS]",
+  "do_lower_case": false,
+  "eos_token": "[SEP]",
+  "extra_special_tokens": {},
+  "mask_token": "[MASK]",
+  "model_max_length": 1000000000000000019884624838656,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "sp_model_kwargs": {},
+  "split_by_punct": false,
+  "tokenizer_class": "DebertaV2Tokenizer",
+  "unk_token": "[UNK]",
+  "vocab_type": "spm"
+}