A model for classifying the emotion of Korean text.
It was fine-tuned from the KcELECTRA model on roughly 220,000 emotion-labeled sentences.
Predictions fall into six emotion categories: 기쁨 (joy), 당황 (embarrassment), 분노 (anger), 불안 (anxiety), 상처 (hurt), and 슬픔 (sadness).
Example usage:

```python
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline

model_name = "noridorimari/emotion_classifier"

# Mapping from class ids to emotion labels
id2label = {
    0: "기쁨",  # joy
    1: "당황",  # embarrassment
    2: "분노",  # anger
    3: "불안",  # anxiety
    4: "상처",  # hurt
    5: "슬픔",  # sadness
}
label2id = {v: k for k, v in id2label.items()}

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

# Attach the label mapping directly to the model config
model.config.id2label = id2label
model.config.label2id = label2id

classifier = pipeline(
    "text-classification",
    model=model,
    tokenizer=tokenizer,
    return_all_scores=True,  # deprecated in newer transformers; top_k=None is the modern equivalent
    device=0 if torch.cuda.is_available() else -1,
)

texts = [
    "오늘 회사에서 실수해서 너무 불안해.",        # "I made a mistake at work today and feel so anxious."
    "친구가 나한테 거짓말해서 정말 화가 났어.",    # "My friend lied to me and I'm really angry."
    "좋은 소식이 있어서 하루 종일 기분이 좋아!",   # "I got good news, so I've been in a great mood all day!"
]

for text in texts:
    preds = classifier(text)[0]
    # Sort labels by probability, highest first
    preds = sorted(preds, key=lambda x: x["score"], reverse=True)
    top = preds[0]
    print(f"\nSentence: {text}")
    print(f"Predicted emotion: {top['label']} ({top['score']*100:.2f}%)")
    print("Full probability distribution:")
    for p in preds:
        print(f"  {p['label']:>4} : {p['score']*100:.2f}%")
```
Citation for the base model:

```bibtex
@misc{lee2021kcelectra,
  author       = {Junbum Lee},
  title        = {KcELECTRA: Korean comments ELECTRA},
  year         = {2021},
  publisher    = {GitHub},
  journal      = {GitHub repository},
  howpublished = {\url{https://github.com/Beomi/KcELECTRA}}
}
```
Base model: beomi/KcELECTRA-base