import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# ✅ Model: Google's high-performance Flan-T5 Large
MODEL_NAME = "google/flan-t5-large"
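# Note: flan-t5-large has roughly 780M parameters (about 3 GB in fp32);
# "google/flan-t5-base" is a lighter alternative if memory is limited.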

# ✅ Device setup (GPU if available, otherwise CPU)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# ✅ Load model and tokenizer
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME).to(device)

# ✅ Build the evaluation prompt
def build_prompt(story_a, story_b):
    return f"""
You are a story evaluation assistant. Please compare the two children's stories below using the following criteria:
1. Coherence (structure and flow)
2. Creativity (imagination and originality)
3. Engagement (fun and emotional draw)

Give each story a score from 1 to 5 on each criterion. Then summarize which story is better overall.

Story A: {story_a}

Story B: {story_b}

Respond in this format:

Story A:
- Coherence: ?/5
- Creativity: ?/5
- Engagement: ?/5
- Comment: ...

Story B:
- Coherence: ?/5
- Creativity: ?/5
- Engagement: ?/5
- Comment: ...

🟢 Overall Winner: Story A or Story B
"""

# ✅ Evaluation function
def evaluate_stories(story_a, story_b):
    prompt = build_prompt(story_a, story_b)
    # Flan-T5 was pre-trained with 512-token inputs; very long prompts are truncated and may degrade quality
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=1024).to(device)

    outputs = model.generate(
        **inputs,
        max_length=512,
        do_sample=True,   # required for temperature/top_k/top_p to take effect
        temperature=0.7,
        top_k=50,
        top_p=0.95,
        num_beams=4,
    )
    result = tokenizer.decode(outputs[0], skip_special_tokens=True)
    
    if len(result.strip()) < 20:
        return "❌ 평가 실패: 응답이 너무 짧습니다. 스토리를 간단히 줄이거나 모델 성능을 높여보세요."
    return result.strip()

# ✅ Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("## 🧙‍♂️ Children's Story Comparison Evaluator (Flan-T5 Large)\nCompare and evaluate two children's stories!")

    with gr.Row():
        story_a = gr.Textbox(label="📘 Story A", lines=12)
        story_b = gr.Textbox(label="📗 Story B", lines=12)

    result = gr.Textbox(label="📊 Evaluation Result", lines=20)
    compare = gr.Button("🧠 Evaluate")

    compare.click(fn=evaluate_stories, inputs=[story_a, story_b], outputs=result)

if __name__ == "__main__":
    demo.launch(share=True)  # share=True exposes a temporary public Gradio link