fairytale_generator / model_comparion.py
Antonio0616's picture
mistralai 사용
e6b34ce
raw
history blame
2.6 kB
import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# ✅ High-capacity model configuration
MODEL_NAME = "mistralai/Mixtral-8x7B-Instruct-v0.1"

# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# ✅ Load the tokenizer and the model
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=True)

# Keyword options forwarded to ``from_pretrained``: weights are requested in
# float16 and device placement is delegated to the library ("auto").
_MODEL_LOAD_OPTIONS = {
    "torch_dtype": torch.float16,
    "device_map": "auto",
}
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, **_MODEL_LOAD_OPTIONS)
# ✅ Prompt builder
# Instruction template wrapped in Mixtral's ``<s>[INST] ... [/INST]`` markers.
# ``{story_a}`` and ``{story_b}`` are the only substitution fields.
_COMPARISON_PROMPT_TEMPLATE = """<s>[INST] You are a children's story evaluation expert.
Compare the following two stories on:
1. Coherence (structure and flow)
2. Creativity (imagination and originality)
3. Engagement (fun and emotional draw)
Evaluate both stories and score each criterion from 1 to 5.
Then, provide a brief comment and declare the overall better story.
Story A:
{story_a}
Story B:
{story_b}
Respond in this format:
Story A:
- Coherence: ?/5
- Creativity: ?/5
- Engagement: ?/5
- Comment: ...
Story B:
- Coherence: ?/5
- Creativity: ?/5
- Engagement: ?/5
- Comment: ...
🟢 Overall Winner: Story A or Story B
Comment: ... [/INST]
"""


def build_comparison_prompt(story_a, story_b):
    """Return the instruction prompt asking the model to compare two stories.

    Args:
        story_a: Text of the first story, inserted under "Story A:".
        story_b: Text of the second story, inserted under "Story B:".

    Returns:
        The full prompt string, starting with ``<s>[INST]`` and ending with
        ``[/INST]`` followed by a newline. Braces inside the stories are kept
        verbatim because substituted values are not re-interpreted.
    """
    return _COMPARISON_PROMPT_TEMPLATE.format(story_a=story_a, story_b=story_b)
# ✅ Evaluation function
def evaluate_stories_with_mixtral(story_a, story_b):
    """Ask the model to score and compare two stories and return its answer.

    Args:
        story_a: Text of the first story.
        story_b: Text of the second story.

    Returns:
        The model's evaluation text, or a Korean failure message (prefixed
        with "❌") when an input is blank or the generated text is too short
        or does not mention "Story A". Sampling is enabled, so the evaluation
        may differ between calls for the same input.
    """
    # Reject blank input up front instead of spending a full generation on it.
    if not (story_a or "").strip() or not (story_b or "").strip():
        return "❌ 평가 실패: 두 동화를 모두 입력해주세요."

    prompt = build_comparison_prompt(story_a, story_b)

    # The prompt already starts with the literal "<s>" marker, so the tokenizer
    # must not prepend a second BOS token.
    inputs = tokenizer(
        prompt,
        return_tensors="pt",
        return_token_type_ids=False,
        add_special_tokens=False,
    ).to(device)

    outputs = model.generate(
        **inputs,
        max_new_tokens=1024,
        temperature=0.7,
        top_p=0.9,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id,
    )

    # The output sequence begins with the prompt tokens. Decode only what comes
    # after them: string-replacing the prompt in the decoded text is unreliable
    # because decoding with skip_special_tokens=True drops "<s>", so the
    # decoded text no longer matches the prompt string.
    prompt_length = inputs["input_ids"].shape[-1]
    generated_ids = outputs[0][prompt_length:]
    result = tokenizer.decode(generated_ids, skip_special_tokens=True).strip()

    # With the prompt excluded, this check really inspects the model's answer.
    if len(result) < 30 or "Story A" not in result:
        return "❌ 평가 실패: 출력이 부족하거나 형식이 맞지 않습니다. 다시 시도해주세요."
    return result
# ✅ Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("## 🧙‍♂️ 동화 비교 평가기 (Mixtral 8x7B)\n긴 동화도 안정적으로 비교합니다!")

    # The two story inputs sit side by side in a single row.
    with gr.Row():
        story_a = gr.Textbox(
            label="📘 Story A",
            lines=20,
            placeholder="전체 동화 A 입력",
        )
        story_b = gr.Textbox(
            label="📗 Story B",
            lines=20,
            placeholder="전체 동화 B 입력",
        )

    # Output area and the button that triggers the evaluation.
    result = gr.Textbox(label="📊 평가 결과", lines=35)
    compare = gr.Button("🧠 평가하기")

    # Clicking the button sends both stories to the evaluator and shows its
    # return value in the result box.
    compare.click(
        fn=evaluate_stories_with_mixtral,
        inputs=[story_a, story_b],
        outputs=result,
    )

# Start the app (with a share link requested) only when run as a script.
if __name__ == "__main__":
    demo.launch(share=True)