mj-learn-backend / vocabularyBuilder.py
pykara's picture
Update vocabularyBuilder.py
6f6b058 verified
raw
history blame
11.3 kB
from flask import Flask, Blueprint, jsonify, request, current_app
from flask_cors import CORS
import openai
import os
import random
import requests
import json # Add this at the top
import logging
import re # used below
# ---------- Blueprint ----------
# Every route defined in this module is attached to this blueprint.
vocab_bp = Blueprint("vocab", __name__)
# Root-logger setup performed at import time: records at DEBUG and above are
# written to app.log using the timestamp/level/message format below.
logging.basicConfig(
filename='app.log',
level=logging.DEBUG, # Use DEBUG for detailed logs during development
format='%(asctime)s - %(levelname)s - %(message)s'
)
# NOTE(review): this module-level app is created with CORS enabled, but no
# blueprint is registered on it anywhere in this file (the __main__ block
# builds its own app) — confirm whether anything imports `app` from here.
app = Flask(__name__)
CORS(app)
# ---------- Cohere + OpenAI config ----------
# Cohere v2 Chat endpoint used by the word-association route.
COHERE_API_URL = "https://api.cohere.com/v2/chat"
# API keys read from the environment once, at import time. The helper
# functions below use them only when the Flask app config supplies no key.
_OPENAI_API_KEY_FALLBACK = os.getenv("OPENAI_API_KEY", "")
_COHERE_API_KEY_FALLBACK = os.getenv("COHERE_API_KEY", "")
def _ensure_openai_key():
    """Point the openai module at the configured API key, if one is available.

    The value from the active Flask app config wins; when there is no app
    or that value is empty, the key captured from the environment at import
    time is used. If neither source yields a key, openai.api_key is left
    untouched.
    """
    configured = None
    if current_app:
        configured = current_app.config.get("OPENAI_API_KEY")

    resolved = configured or _OPENAI_API_KEY_FALLBACK
    if not resolved:
        return
    openai.api_key = resolved
def _cohere_headers():
    """Build the HTTP headers for a Cohere API request.

    The key comes from the active Flask app config when present, otherwise
    from the environment value captured at import time.

    Returns:
        A dict with the bearer Authorization and JSON Content-Type headers,
        or None when no API key is available from either source.
    """
    # Start from the import-time fallback and let the app config override it.
    token = _COHERE_API_KEY_FALLBACK
    if current_app:
        token = current_app.config.get("COHERE_API_KEY") or token

    if token:
        return {
            "Authorization": f"Bearer {token}",
            "Content-Type": "application/json",
        }
    return None
def _extract_text_v2(resp_json: dict) -> str:
"""
v2 /chat returns:
{ "message": { "content": [ { "type": "text", "text": "..." } ] } }
"""
msg = resp_json.get("message", {})
content = msg.get("content", [])
if isinstance(content, list) and content:
block = content[0]
if isinstance(block, dict):
return (block.get("text") or "").strip()
return ""
# Upper bound, in seconds, for each Cohere HTTP call so that a stalled
# upstream cannot hold a worker indefinitely (requests has no default timeout).
_COHERE_TIMEOUT_SECONDS = 30


def _cohere_chat_text(prompt, max_tokens, headers):
    """Send one user prompt to the Cohere chat endpoint and return the reply text.

    Returns None when the response is not a non-empty HTTP 200.
    """
    payload = {
        "model": "command-r-08-2024",
        "messages": [
            {"role": "user", "content": prompt}
        ],
        "max_tokens": max_tokens,
        "temperature": 1.0,
    }
    resp = requests.post(
        COHERE_API_URL,
        json=payload,
        headers=headers,
        timeout=_COHERE_TIMEOUT_SECONDS,
    )
    if resp.status_code != 200 or not resp.text.strip():
        logging.error("Cohere chat request failed with status %s", resp.status_code)
        return None
    return _extract_text_v2(resp.json())


def _first_json_object(text):
    """Parse the outermost {...} span found in *text*; return a dict or None."""
    match = re.search(r'\{.*\}', text, re.DOTALL)
    if not match:
        return None
    try:
        parsed = json.loads(match.group(0))
    except json.JSONDecodeError:
        return None
    return parsed if isinstance(parsed, dict) else None


@vocab_bp.route('/generate-word-association', methods=['GET'])
def generate_word_association():
    """Build one word-association question.

    Steps:
      1. Ask Cohere for a vocabulary word plus three related words.
      2. Ask Cohere for two words unrelated to that word.
      3. Combine and shuffle all the options.
      4. Ask OpenAI for an illustration of the word (best effort: a failure
         here is logged and an empty image_url is returned).

    Returns:
        (JSON, 200) with "word", "options" (shuffled), "correctOptions"
        (the related words) and "image_url"; or (JSON with "error", 500)
        when the Cohere key is missing, a Cohere call fails, or a reply
        cannot be parsed into the expected shape.
    """
    try:
        # -----------------------------
        # 1️⃣ Generate Vocabulary Word + 3 Related Words
        # -----------------------------
        prompt_related = """
Generate a simple vocabulary word and three related words.
Return only the JSON output. Do not include any explanation.
Format:
{
"word": "<main_word>",
"options": ["word1", "word2", "word3"]
}
"""
        headers = _cohere_headers()
        if not headers:
            return jsonify({"error": "COHERE_API_KEY not set"}), 500

        related_text = _cohere_chat_text(prompt_related, 100, headers)
        if related_text is None:
            return jsonify({"error": "Failed to generate vocabulary."}), 500

        related_data = _first_json_object(related_text)
        if related_data is None:
            return jsonify({"error": "Invalid JSON from related words API"}), 500

        # The model's output is untrusted: coerce the word to text and make
        # sure the options really are a list before using them.
        word = str(related_data.get("word") or "").strip()
        if not word:
            return jsonify({"error": "No vocabulary word returned"}), 500
        related_options = related_data.get("options", [])
        if not isinstance(related_options, list) or len(related_options) < 3:
            return jsonify({"error": "Not enough related words"}), 500

        # -----------------------------
        # 2️⃣ Generate 2 Unrelated Words
        # -----------------------------
        prompt_unrelated = f"""
Generate two random words that are NOT related to '{word}' in meaning.
The words should belong to a different category.
Return only the JSON output:
{{
"options": ["word1", "word2"]
}}
"""
        # Same status check as the first call, so an upstream error body is
        # reported as a failed request instead of being parsed as a reply.
        unrelated_text = _cohere_chat_text(prompt_unrelated, 50, headers)
        if unrelated_text is None:
            return jsonify({"error": "Failed to generate unrelated words."}), 500

        unrelated_data = _first_json_object(unrelated_text)
        if unrelated_data is None:
            return jsonify({"error": "Invalid JSON from unrelated words API"}), 500

        unrelated_options = unrelated_data.get("options", [])
        if not isinstance(unrelated_options, list) or len(unrelated_options) < 2:
            return jsonify({"error": "Not enough unrelated words"}), 500

        # -----------------------------
        # 3️⃣ Combine & Shuffle Options
        # -----------------------------
        # Concatenation builds a new list, so shuffling it leaves
        # related_options (returned as correctOptions) in its original order.
        all_options = related_options + unrelated_options
        random.shuffle(all_options)

        # -----------------------------
        # 4️⃣ Generate Image for the Word (OpenAI)
        # -----------------------------
        prompt_image = (
            f"A conceptual, symbolic, high-quality illustration representing the meaning of the word '{word}'. "
            "Do not use text. The image should reflect the emotional or logical meaning of the word."
        )
        try:
            _ensure_openai_key()
            image_response = openai.images.generate(
                model="dall-e-3",
                prompt=prompt_image,
                n=1,
                size="1024x1024"
            )
            image_url = image_response.data[0].url
        except Exception as img_err:
            # Deliberate best effort: the question is still usable without
            # an illustration.
            logging.error("Image generation failed: %s", str(img_err))
            image_url = ""

        # -----------------------------
        # ✅ Return All Data
        # -----------------------------
        return jsonify({
            "word": word,
            "options": all_options,
            "correctOptions": related_options,
            "image_url": image_url
        }), 200
    except Exception as e:
        # Top-level boundary: record the traceback and answer with a 500.
        logging.exception("Error in generate_word_association: %s", e)
        return jsonify({"error": f"Internal Server Error: {str(e)}"}), 500
# ----------------------------
# 2️⃣ Validate User's Selected Words (OpenAI — unchanged)
# ----------------------------
@vocab_bp.route('/validate-selection', methods=['POST'])
def validate_selection():
    """Ask OpenAI which of the offered options are associated with the word.

    Expects a JSON object body with "word", "selected_words" and
    "all_options"; all three must be present and non-empty. Every word in
    "all_options" is sent for evaluation.

    Returns:
        (JSON, 200) with "feedback" (the per-word entries produced by the
        model) and "correctAnswers" (the words flagged is_correct);
        (JSON with "error", 400) when the body is missing, is not a JSON
        object, or lacks a required field;
        (JSON with "error", 500) when the model call fails or its reply is
        not in the requested format.
    """
    try:
        # silent=True makes a missing or malformed JSON body come back as
        # None, so it is answered with the 400 below instead of raising into
        # the generic 500 handler.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            data = {}
        question_word = data.get("word")
        selected_words = data.get("selected_words")
        all_options = data.get("all_options")
        if not question_word or not selected_words or not all_options:
            return jsonify({"error": "Missing word, selections, or full option list"}), 400

        validation_prompt = f"""
The main word is '{question_word}'.
For each of the following words, evaluate whether it is logically associated with the main word.
Provide:
- A boolean value: true if it is associated, false otherwise.
- A brief explanation of why it is or isn't associated.
Words to evaluate: {all_options}
Return the response as JSON in this format:
{{
"feedback": [
{{ "word": "word1", "is_correct": true/false, "reason": "explanation" }},
...
]
}}
"""
        _ensure_openai_key()
        response = openai.chat.completions.create(
            model="gpt-4-turbo",
            messages=[{"role": "user", "content": validation_prompt}],
            max_tokens=500,
            temperature=0.5
        )
        # The message content may be absent; treat that as an empty reply.
        feedback_text = (response.choices[0].message.content or "").strip()

        # Try parsing directly as JSON
        try:
            feedback_json = json.loads(feedback_text)
        except json.JSONDecodeError:
            # If markdown or formatting exists, clean it and retry
            feedback_text = feedback_text.replace("```json", "").replace("```", "").strip()
            feedback_json = json.loads(feedback_text)

        structured_feedback = (
            feedback_json.get("feedback", []) if isinstance(feedback_json, dict) else None
        )
        if not isinstance(structured_feedback, list):
            return jsonify({"error": "Unexpected feedback format from AI."}), 500

        # Skip entries that are not objects instead of crashing on .get().
        correct_answers = [
            entry.get("word")
            for entry in structured_feedback
            if isinstance(entry, dict) and entry.get("is_correct")
        ]
        return jsonify({
            "feedback": structured_feedback,
            "correctAnswers": correct_answers
        }), 200
    except Exception as e:
        # Log through the logging module, as the word-association route does.
        logging.exception("Error validating selection: %s", e)
        return jsonify({"error": "An error occurred while validating the selection."}), 500
# ----------------------------
# 3️⃣ Validate User's Sentence Using AI (OpenAI — unchanged)
# ----------------------------
@vocab_bp.route('/validate-sentence', methods=['POST'])
def validate_sentence():
    """Ask OpenAI to review a learner's sentence.

    Expects a JSON object body with "sentence" and "selected_words"; both
    must be present and non-empty. Both values are interpolated into the
    prompt sent to the model.

    Returns:
        (JSON, 200) with "feedback" holding the model's free-text reply;
        (JSON with "error", 400) when the body is missing, is not a JSON
        object, or lacks a required field;
        (JSON with "error", 500) when the model call fails.
    """
    try:
        # silent=True makes a missing or malformed JSON body come back as
        # None, so it is answered with the 400 below instead of raising into
        # the generic 500 handler.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            data = {}
        sentence = data.get("sentence")
        selected_words = data.get("selected_words")
        if not sentence or not selected_words:
            return jsonify({"error": "Sentence and selected words are required"}), 400

        validation_prompt = f"""
Evaluate the following sentence for grammar, clarity, and correctness.
Ensure that the selected words {selected_words} are used correctly.
If the sentence is incorrect, suggest improvements.
Sentence: '{sentence}'
"""
        _ensure_openai_key()
        response = openai.chat.completions.create(
            model="gpt-4-turbo",
            messages=[{"role": "user", "content": validation_prompt}],
            max_tokens=500,
            temperature=0.7
        )
        # The message content may be absent; treat that as an empty reply.
        feedback = (response.choices[0].message.content or "").strip()
        return jsonify({"feedback": feedback}), 200
    except Exception as e:
        # Log through the logging module, as the word-association route does.
        logging.exception("Error validating sentence: %s", e)
        return jsonify({"error": "An error occurred while validating the sentence."}), 500
@vocab_bp.route('/generate-image', methods=['POST'])
def generate_image():
    """Generate an illustration for a word with OpenAI image generation.

    Expects a JSON object body with a non-empty "word".

    Returns:
        (JSON, 200) with "image_url" taken from the first generated image;
        (JSON with "error", 400) when the body is missing, is not a JSON
        object, or has no word;
        (JSON with "error", 500) carrying the exception text when image
        generation fails.
    """
    try:
        # silent=True makes a missing or malformed JSON body come back as
        # None, so it is answered with the 400 below instead of raising into
        # the generic 500 handler.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            data = {}
        word = data.get("word")
        if not word:
            return jsonify({"error": "Word is required to generate image"}), 400

        prompt = (
            f"A conceptual, high-quality illustration that visually explains the word '{word}'. "
            "Use realistic or symbolic elements to represent its meaning clearly. No text in the image."
        )
        _ensure_openai_key()
        response = openai.images.generate(
            model="dall-e-3",
            prompt=prompt,
            n=1,
            size="1024x1024"
        )
        image_url = response.data[0].url
        return jsonify({"image_url": image_url}), 200
    except Exception as e:
        # Record the failure; previously it was returned to the client but
        # never logged.
        logging.exception("Error generating image: %s", e)
        return jsonify({"error": str(e)}), 500
# ---------- Standalone (local testing) ----------
if __name__ == '__main__':
    # Script entry point: build a fresh app that serves only this blueprint.
    app = Flask(__name__)
    CORS(app)

    # Copy both API keys from the environment into the app config, which the
    # key helpers consult before their import-time fallbacks.
    for key_name in ("OPENAI_API_KEY", "COHERE_API_KEY"):
        app.config[key_name] = os.getenv(key_name, "")

    app.register_blueprint(vocab_bp, url_prefix='')
    # NOTE(review): debug=True while bound to 0.0.0.0 makes the debug server
    # reachable from other hosts — keep this to local testing only.
    app.run(host='0.0.0.0', port=5002, debug=True)