mj-learn-backend

Running

App Files Files Community

mj-learn-backend / vocabularyBuilder.py

pykara

Update vocabularyBuilder.py

6f6b058 verified 2 months ago

raw

history blame

11.3 kB

	from flask import Flask, Blueprint, jsonify, request, current_app
	from flask_cors import CORS
	import openai
	import os
	import random
	import requests
	import json # Add this at the top
	import logging
	import re # used below

	# ---------- Blueprint ----------
	# Blueprint that carries every route defined in this module. The
	# ``__main__`` block of this file registers it on a Flask app.
	vocab_bp = Blueprint("vocab", __name__)

	# Root-logger configuration, executed as a side effect of importing this
	# module: records are written to ``app.log`` in the format given below.
	# NOTE(review): per the stdlib docs, basicConfig does nothing when the root
	# logger already has handlers, so an earlier configuration elsewhere would
	# take precedence -- confirm the import order in the hosting app.
	logging.basicConfig(
	    filename='app.log',
	    level=logging.DEBUG, # Use DEBUG for detailed logs during development
	    format='%(asctime)s - %(levelname)s - %(message)s'
	)

	# Module-level Flask app, passed through flask_cors.CORS with its defaults.
	# NOTE(review): nothing visible in this file registers ``vocab_bp`` on this
	# instance (the ``__main__`` block builds its own app) -- confirm whether
	# another module imports ``app`` from here before removing it.
	app = Flask(__name__)
	CORS(app)

	# Cohere + OpenAI config
	# Cohere v2 Chat endpoint used for all Cohere requests in this module.
	COHERE_API_URL = "https://api.cohere.com/v2/chat"
	# Environment values captured once at import time. The helper functions
	# below fall back to them when the Flask config supplies no key.
	_OPENAI_API_KEY_FALLBACK = os.getenv("OPENAI_API_KEY", "")
	_COHERE_API_KEY_FALLBACK = os.getenv("COHERE_API_KEY", "")

	def _ensure_openai_key():
	    """Point the ``openai`` module at an API key when one is available.

	    The ``OPENAI_API_KEY`` entry of the current Flask app's config is
	    preferred. When there is no usable app, or the entry is empty, the
	    module-level fallback read from the environment is used instead.
	    ``openai.api_key`` is left untouched when neither source yields a value.
	    """
	    configured = None
	    if current_app:
	        configured = current_app.config.get("OPENAI_API_KEY")

	    resolved = configured or _OPENAI_API_KEY_FALLBACK
	    if not resolved:
	        return
	    openai.api_key = resolved

	def _cohere_headers():
	    """Build the HTTP headers for a Cohere API request.

	    The key is taken from the current Flask app's ``COHERE_API_KEY`` config
	    entry, falling back to the module-level value read from the environment.

	    Returns:
	        A dict with ``Authorization`` and ``Content-Type`` headers, or
	        ``None`` when no key could be found.
	    """
	    configured = None
	    if current_app:
	        configured = current_app.config.get("COHERE_API_KEY")

	    token = configured or _COHERE_API_KEY_FALLBACK
	    if token:
	        return {
	            "Authorization": f"Bearer {token}",
	            "Content-Type": "application/json"
	        }
	    return None

	def _extract_text_v2(resp_json: dict) -> str:
	"""
	v2 /chat returns:
	{ "message": { "content": [ { "type": "text", "text": "..." } ] } }
	"""
	msg = resp_json.get("message", {})
	content = msg.get("content", [])
	if isinstance(content, list) and content:
	block = content[0]
	if isinstance(block, dict):
	return (block.get("text") or "").strip()
	return ""

	# Upper bound, in seconds, for each Cohere HTTP call so that a stalled
	# upstream cannot block a worker indefinitely.
	_COHERE_TIMEOUT_SECONDS = 30


	def _cohere_json_reply(prompt, max_tokens, headers):
	    """Send one user prompt to Cohere and parse the JSON object in the reply.

	    Args:
	        prompt: Text sent as the single user message.
	        max_tokens: ``max_tokens`` value forwarded to the API.
	        headers: Request headers as produced by ``_cohere_headers()``.

	    Returns:
	        A ``(payload, problem)`` tuple. On success ``payload`` is the decoded
	        JSON object and ``problem`` is ``None``. Otherwise ``payload`` is
	        ``None`` and ``problem`` is ``"http"`` (non-200 status or empty body)
	        or ``"format"`` (no ``{...}`` span found in the reply text).

	    Raises:
	        requests.RequestException: On network failure or timeout.
	        ValueError: When the body or the ``{...}`` span is not valid JSON.
	    """
	    response = requests.post(
	        COHERE_API_URL,
	        json={
	            "model": "command-r-08-2024",
	            "messages": [
	                {"role": "user", "content": prompt}
	            ],
	            "max_tokens": max_tokens,
	            "temperature": 1.0
	        },
	        headers=headers,
	        timeout=_COHERE_TIMEOUT_SECONDS,
	    )
	    if response.status_code != 200 or not response.text.strip():
	        return None, "http"

	    # The model may wrap the JSON in prose or code fences, so take the span
	    # from the first "{" to the last "}" of the reply text.
	    match = re.search(r'\{.*\}', _extract_text_v2(response.json()), re.DOTALL)
	    if not match:
	        return None, "format"
	    return json.loads(match.group(0)), None


	@vocab_bp.route('/generate-word-association', methods=['GET'])
	def generate_word_association():
	    """Build one word-association exercise.

	    Asks Cohere for a vocabulary word with related words, asks again for
	    unrelated distractors, shuffles both lists together and requests an
	    illustration of the word from OpenAI. Image generation is best-effort:
	    on failure ``image_url`` is an empty string.

	    Returns:
	        ``(response, 200)`` with keys ``word``, ``options``,
	        ``correctOptions`` and ``image_url``, or ``(response, 500)`` with an
	        ``error`` key when a key is missing, an upstream call fails or the
	        model output cannot be used.
	    """
	    try:
	        # -----------------------------
	        # 1️⃣ Generate Vocabulary Word + 3 Related Words
	        # -----------------------------
	        prompt_related = """
	Generate a simple vocabulary word and three related words.
	Return only the JSON output. Do not include any explanation.

	Format:
	{
	"word": "<main_word>",
	"options": ["word1", "word2", "word3"]
	}
	"""

	        headers = _cohere_headers()
	        if not headers:
	            return jsonify({"error": "COHERE_API_KEY not set"}), 500

	        related_data, problem = _cohere_json_reply(prompt_related, 100, headers)
	        if problem == "http":
	            return jsonify({"error": "Failed to generate vocabulary."}), 500
	        if problem == "format":
	            return jsonify({"error": "Invalid JSON from related words API"}), 500

	        # The model output is untrusted: coerce the word to text and make
	        # sure the options really are a list before using them.
	        word = str(related_data.get("word") or "").strip()
	        related_options = related_data.get("options", [])

	        if not word:
	            return jsonify({"error": "Invalid JSON from related words API"}), 500
	        if not isinstance(related_options, list) or len(related_options) < 3:
	            return jsonify({"error": "Not enough related words"}), 500

	        # -----------------------------
	        # 2️⃣ Generate 2 Unrelated Words
	        # -----------------------------
	        prompt_unrelated = f"""
	Generate two random words that are NOT related to '{word}' in meaning.
	The words should belong to a different category.
	Return only the JSON output:
	{{
	"options": ["word1", "word2"]
	}}
	"""

	        unrelated_data, problem = _cohere_json_reply(prompt_unrelated, 50, headers)
	        if problem == "http":
	            # Same status check as the first call, which the second call
	            # previously skipped.
	            return jsonify({"error": "Failed to generate vocabulary."}), 500
	        if problem == "format":
	            return jsonify({"error": "Invalid JSON from unrelated words API"}), 500

	        unrelated_options = unrelated_data.get("options", [])

	        if not isinstance(unrelated_options, list) or len(unrelated_options) < 2:
	            return jsonify({"error": "Not enough unrelated words"}), 500

	        # -----------------------------
	        # 3️⃣ Combine & Shuffle Options
	        # -----------------------------
	        # Concatenation creates a new list, so shuffling it leaves
	        # related_options (returned as correctOptions) in its original order.
	        all_options = related_options + unrelated_options
	        random.shuffle(all_options)

	        # -----------------------------
	        # 4️⃣ Generate Image for the Word (OpenAI)
	        # -----------------------------
	        prompt_image = (
	            f"A conceptual, symbolic, high-quality illustration representing the meaning of the word '{word}'. "
	            "Do not use text. The image should reflect the emotional or logical meaning of the word."
	        )

	        try:
	            _ensure_openai_key()
	            image_response = openai.images.generate(
	                model="dall-e-3",
	                prompt=prompt_image,
	                n=1,
	                size="1024x1024"
	            )
	            image_url = image_response.data[0].url
	        except Exception as img_err:
	            # Deliberately best-effort: the exercise is still usable without
	            # an illustration.
	            logging.error("Image generation failed: %s", img_err)
	            image_url = ""

	        # -----------------------------
	        # ✅ Return All Data
	        # -----------------------------
	        return jsonify({
	            "word": word,
	            "options": all_options,
	            "correctOptions": related_options,
	            "image_url": image_url
	        }), 200

	    except Exception as e:
	        # logging.exception records the traceback as well as the message.
	        logging.exception("Error in generate_word_association: %s", e)
	        # NOTE(review): the exception text is returned to the client, which
	        # can expose internal details; kept to preserve the response contract.
	        return jsonify({"error": f"Internal Server Error: {str(e)}"}), 500

	# ----------------------------
	# 2️⃣ Validate User's Selected Words (OpenAI — unchanged)
	# ----------------------------
	@vocab_bp.route('/validate-selection', methods=['POST'])
	def validate_selection():
	    """Ask the OpenAI chat model which options are associated with a word.

	    Expects a JSON object body with ``word``, ``selected_words`` and
	    ``all_options``. Every entry of ``all_options`` is evaluated by the
	    model; ``selected_words`` is only checked for presence.

	    Returns:
	        ``(response, 200)`` with ``feedback`` (list of per-word verdicts) and
	        ``correctAnswers`` (words flagged ``is_correct``);
	        ``(response, 400)`` when the body is missing, not a JSON object, or
	        lacks a required field;
	        ``(response, 500)`` when the model call fails or its output cannot
	        be interpreted.
	    """
	    try:
	        # silent=True yields None for a missing or invalid JSON body instead
	        # of raising, so bad requests reach the 400 branch below rather than
	        # the generic 500 handler.
	        data = request.get_json(silent=True)
	        if not isinstance(data, dict):
	            data = {}

	        question_word = data.get("word")
	        selected_words = data.get("selected_words")
	        all_options = data.get("all_options")

	        if not question_word or not selected_words or not all_options:
	            return jsonify({"error": "Missing word, selections, or full option list"}), 400

	        validation_prompt = f"""
	The main word is '{question_word}'.

	For each of the following words, evaluate whether it is logically associated with the main word.
	Provide:
	- A boolean value: true if it is associated, false otherwise.
	- A brief explanation of why it is or isn't associated.

	Words to evaluate: {all_options}

	Return the response as JSON in this format:
	{{
	"feedback": [
	{{ "word": "word1", "is_correct": true/false, "reason": "explanation" }},
	...
	]
	}}
	"""

	        _ensure_openai_key()
	        response = openai.chat.completions.create(
	            model="gpt-4-turbo",
	            messages=[{"role": "user", "content": validation_prompt}],
	            max_tokens=500,
	            temperature=0.5
	        )

	        # Guard against a reply that carries no text content.
	        feedback_text = (response.choices[0].message.content or "").strip()

	        # Try parsing directly as JSON
	        try:
	            feedback_json = json.loads(feedback_text)
	        except json.JSONDecodeError:
	            # If markdown or formatting exists, clean it and retry
	            feedback_text = feedback_text.replace("```json", "").replace("```", "").strip()
	            feedback_json = json.loads(feedback_text)

	        # Valid JSON that is not an object (e.g. a bare list) has no
	        # "feedback" key to read; treat it as an unexpected format.
	        structured_feedback = (
	            feedback_json.get("feedback", []) if isinstance(feedback_json, dict) else None
	        )

	        if not isinstance(structured_feedback, list):
	            return jsonify({"error": "Unexpected feedback format from AI."}), 500

	        # Entries that are not objects cannot be marked correct; skip them
	        # rather than failing the whole request.
	        correct_answers = [
	            entry.get("word")
	            for entry in structured_feedback
	            if isinstance(entry, dict) and entry.get("is_correct")
	        ]

	        return jsonify({
	            "feedback": structured_feedback,
	            "correctAnswers": correct_answers
	        }), 200

	    except Exception as e:
	        # Use the logging setup shared by the rest of this module (with
	        # traceback) instead of printing to stdout.
	        logging.exception("Error validating selection: %s", e)
	        return jsonify({"error": "An error occurred while validating the selection."}), 500

	# ----------------------------
	# 3️⃣ Validate User's Sentence Using AI (OpenAI — unchanged)
	# ----------------------------
	@vocab_bp.route('/validate-sentence', methods=['POST'])
	def validate_sentence():
	    """Ask the OpenAI chat model to review a user-written sentence.

	    Expects a JSON object body with ``sentence`` and ``selected_words``.

	    Returns:
	        ``(response, 200)`` with ``feedback`` holding the model's free-text
	        review;
	        ``(response, 400)`` when the body is missing, not a JSON object, or
	        lacks a required field;
	        ``(response, 500)`` when the model call fails.
	    """
	    try:
	        # silent=True yields None for a missing or invalid JSON body instead
	        # of raising, so bad requests reach the 400 branch below rather than
	        # the generic 500 handler.
	        data = request.get_json(silent=True)
	        if not isinstance(data, dict):
	            data = {}

	        sentence = data.get("sentence")
	        selected_words = data.get("selected_words")

	        if not sentence or not selected_words:
	            return jsonify({"error": "Sentence and selected words are required"}), 400

	        validation_prompt = f"""
	Evaluate the following sentence for grammar, clarity, and correctness.
	Ensure that the selected words {selected_words} are used correctly.
	If the sentence is incorrect, suggest improvements.
	Sentence: '{sentence}'
	"""
	        _ensure_openai_key()
	        response = openai.chat.completions.create(
	            model="gpt-4-turbo",
	            messages=[{"role": "user", "content": validation_prompt}],
	            max_tokens=500,
	            temperature=0.7
	        )

	        # Guard against a reply that carries no text content.
	        feedback = (response.choices[0].message.content or "").strip()
	        return jsonify({"feedback": feedback}), 200

	    except Exception as e:
	        # Use the logging setup shared by the rest of this module (with
	        # traceback) instead of printing to stdout.
	        logging.exception("Error validating sentence: %s", e)
	        return jsonify({"error": "An error occurred while validating the sentence."}), 500

	@vocab_bp.route('/generate-image', methods=['POST'])
	def generate_image():
	    """Generate an illustration for a word with the OpenAI image API.

	    Expects a JSON object body with ``word``.

	    Returns:
	        ``(response, 200)`` with ``image_url``;
	        ``(response, 400)`` when the body is missing, not a JSON object, or
	        has no ``word``;
	        ``(response, 500)`` with the error text when image generation fails.
	    """
	    try:
	        # silent=True yields None for a missing or invalid JSON body instead
	        # of raising, so bad requests reach the 400 branch below rather than
	        # the generic 500 handler.
	        data = request.get_json(silent=True)
	        word = data.get("word") if isinstance(data, dict) else None

	        if not word:
	            return jsonify({"error": "Word is required to generate image"}), 400

	        prompt = (
	            f"A conceptual, high-quality illustration that visually explains the word '{word}'. "
	            "Use realistic or symbolic elements to represent its meaning clearly. No text in the image."
	        )

	        _ensure_openai_key()
	        response = openai.images.generate(
	            model="dall-e-3",
	            prompt=prompt,
	            n=1,
	            size="1024x1024"
	        )

	        image_url = response.data[0].url
	        return jsonify({"image_url": image_url}), 200

	    except Exception as e:
	        # Record the failure; previously it was returned to the client but
	        # never logged on the server.
	        logging.exception("Error generating image: %s", e)
	        # NOTE(review): the exception text is returned to the client, which
	        # can expose internal details; kept to preserve the response contract.
	        return jsonify({"error": str(e)}), 500

	# ---------- Standalone (local testing) ----------
	# Executed only when this file is run as a script: builds a fresh app,
	# copies both API keys from the environment into its config, mounts the
	# blueprint and starts the development server on port 5002.
	# NOTE(review): debug=True together with host 0.0.0.0 exposes the Werkzeug
	# debugger on every network interface -- use on trusted networks only.
	if __name__ == '__main__':
	    app = Flask(__name__)
	    CORS(app)
	    for _key_name in ("OPENAI_API_KEY", "COHERE_API_KEY"):
	        app.config[_key_name] = os.getenv(_key_name, "")
	    app.register_blueprint(vocab_bp, url_prefix='')
	    app.run(host='0.0.0.0', port=5002, debug=True)