"""
Generate high-quality slide illustrations using OpenAI's Responses API with gpt-image-1.

Each slide concept in ``SLIDE_PROMPTS`` is first expanded into a detailed
image prompt with a chat completion (``gpt-4o``), then rendered by sending the
prompt to the Responses API (``gpt-4o``) with the ``image_generation`` tool
enabled.

Outputs: docs/slide_illustrations/slide-1.png ... slide-N.png
         (one file per entry in ``SLIDE_PROMPTS``)
Requires: OPENAI_API_KEY in environment.
"""

import base64
import os
import sys
from pathlib import Path

# One entry per slide: (slide number, slide title, base illustration concept).
SLIDE_PROMPTS = [
    (
        1,
        "Consistent Output Control",
        (
            "Minimalist flat-design infographic showing a flow from a user message to an agent "
            "to a JSON schema validator producing a consistent structured JSON output. Use iconography only, no text. "
            "Elements: speech bubble -> robot head -> curly-brace JSON icon with checkmark badge. "
            "Style: clean, vector, blue/teal color palette, high contrast, centered composition."
        ),
    ),
    (
        2,
        "Input Control via Missing Fields",
        (
            "Modern UI concept art of a form with several fields, where required fields are highlighted and missing fields are flagged. "
            "An assistant bubble points to the missing fields to ask clarifying questions. No text labels. "
            "Style: product illustration, rounded cards, subtle shadows, blue/orange highlights, vector."
        ),
    ),
    (
        3,
        "Dynamic Input Schemas (KB & Internet)",
        (
            "Diagram of an agent deciding between two paths: a knowledge base database icon and an internet globe icon. "
            "Branched arrows from the agent to each tool, then back to a combined result. Iconography only. "
            "Style: sleek tech infographic, gradient accents, minimal lines, no words."
        ),
    ),
    (
        4,
        "Multistep Execution & Delegation",
        (
            "An orchestrator node delegating tasks to multiple sub-agents in sequence. "
            "Show numbered or visually ordered steps without using text: use small numbered badges or dots. "
            "Style: systems diagram, monochrome with one accent color, clean vectors, no text."
        ),
    ),
    (
        5,
        "API Calls (NCBI/Web Search)",
        (
            "Magnifying glass over a DNA helix icon next to a web globe, representing API calls to biomedical and web search. "
            "Arrows indicate request and response. Iconography only, no text. "
            "Style: scientific-tech aesthetic, cool tones, crisp vector illustration."
        ),
    ),
]


def enhance_prompt_with_gpt4(client, title: str, concept: str) -> str:
    """Use GPT-4o to expand the concept into a professional image generation prompt.

    This step is best-effort: any failure while calling the API is reported on
    stdout and the original ``concept`` is returned so image generation can
    still proceed.

    Args:
        client: OpenAI client object exposing ``chat.completions.create``.
        title: Slide title given to the model as context.
        concept: Base illustration concept to expand.

    Returns:
        The stripped enhanced prompt, or ``concept`` when the model returns no
        usable text (missing, empty, or whitespace-only) or the call fails.
    """
    system_prompt = (
        "You are an expert at writing image generation prompts for professional technical illustrations. "
        "Given a slide title and concept, expand it into a detailed, specific prompt that will produce "
        "a high-quality, structured infographic-style illustration. Focus on: clean composition, professional "
        "design, iconography without text labels, consistent color palette, and visual hierarchy. "
        "Return ONLY the enhanced prompt, no explanations."
    )
    user_prompt = f"Slide title: {title}\n\nConcept: {concept}\n\nEnhanced image generation prompt:"

    try:
        resp = client.chat.completions.create(
            model="gpt-4o",
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt},
            ],
            temperature=0.7,
            max_tokens=300,
        )
        enhanced = (resp.choices[0].message.content or "").strip()
    except Exception as e:
        # Deliberately broad: enhancement is optional, so never let it abort the run.
        print(f" Prompt enhancement failed, using original: {e}")
        return concept

    # A whitespace-only reply would otherwise become an empty image prompt.
    return enhanced or concept.strip()


def generate_image_via_responses_api(client, prompt: str) -> bytes:
    """Generate image using Responses API with image_generation tool.

    Args:
        client: OpenAI client object exposing ``responses.create``.
        prompt: Text prompt describing the image to generate.

    Returns:
        The decoded image bytes of the first ``image_generation_call`` output
        that carries image data.

    Raises:
        RuntimeError: If the response contains no ``image_generation_call``
            output, or none of them carries image data.
        Exception: Any error raised by the API call or base64 decoding is
            reported on stdout and re-raised unchanged.
    """
    try:
        # Try gpt-4o which may not require verification
        response = client.responses.create(
            model="gpt-4o",
            input=prompt,
            tools=[{"type": "image_generation"}],
        )

        saw_image_call = False
        for output in response.output:
            if output.type != "image_generation_call":
                continue
            saw_image_call = True
            # The result is base64-encoded image data; it can be absent when the
            # call produced no image, so skip those instead of decoding None.
            result = getattr(output, "result", None)
            if result:
                return base64.b64decode(result)

        if saw_image_call:
            raise RuntimeError("image_generation_call returned no image data")
        raise RuntimeError("No image_generation_call found in response output")
    except Exception as e:
        print(f"ERROR: Responses API image generation failed: {e}")
        raise


def main():
    """Generate one PNG per entry in ``SLIDE_PROMPTS`` under docs/slide_illustrations.

    Exits with status 1 when ``OPENAI_API_KEY`` is unset or the ``openai``
    package is not installed. An image generation failure propagates and stops
    the run at the failing slide.
    """
    # Check API key
    if not os.getenv("OPENAI_API_KEY"):
        print("ERROR: OPENAI_API_KEY not set in environment.")
        print("PowerShell example: $Env:OPENAI_API_KEY = \"sk-...\"")
        sys.exit(1)

    # Imported lazily so a missing package yields a friendly message, not a traceback.
    try:
        from openai import OpenAI
    except ImportError:
        print("ERROR: openai package not installed. Run: pip install openai")
        sys.exit(1)

    client = OpenAI()

    # The output directory is resolved relative to the parent of this script's directory.
    project_root = Path(__file__).resolve().parents[1]
    out_dir = project_root / "docs" / "slide_illustrations"
    out_dir.mkdir(parents=True, exist_ok=True)

    print("Using OpenAI Responses API with gpt-image-1 for high-quality generation\n")

    # Derive the count from the data so progress output stays correct when slides change.
    total = len(SLIDE_PROMPTS)
    for idx, title, concept in SLIDE_PROMPTS:
        print(f"[{idx}/{total}] Generating: {title}")

        # Step 1: Enhance prompt with GPT-4o
        print(" → Enhancing prompt with GPT-4o...")
        enhanced_prompt = enhance_prompt_with_gpt4(client, title, concept)
        print(f" → Enhanced: {enhanced_prompt[:80]}...")

        # Step 2: Generate image via Responses API
        print(" → Generating image with Responses API...")
        img_bytes = generate_image_via_responses_api(client, enhanced_prompt)

        # Step 3: Save
        out_path = out_dir / f"slide-{idx}.png"
        out_path.write_bytes(img_bytes)
        print(f" ✓ Saved: {out_path}\n")

    print(f"Done! All {total} illustrations saved to: {out_dir}")


if __name__ == "__main__":
    main()