"""
Generate one AI illustration per slide concept using OpenAI Images API.
Outputs: docs/slide_illustrations/slide-1.png ... slide-5.png
Requires: OPENAI_API_KEY in environment and internet access.
"""
# Docstring moved above the imports so it is the module's first statement
# and actually populates __doc__ (after imports it is just a stray string).
import base64
import os
import sys
import time
from io import BytesIO
from pathlib import Path
# Slide prompt catalog: (slide_number, title, image_prompt) triples.
# Every prompt asks for iconography-only art (no rendered text), since
# image models handle text poorly; main() iterates this list and saves
# one docs/slide_illustrations/slide-<n>.png per entry.
SLIDE_PROMPTS = [
    (
        1,
        "Consistent Output Control",
        (
            "Minimalist flat-design infographic showing a flow from a user message to an agent "
            "to a JSON schema validator producing a consistent structured JSON output. Use iconography only, no text. "
            "Elements: speech bubble -> robot head -> curly-brace JSON icon with checkmark badge. "
            "Style: clean, vector, blue/teal color palette, high contrast, centered composition."
        ),
    ),
    (
        2,
        "Input Control via Missing Fields",
        (
            "Modern UI concept art of a form with several fields, where required fields are highlighted and missing fields are flagged. "
            "An assistant bubble points to the missing fields to ask clarifying questions. No text labels. "
            "Style: product illustration, rounded cards, subtle shadows, blue/orange highlights, vector."
        ),
    ),
    (
        3,
        "Dynamic Input Schemas (KB & Internet)",
        (
            "Diagram of an agent deciding between two paths: a knowledge base database icon and an internet globe icon. "
            "Branched arrows from the agent to each tool, then back to a combined result. Iconography only. "
            "Style: sleek tech infographic, gradient accents, minimal lines, no words."
        ),
    ),
    (
        4,
        "Multistep Execution & Delegation",
        (
            "An orchestrator node delegating tasks to multiple sub-agents in sequence. "
            "Show numbered or visually ordered steps without using text: use small numbered badges or dots. "
            "Style: systems diagram, monochrome with one accent color, clean vectors, no text."
        ),
    ),
    (
        5,
        "API Calls (NCBI/Web Search)",
        (
            "Magnifying glass over a DNA helix icon next to a web globe, representing API calls to biomedical and web search. "
            "Arrows indicate request and response. Iconography only, no text. "
            "Style: scientific-tech aesthetic, cool tones, crisp vector illustration."
        ),
    ),
]
def ensure_api_key() -> str:
    """Return the OpenAI API key from the environment, or exit the process.

    Returns:
        The value of the OPENAI_API_KEY environment variable.

    Exits:
        With status 1 (after printing setup guidance) when the variable is
        unset or empty.
    """
    key = os.getenv("OPENAI_API_KEY")
    if not key:
        # Diagnostics go to stderr so stdout stays clean for pipeline use.
        print("ERROR: OPENAI_API_KEY not set in environment. Set it and rerun.", file=sys.stderr)
        print("PowerShell example:", file=sys.stderr)
        print(' $Env:OPENAI_API_KEY = "sk-..."', file=sys.stderr)
        sys.exit(1)
    return key
def main():
    """Generate one PNG per SLIDE_PROMPTS entry into docs/slide_illustrations/.

    Backend order per slide: OpenAI Responses API (image_generation tool),
    then DALL-E 3 via the Images API, then a Hugging Face text-to-image
    model. A slide is skipped (with a message) if every backend fails.
    """
    # Initialize OpenAI client if available. Only key *presence* is checked
    # here; auth/quota errors surface on the first API call.
    openai_enabled = False
    openai_client = None
    if os.getenv("OPENAI_API_KEY"):
        try:
            # Imported lazily so the script still runs without the package.
            from openai import OpenAI  # type: ignore
            openai_client = OpenAI()
            openai_enabled = True
        except Exception as e:
            print("WARNING: OpenAI client unavailable; will try Hugging Face fallback.")
            print(f"Details: {e}\n")
    # Initialize Hugging Face client if available (optional fallback backend).
    hf_enabled = False
    hf_client = None
    # Model is overridable via HF_IMAGE_MODEL; defaults to SDXL base.
    hf_model = os.getenv("HF_IMAGE_MODEL", "stabilityai/stable-diffusion-xl-base-1.0")
    if os.getenv("HUGGINGFACE_API_TOKEN"):
        try:
            from huggingface_hub import InferenceClient  # type: ignore
            hf_client = InferenceClient(token=os.getenv("HUGGINGFACE_API_TOKEN"))
            hf_enabled = True
        except Exception as e:
            print("WARNING: huggingface_hub not available. Install with: pip install huggingface_hub pillow")
            print(f"Details: {e}\n")
    # Output dir: parents[1] assumes this script lives one level below the
    # repo root (e.g. scripts/) — TODO confirm against actual project layout.
    project_root = Path(__file__).resolve().parents[1]
    out_dir = project_root / "docs" / "slide_illustrations"
    out_dir.mkdir(parents=True, exist_ok=True)

    def enhance_prompt_with_gpt4(title: str, concept: str) -> str:
        """Use GPT-4o to expand the concept into a professional DALL-E 3 prompt.

        Best-effort: returns the original concept unchanged when OpenAI is
        unavailable, the completion is empty, or any error occurs.
        """
        if not openai_enabled or openai_client is None:
            return concept  # fallback to original
        try:
            system_prompt = (
                "You are an expert at writing DALL-E 3 prompts for professional technical illustrations. "
                "Given a slide title and concept, expand it into a detailed, specific prompt that will produce "
                "a high-quality, structured infographic-style illustration. Focus on: clean composition, professional "
                "design, iconography without text labels, consistent color palette, and visual hierarchy. "
                "Return ONLY the enhanced prompt, no explanations."
            )
            user_prompt = f"Slide title: {title}\n\nConcept: {concept}\n\nEnhanced DALL-E 3 prompt:"
            resp = openai_client.chat.completions.create(
                model="gpt-4o",
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": user_prompt},
                ],
                temperature=0.7,
                max_tokens=300,
            )
            # `or concept` guards against an empty/None completion body.
            enhanced = resp.choices[0].message.content or concept
            return enhanced.strip()
        except Exception as e:
            print(f" Prompt enhancement failed, using original: {e}")
            return concept

    def try_openai_responses_api(prompt: str):
        """Try gpt-image-1 via Responses API (newest, highest quality)."""
        # Returns decoded image bytes, or None on any failure / no output.
        # NOTE(review): the model string is "gpt-4.1-mini" with an
        # image_generation tool attached — presumably the tool delegates to
        # gpt-image-1 as the docstring claims; confirm against OpenAI docs.
        if not openai_enabled or openai_client is None:
            return None
        try:
            response = openai_client.responses.create(
                model="gpt-4.1-mini",  # Use a model that supports image generation
                input=prompt,
                tools=[{"type": "image_generation"}],
            )
            # Extract image data from response
            image_data = [
                output.result
                for output in response.output
                if output.type == "image_generation_call"
            ]
            if image_data:
                # result is base64-encoded; decode to raw bytes for writing.
                return base64.b64decode(image_data[0])
            return None
        except Exception as e:
            print(f" Responses API (gpt-image-1) failed: {e}")
            return None

    def try_openai(prompt: str):
        """Fallback: DALL-E 3 via Images API in landscape mode."""
        # Returns decoded image bytes, or None on any failure.
        if not openai_enabled or openai_client is None:
            return None
        try:
            resp = openai_client.images.generate(
                model="dall-e-3",
                prompt=prompt,
                size="1792x1024",  # Landscape for slides
                quality="hd",
                response_format="b64_json",
            )
            b64 = resp.data[0].b64_json
            if not b64:
                return None
            return base64.b64decode(b64)
        except Exception as e:
            print(f" DALL-E 3 generation failed: {e}")
            return None

    def try_hf(prompt: str):
        # Last-resort fallback: Hugging Face Inference API.
        # Returns PNG bytes, or None on any failure.
        if not hf_enabled or hf_client is None:
            return None
        try:
            # text_to_image returns a PIL.Image.Image
            img = hf_client.text_to_image(prompt=prompt, model=hf_model)
            bio = BytesIO()
            img = img.convert("RGB")  # ensure 3-channel
            img.save(bio, format="PNG")
            return bio.getvalue()
        except Exception as e:
            print(f" Hugging Face generation failed: {e}")
            return None

    for idx, title, prompt in SLIDE_PROMPTS:
        print(f"Generating slide {idx}: {title}")
        # Step 1: Enhance prompt with GPT-4o (mimics ChatGPT's internal process)
        print(f" Enhancing prompt with GPT-4o...")
        enhanced_prompt = enhance_prompt_with_gpt4(title, prompt)
        print(f" Enhanced prompt: {enhanced_prompt[:100]}...")
        img_bytes = None
        # Try newest model first (Responses API with gpt-image-1)
        if openai_enabled:
            img_bytes = try_openai_responses_api(enhanced_prompt)
        # Fallback to DALL-E 3 landscape if Responses API unavailable
        if img_bytes is None and openai_enabled:
            print(" Falling back to DALL-E 3 (landscape)...")
            img_bytes = try_openai(enhanced_prompt)
        # Fallback to Hugging Face
        if img_bytes is None and hf_enabled:
            img_bytes = try_hf(enhanced_prompt)
        if img_bytes is None:
            # Every backend failed or was disabled for this slide.
            print(
                " Skipped: No image generated. Ensure either OPENAI_API_KEY (with access to gpt-image-1) "
                "or HUGGINGFACE_API_TOKEN is set."
            )
            continue
        out_path = out_dir / f"slide-{idx}.png"
        with open(out_path, "wb") as f:
            f.write(img_bytes)
        print(f"Saved {out_path}")
        # Brief pause between requests — presumably to respect rate limits.
        time.sleep(0.75)
    print(f"Done. Illustrations saved to: {out_dir}")
    if not openai_enabled:
        print("Note: OPENAI_API_KEY not set or OpenAI client unavailable.")
    if not hf_enabled:
        print("Note: HUGGINGFACE_API_TOKEN not set or huggingface_hub unavailable.")
# Run generation only when executed as a script, not when imported.
if __name__ == "__main__":
    main()