import base64
import uuid
from io import BytesIO
from pathlib import Path
from typing import Optional, Tuple, Union

from PIL import Image, ImageDraw, ImageFont, ImageEnhance
import pytesseract
import torch
from transformers import BlipProcessor, BlipForConditionalGeneration
from diffusers import StableDiffusionPipeline

# Device config - prefer GPU if available
device = "cuda" if torch.cuda.is_available() else "cpu"

# Initialize BLIP captioning model and processor once
processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
model = BlipForConditionalGeneration.from_pretrained(
    "Salesforce/blip-image-captioning-base"
).to(device)

# Initialize Stable Diffusion pipeline once
pipe = StableDiffusionPipeline.from_pretrained(
    "CompVis/stable-diffusion-v1-4",
    revision="fp16" if device == "cuda" else None,
    torch_dtype=torch.float16 if device == "cuda" else torch.float32,
)
pipe.to(device)
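
# Note: both models are downloaded (if not already cached) and moved to `device`
# at import time; on a CPU-only machine the Stable Diffusion pipeline loads in
# float32 and image generation will be noticeably slow.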

def extract_text_from_image(path: Union[str, Path]) -> str:
    """Extract text from image file at `path` using OCR (Tesseract)."""
    try:
        img = Image.open(path)
        text = pytesseract.image_to_string(img)
        return text.strip()
    except Exception as e:
        return f"[Error extracting text: {e}]"

def caption_image(path: Union[str, Path]) -> str:
    """Generate a descriptive caption for image at `path` using BLIP."""
    try:
        img = Image.open(path).convert("RGB")
        inputs = processor(img, return_tensors="pt").to(device)
        outputs = model.generate(**inputs)
        caption = processor.decode(outputs[0], skip_special_tokens=True)
        return caption
    except Exception as e:
        return f"[Error generating caption: {e}]"

def generate_image(prompt: str, save_path: Optional[Union[str, Path]] = None) -> Path:
    """
    Generate an image from a text prompt using Stable Diffusion.
    Saves the image to `save_path`, or to a temporary file if None.
    Returns the path to the saved image.
    """
    if not prompt.strip():
        raise ValueError("Prompt must not be empty")
    result = pipe(prompt)
    image = result.images[0]
    if save_path is None:
        save_path = Path("/tmp") / f"image_{uuid.uuid4()}.png"
    else:
        save_path = Path(save_path)
    image.save(save_path)
    return save_path

def generate_placeholder_image(
    prompt: str,
    size: Tuple[int, int] = (512, 512),
    bg_color: Tuple[int, int, int] = (173, 216, 230),
    font_path: Optional[Union[str, Path]] = None,
    font_size: int = 18,
) -> str:
    """
    Create a placeholder image with the prompt text overlaid.
    Returns a base64-encoded PNG image string.
    """
    img = Image.new("RGB", size, color=bg_color)
    draw = ImageDraw.Draw(img)
    try:
        if font_path:
            font = ImageFont.truetype(str(font_path), font_size)
        else:
            font = ImageFont.load_default()
    except Exception:
        font = ImageFont.load_default()

    margin = 10
    max_width = size[0] - 2 * margin
    y_text = margin
    lines = []

    # Word-wrap text to fit width
    words = prompt.split()
    line = ""
    for word in words:
        test_line = f"{line} {word}".strip()
        # textbbox replaces draw.textsize, which was removed in Pillow 10
        bbox = draw.textbbox((0, 0), test_line, font=font)
        width = bbox[2] - bbox[0]
        if width <= max_width:
            line = test_line
        else:
            lines.append(line)
            line = word
    lines.append(line)

    for line in lines:
        draw.text((margin, y_text), line, fill="black", font=font)
        # getbbox replaces font.getsize, which was removed in Pillow 10
        left, top, right, bottom = font.getbbox(line)
        y_text += (bottom - top) + 4

    buffer = BytesIO()
    img.save(buffer, format="PNG")
    encoded = base64.b64encode(buffer.getvalue()).decode("utf-8")
    return encoded
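
# Usage sketch (illustrative only, not part of the module's API): the returned
# base64 string can be decoded back into a PIL image for inspection.
#
#     encoded = generate_placeholder_image("hello world")
#     preview = Image.open(BytesIO(base64.b64decode(encoded)))
#     preview.show()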

def generate_image_base64(prompt: str) -> str:
    """
    Generate an image for the prompt and return it as a base64 PNG string.
    """
    image_path = generate_image(prompt)
    with open(image_path, "rb") as f:
        encoded = base64.b64encode(f.read()).decode("utf-8")
    return encoded

def overlay_text_on_image(
    image_path: Union[str, Path],
    text: str,
    position: Tuple[int, int] = (10, 10),
    font_path: Optional[Union[str, Path]] = None,
    font_size: int = 20,
    color: Tuple[int, int, int] = (255, 255, 255),
    outline_color: Tuple[int, int, int] = (0, 0, 0),
    outline_width: int = 2,
) -> Image.Image:
    """
    Overlay the given text on the image at `image_path`.
    Supports an optional font and outline.
    Returns a PIL Image object.
    """
    img = Image.open(image_path).convert("RGBA")
    txt_layer = Image.new("RGBA", img.size, (255, 255, 255, 0))
    draw = ImageDraw.Draw(txt_layer)
    try:
        if font_path:
            font = ImageFont.truetype(str(font_path), font_size)
        else:
            font = ImageFont.load_default()
    except Exception:
        font = ImageFont.load_default()

    x, y = position
    # Draw outline for better visibility
    if outline_width > 0:
        for offset in range(-outline_width, outline_width + 1):
            if offset == 0:
                continue
            draw.text((x + offset, y), text, font=font, fill=outline_color + (255,))
            draw.text((x, y + offset), text, font=font, fill=outline_color + (255,))
            draw.text((x + offset, y + offset), text, font=font, fill=outline_color + (255,))
    draw.text(position, text, font=font, fill=color + (255,))

    combined = Image.alpha_composite(img, txt_layer)
    return combined.convert("RGB")

def save_overlayed_image(
    image_path: Union[str, Path],
    text: str,
    output_path: Union[str, Path],
    **overlay_kwargs,
) -> Path:
    """
    Overlay text on the image at `image_path` and save it to `output_path`.
    Extra keyword args are passed to overlay_text_on_image().
    """
    img = overlay_text_on_image(image_path, text, **overlay_kwargs)
    output_path = Path(output_path)
    img.save(output_path)
    return output_path

def enhance_image_contrast(image_path: Union[str, Path], factor: float = 1.5) -> Image.Image:
    """
    Enhance the contrast of the image by the given factor.
    Returns a PIL Image object.
    """
    img = Image.open(image_path)
    enhancer = ImageEnhance.Contrast(img)
    enhanced_img = enhancer.enhance(factor)
    return enhanced_img

def save_enhanced_image(image_path: Union[str, Path], output_path: Union[str, Path], factor: float = 1.5) -> Path:
    """
    Enhance the contrast of an image and save it to `output_path`.
    """
    enhanced_img = enhance_image_contrast(image_path, factor)
    output_path = Path(output_path)
    enhanced_img.save(output_path)
    return output_path
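
# A minimal usage sketch exercising only the lightweight Pillow-based helpers
# (the BLIP and Stable Diffusion models still load at import time). The file
# names below are illustrative, not part of the module's API.
if __name__ == "__main__":
    # 1. Render a placeholder PNG from a prompt and write it to disk.
    encoded = generate_placeholder_image("A lighthouse at dusk, oil painting style")
    placeholder_path = Path("placeholder.png")
    placeholder_path.write_bytes(base64.b64decode(encoded))

    # 2. Stamp a caption onto the placeholder and save the result.
    save_overlayed_image(placeholder_path, "demo overlay", "overlayed.png", font_size=24)

    # 3. Bump the contrast of the overlaid image.
    save_enhanced_image("overlayed.png", "enhanced.png", factor=1.8)
    print("Wrote placeholder.png, overlayed.png and enhanced.png")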