"""Gradio app that scrapes runway images from Vogue Runway.

The user enters a designer name, fetches that designer's list of collections,
picks one, and downloads every look of that show as PNG files bundled into a
ZIP archive. The page data is read from a JSON object that the Vogue pages
embed in a ``<script>`` tag as a JavaScript assignment.
"""

import glob
import json
import os
import shutil
import zipfile
from io import BytesIO

import gradio as gr
import requests
from bs4 import BeautifulSoup
from PIL import Image
from unidecode import unidecode

VOGUE_BASE_URL = "https://www.vogue.com/fashion-shows"

# Seconds to wait on any single HTTP request before giving up; without a
# timeout ``requests`` may block forever on an unresponsive server.
REQUEST_TIMEOUT = 30

# Scratch directory for downloaded images; wiped at the start of each download.
IMAGE_DIR = './images'

INTRO_MARKDOWN = """
# Vogue Runway Scraper

by [Tony Assi](https://www.tonyassi.com/)

This space scrapes images from [Vogue Runway](https://www.vogue.com/fashion-shows)

![gucci-spring-2018-ready-to-wear-1](https://github.com/TonyAssi/Vogue-Runway-Scraper/assets/42156881/081f2c82-fbc5-419f-a0e8-52f8f1a8cdcd)

1. Enter the name of the designer (all designers can be found [here](https://huggingface.co/spaces/tonyassi/vogue-runway-scraper-dev/blob/main/designers.txt) or [here](https://www.vogue.com/fashion-shows))
2. Click "Get Collections" button
3. Choose the collection from the dropdown
4. Click "Download Images" button
"""


def extract_json_from_script(scripts, key_fragment):
    """Return the JSON value assigned in the first script containing a key.

    Args:
        scripts: Iterable of BeautifulSoup ``<script>`` tags.
        key_fragment: Substring that identifies the script of interest.

    Returns:
        The decoded JSON value that follows the first ``' = '`` in the
        matching script, or ``None`` if no script matches or decoding fails.
    """
    js = next(
        (
            script.string
            for script in scripts
            if script.string and key_fragment in script.string
        ),
        None,
    )
    if js is None:
        return None

    try:
        # The script looks like ``<name> = {...};``: keep the right-hand side.
        payload = js.split(' = ', 1)[1].lstrip()
        # raw_decode parses exactly one JSON document and ignores whatever
        # JavaScript trails it. Unlike counting '{' and '}' characters, it is
        # not confused by braces that appear inside JSON string values.
        data, _end = json.JSONDecoder().raw_decode(payload)
        return data
    except (IndexError, ValueError) as e:
        # IndexError: no ' = ' in the script; ValueError: malformed JSON.
        print(f"❌ JSON extraction failed: {e}")
        return None


def _designer_slug(designer):
    """Convert a designer display name into the slug used in Vogue URLs."""
    slug = (
        designer.replace(' ', '-')
        .replace('.', '-')
        .replace('&', '')
        .replace('+', '')
        .replace('--', '-')
        .lower()
    )
    # Transliterate accented characters to plain ASCII.
    return unidecode(slug)


def _fetch_page_state(url, key_fragment):
    """Download a page and return the JSON state embedded in its scripts."""
    r = requests.get(url, timeout=REQUEST_TIMEOUT)
    soup = BeautifulSoup(r.content, 'html5lib')
    scripts = soup.find_all('script', type='text/javascript')
    return extract_json_from_script(scripts, key_fragment)


def designer_to_shows(designer):
    """Return the titles of all collections listed for a designer.

    Args:
        designer: Designer name as typed by the user, e.g. ``"Gucci"``.

    Returns:
        A list of collection titles (the ``hed`` field of each collection),
        or an empty list when the page data cannot be found or parsed.
    """
    url = f"{VOGUE_BASE_URL}/designer/{_designer_slug(designer)}"
    data = _fetch_page_state(url, 'window.__PRELOADED_STATE__')
    if not data:
        print("❌ Could not find JSON script")
        return []

    try:
        collections = data['transformed']['runwayDesignerContent']['designerCollections']
        return [show['hed'] for show in collections]
    except Exception as e:
        print(f"❌ Failed to parse shows list: {e}")
        return []


def designer_show_to_download_images(designer, show, progress):
    """Download every image of a show and bundle the files into a ZIP.

    Images are saved as PNG files under ``./images`` (the directory is
    recreated on each call) and then zipped into ``<designer>-<show>.zip`` in
    the current working directory. A failure on one image is logged and
    skipped so the rest of the show still downloads.

    Args:
        designer: Designer name as typed by the user.
        show: Collection title as returned by ``designer_to_shows``.
        progress: Object exposing ``tqdm(iterable)``, used to report progress.

    Returns:
        A ``(zip_file_name, image_path_list)`` tuple, or ``(None, [])`` when
        an input is missing or the gallery data cannot be retrieved.
    """
    # The dropdown yields None until a collection has been chosen.
    if not designer or not show:
        print("❌ Designer and collection are both required")
        return None, []

    show = unidecode(show.replace(' ', '-').lower())
    designer = _designer_slug(designer)

    url = f"{VOGUE_BASE_URL}/{show}/{designer}"
    data = _fetch_page_state(url, 'runwayShowGalleries')
    if not data:
        print("❌ Could not extract image data")
        return None, []

    try:
        items = data['transformed']['runwayShowGalleries']['galleries'][0]['items']
    except Exception as e:
        print(f"❌ Could not find gallery images: {e}")
        return None, []

    # Start from an empty directory so images of a previous show never end up
    # in this show's archive.
    save_path = IMAGE_DIR
    if os.path.exists(save_path):
        shutil.rmtree(save_path)
    os.makedirs(save_path)

    image_path_list = []
    for i, item in enumerate(progress.tqdm(items)):
        try:
            img_url = item['image']['sources']['md']['url']
            response = requests.get(img_url, timeout=REQUEST_TIMEOUT)
            # Report HTTP errors as such instead of letting PIL fail on an
            # error page body.
            response.raise_for_status()
            img = Image.open(BytesIO(response.content))
            export_path = os.path.join(save_path, f"{designer}-{show}-{i+1}.png")
            img.save(export_path)
            image_path_list.append(export_path)
            print(f"✅ Downloaded: {img_url}")
        except Exception as e:
            # Best effort: keep going with the remaining images.
            print(f"⚠️ Error downloading image {i+1}: {e}")

    zip_file_name = f"{designer}-{show}.zip"
    with zipfile.ZipFile(zip_file_name, 'w') as f:
        for file in glob.glob(save_path + '/*'):
            f.write(file)

    return zip_file_name, image_path_list


def get_collections(designer):
    """Gradio callback: refresh the collections dropdown for a designer."""
    new_options = designer_to_shows(designer)
    # gr.update is the component-agnostic update helper; the per-component
    # gr.Dropdown.update classmethod is not available in Gradio 4.
    return gr.update(choices=new_options)


# The gr.Progress default is how Gradio injects a progress tracker into an
# event handler; it is intentionally a call in the default position.
def download_images(designer, collection, progress=gr.Progress(track_tqdm=True)):
    """Gradio callback: download a collection and return (zip, image paths)."""
    return designer_show_to_download_images(designer, collection, progress)


with gr.Blocks() as demo:
    gr.Markdown(INTRO_MARKDOWN)

    designer_textbox = gr.Text(label="1) Designer", value="Gucci")
    get_collections_button = gr.Button("2) Get Collections")
    collections_dropdown = gr.Dropdown(choices=[], label='3) Collections', interactive=True)
    download_button = gr.Button("4) Download Images")
    output_file = gr.File(label='ZIP file')
    image_gallery = gr.Gallery(columns=4, preview=True)

    get_collections_button.click(
        get_collections,
        inputs=designer_textbox,
        outputs=collections_dropdown,
    )
    download_button.click(
        download_images,
        inputs=[designer_textbox, collections_dropdown],
        outputs=[output_file, image_gallery],
    )


if __name__ == "__main__":
    demo.launch()