"""Classify a sample COCO image with a pretrained MobileNetV2 checkpoint.

The script downloads one image over HTTP, runs it through an image
classification model loaded with Hugging Face ``transformers``, and prints
the label of the highest-scoring class.
"""

import io

import requests
from PIL import Image
import torch
import numpy
import gradio as gr
from transformers import DetrImageProcessor, DetrForSegmentation, AutoImageProcessor, AutoModelForImageClassification
from transformers.models.detr.feature_extraction_detr import rgb_to_id

url = "http://images.cocodataset.org/val2017/000000039769.jpg"

# Download the whole payload with a timeout and an explicit status check, so a
# hung connection or an HTTP error page fails loudly here instead of surfacing
# later as an opaque PIL decoding error.
response = requests.get(url, timeout=30)
response.raise_for_status()

# Decode from an in-memory buffer (no dangling streamed connection) and
# normalise to three-channel RGB before handing the image to the processor.
image = Image.open(io.BytesIO(response.content)).convert("RGB")

# --- Disabled experiment: DETR panoptic segmentation -----------------------
# NOTE(review): "facebook/post_process_panoptic_segmentation" looks like a
# method name rather than a model id -- confirm the intended checkpoint
# before re-enabling this section.
#
# feature_extractor = DetrImageProcessor.from_pretrained("facebook/post_process_panoptic_segmentation")
# model = DetrForSegmentation.from_pretrained("facebook/post_process_panoptic_segmentation")
#
# # prepare image for the model
# inputs = feature_extractor(images=image, return_tensors="pt")
#
# # forward pass
# outputs = model(**inputs)
#
# # use the `post_process_panoptic` method of `DetrFeatureExtractor` to convert to COCO format
# processed_sizes = torch.as_tensor(inputs["pixel_values"].shape[-2:]).unsqueeze(0)
# result = feature_extractor.post_process_panoptic(outputs, processed_sizes)[0]
#
# # the segmentation is stored in a special-format png
# panoptic_seg = Image.open(io.BytesIO(result["png_string"]))
# panoptic_seg = numpy.array(panoptic_seg, dtype=numpy.uint8)
# # retrieve the ids corresponding to each mask
# panoptic_seg_id = rgb_to_id(panoptic_seg)
# ---------------------------------------------------------------------------

# Load the processor and the model from the same checkpoint so they cannot
# drift apart.
checkpoint = "google/mobilenet_v2_1.0_224"
preprocessor = AutoImageProcessor.from_pretrained(checkpoint)
model = AutoModelForImageClassification.from_pretrained(checkpoint)

inputs = preprocessor(images=image, return_tensors="pt")

# Inference only: disable autograd so no gradient graph is built.
with torch.no_grad():
    outputs = model(**inputs)
logits = outputs.logits

# Take the highest-scoring class index and map it to its label via the
# model config.
predicted_class_idx = logits.argmax(-1).item()
print("Predicted class:", model.config.id2label[predicted_class_idx])

# gr.Image(image).launch()