"""Gradio demo that runs MMPose top-down pose estimation on an uploaded image.

The app saves the uploaded image to a temporary file, runs the configured
HRNet-W48 COCO model on it and returns the original image together with the
keypoint coordinates of the first predicted instance.
"""
import functools
import os
import subprocess
import sys
import tempfile
import warnings

# Runtime dependencies are installed before the third-party imports below,
# which is why those imports cannot sit at the very top of the file.
_REQUIREMENTS = (
    "xtcocotools>=1.12",
    "mmengine>=0.6.0",
    "mmcv>=2.0.0rc4,<2.1.0",
    "mmdet>=3.0.0,<4.0.0",
    "mmpose",
)

for _requirement in _REQUIREMENTS:
    # An argument list (no shell) keeps ">=" / "<" inside the requirement
    # string. With a shell command line, an unquoted "xtcocotools>=1.12" is
    # parsed as a redirect of pip's output to a file named "=1.12".
    # check=False keeps the original best-effort behaviour on install errors.
    subprocess.run(
        [sys.executable, "-m", "pip", "install", _requirement],
        check=False,
    )

import PIL
import cv2
import numpy as np
import torch
from mmpose.apis import MMPoseInferencer
from mmpose.apis import inference_topdown, init_model
from mmpose.utils import register_all_modules

register_all_modules()

import gradio as gr

warnings.filterwarnings("ignore")

# Names reported alongside the predicted keypoints (17 entries).
# coco keypoints:
# https://github.com/open-mmlab/mmpose/blob/master/mmpose/datasets/datasets/top_down/topdown_coco_dataset.py#L28
# NOTE(review): assumed to match the order of the model output -- confirm.
KEYPOINT_NAMES = [
    'nose', 'left_eye', 'right_eye', 'left_ear', 'right_ear',
    'left_shoulder', 'right_shoulder', 'left_elbow', 'right_elbow',
    'left_wrist', 'right_wrist', 'left_hip', 'right_hip',
    'left_knee', 'right_knee', 'left_ankle', 'right_ankle',
]


def save_image(img, img_path):
    """Write ``img`` to ``img_path`` using OpenCV.

    Args:
        img: Image in RGB channel order; anything ``np.array`` can convert
            (the app passes a PIL image).
        img_path: Destination file path; the extension selects the format.

    Raises:
        OSError: If OpenCV reports that the file could not be written.
    """
    # Convert PIL image to OpenCV image (OpenCV expects BGR channel order).
    bgr = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
    # cv2.imwrite signals failure through its return value, not an exception.
    if not cv2.imwrite(img_path, bgr):
        raise OSError(f"Could not write image to {img_path!r}")


def predict_pose(img):
    """Run pose estimation on ``img`` and return it with its keypoints.

    Args:
        img: The uploaded image (a PIL image, per the Gradio input config).

    Returns:
        A tuple ``(img, keypoints_data)`` where ``keypoints_data`` is a dict
        with ``'keypoints'`` (coordinates of the first predicted instance as
        nested lists) and ``'keypoint_names'``.

    Raises:
        ValueError: If no image was provided.
    """
    if img is None:
        raise ValueError("No input image provided.")

    # A per-request temporary directory keeps simultaneous requests from
    # overwriting each other's input file.
    with tempfile.TemporaryDirectory() as tmp_dir:
        img_path = os.path.join(tmp_dir, "input_img.jpg")
        save_image(img, img_path)
        result = mmpose_coco(img_path)

    keypoints = result[0].pred_instances['keypoints'][0]

    # Create a dictionary to store keypoints and their names
    keypoints_data = {
        'keypoints': keypoints.tolist(),
        'keypoint_names': list(KEYPOINT_NAMES),
    }
    return (img, keypoints_data)


@functools.lru_cache(maxsize=None)
def _load_model(config_file, checkpoint_file, device):
    """Build the pose model once per (config, checkpoint, device) triple."""
    return init_model(config_file, checkpoint_file, device=device)


def mmpose_coco(img_path,
                config_file='mmpose/td-hm_hrnet-w48_8xb32-210e_coco-256x192.py',
                checkpoint_file='mmpose/td-hm_hrnet-w48_8xb32-210e_coco-256x192-0e67c616_20220913.pth'):
    """Run top-down pose inference on the image stored at ``img_path``.

    Args:
        img_path: Path of the image file to analyse.
        config_file: Path of the MMPose model config.
        checkpoint_file: Path of the matching model checkpoint.

    Returns:
        The result list produced by ``inference_topdown``.
    """
    device = torch.cuda.current_device() if torch.cuda.is_available() else 'cpu'
    # The model is cached so the checkpoint is not reloaded on every request.
    model = _load_model(config_file, checkpoint_file, device)
    return inference_topdown(model, img_path)


# NOTE(review): ``gr.inputs`` / ``gr.outputs`` appear to be the legacy Gradio
# component namespaces; confirm they exist in the deployed Gradio version.
input_image = gr.inputs.Image(type='pil', label="Original Image")
output_image = gr.outputs.Image(type="pil", label="Output Image")
output_text = gr.outputs.Textbox(label="Output Text")

title = "MMPose detection for ShopByShape"
iface = gr.Interface(fn=predict_pose,
                     inputs=[input_image],
                     outputs=[output_image, output_text],
                     title=title)
iface.launch()