Spaces:

qoobeeshy
/

yolo-document-layout-analysis

Running

App Files Files Community

saad noor committed on Aug 10, 2023

Commit

dab2f85

1 Parent(s): cd5e9c8

init commit

Browse files

Files changed (8) hide show

.gitignore +1 -0
0040da34-25c8-4a5a-a6aa-36733ea3b8eb.png +0 -0
app.py +262 -0
e100_img.pt +3 -0
e50_aug.pt +3 -0
epoch50hgeq2.pt +3 -0
raytuneYolo50epoch.pt +3 -0
requirements.txt +0 -0

.gitignore ADDED Viewed

	@@ -0,0 +1 @@


1	+ yoloenv/

0040da34-25c8-4a5a-a6aa-36733ea3b8eb.png ADDED Viewed

app.py ADDED Viewed

	@@ -0,0 +1,262 @@

+import gradio as gr
+import requests
+import torch
+import os
+from tqdm import tqdm
+# import wandb
+from ultralytics import YOLO
+import cv2
+import numpy as np
+import pandas as pd
+from skimage.transform import resize
+from skimage import img_as_bool
+from skimage.morphology import convex_hull_image
+import json
+# wandb.init(mode='disabled')
def tableConvexHull(img, masks):
    """Merge the convex hulls of all given instance masks into one mask.

    Params:
        img: Source image. Only its height and width are read, and only
            when `masks` is empty.
        masks: Sequence of per-instance masks. Each item must support
            `.cpu().detach().numpy()` (e.g. a torch tensor).
    Returns:
        mask: Boolean np.ndarray that is True wherever the convex hull of
            any instance is True. All-False when `masks` is empty.
    """
    # Indexing masks[0] on an empty sequence would raise, so fall back to an
    # all-False mask with the image's height and width.
    if len(masks) == 0:
        return np.zeros(img.shape[:2], dtype=bool)

    mask = np.zeros(masks[0].shape, dtype=bool)
    for msk in masks:
        hull = convex_hull_image(msk.cpu().detach().numpy())
        mask = mask | hull
    return mask
def cls_exists(clss, cls):
    """Return True when class id `cls` occurs at least once in tensor `clss`."""
    matches = torch.where(clss == cls)
    first_axis_hits = matches[0]
    return len(first_axis_hits) != 0
def empty_mask(img):
    """Return an all-False boolean mask with the height and width of `img`."""
    height_width = img.shape[:2]
    return np.zeros(height_width, dtype=bool)
def extract_img_mask(img_model, img, config):
    """Run `img_model` on `img` and return the merged mask for class 0.

    Params:
        img_model: Model handed to `get_predictions`.
        img: Image to run the prediction on.
        config: Prediction settings handed to `get_predictions`.
    Returns:
        dict: {'status': -1} when the prediction failed; otherwise
            {'status': 1, 'mask': ...}, where the mask is empty when the
            prediction produced no masks.
    """
    res = get_predictions(img_model, img, config)
    outcome = res['status']

    if outcome == -1:
        return {'status': -1}
    if outcome == 0:
        # Nothing detected still counts as success, just with an empty mask.
        return {'status': 1, 'mask': empty_mask(img)}

    masks = res['masks']
    boxes = res['boxes']
    class_ids = boxes[:, 5]
    return {'status': 1, 'mask': extract_mask(img, masks, boxes, class_ids, 0)}
def get_predictions(model, img2, config):
    """Run `model.predict` on one source and return the first result's data.

    Params:
        model: Object whose `predict(...)` returns an iterable of results;
            each result exposes `masks` and `boxes`.
        img2: Prediction source, passed as `source=`.
        config: Dict with keys 'rm' (retina_masks), 'sz' (imgsz), 'conf'
            and 'classes'.
    Returns:
        dict with a 'status' key:
            1  -> 'masks' and 'boxes' hold `result.masks.data` and
                  `result.boxes.data` of the first result.
            0  -> prediction ran but there is no mask data to return
                  (no result at all, or the result has no masks/boxes).
            -1 -> building or running the prediction raised.
    """
    try:
        results = model.predict(
            source=img2, verbose=False, retina_masks=config['rm'],
            imgsz=config['sz'], conf=config['conf'], stream=True,
            classes=config['classes'],
        )
        # With stream=True the work happens lazily, so pulling the first
        # item must stay inside the try block.
        result = next(iter(results), None)
    except Exception:
        # Exception (not a bare except) so Ctrl-C / SystemExit still propagate.
        return {'status': -1}

    if result is None:
        # An empty iterator used to fall through and return None.
        return {'status': 0}

    try:
        masks, boxes = result.masks.data, result.boxes.data
    except AttributeError:
        # `masks` / `boxes` missing or None: nothing was detected.
        return {'status': 0}
    return {'status': 1, 'masks': masks, 'boxes': boxes}
def extract_mask(img, masks, boxes, clss, cls):
    """Merge every instance mask of class `cls` into one boolean array.

    Params:
        img: Image whose height and width size the empty fallback mask.
        masks: Tensor of instance masks, indexed along its first axis.
        boxes: Not used here.
        clss: Tensor of class ids, one per instance.
        cls: Class id to extract.
    Returns:
        mask: Boolean np.ndarray, True where any selected instance is set;
            an empty mask when `cls` does not occur in `clss`.
    """
    if cls_exists(clss, cls):
        selected = masks[torch.where(clss == cls)]
        merged = torch.any(selected, dim=0).bool()
        return merged.cpu().detach().numpy()
    return empty_mask(img)
def get_masks(img, model, img_model, flags, configs):
    """Collect the four layout masks for `img`.

    Params:
        img: Image array; unpacked as (height, width, channels) when the
            masks have to be resized.
        model: General model, used for classes 0-1 and for detecting
            classes 2-3.
        img_model: Dedicated model that produces the class-2 (image) mask.
        flags: Not used here.
        configs: Dict with 'paratext', 'imgtab' and 'image' settings.
    Returns:
        dict: {'status': -1} when any prediction failed; otherwise
            {'status': 1, 'masks': [class 0, class 1, image, table]}.
    """
    collected = []

    # ***** Paragraph and text masks (classes 0 and 1)
    para_res = get_predictions(model, img, configs['paratext'])
    para_status = para_res['status']
    if para_status == -1:
        return {'status': -1}
    if para_status == 0:
        collected.extend(empty_mask(img) for _ in range(2))
    else:
        seg, det = para_res['masks'], para_res['boxes']
        labels = det[:, 5]
        collected.extend(
            extract_mask(img, seg, det, labels, class_id) for class_id in range(2)
        )

    # ***** Image and table masks (classes 2 and 3)
    tab_res = get_predictions(model, img, configs['imgtab'])
    tab_status = tab_res['status']
    if tab_status == -1:
        return {'status': -1}
    if tab_status == 0:
        collected.extend(empty_mask(img) for _ in range(2))
    else:
        seg, det = tab_res['masks'], tab_res['boxes']
        labels = det[:, 5]

        # The general model only signals that an image is present; the
        # mask itself comes from the dedicated image model.
        if cls_exists(labels, 2):
            picture = extract_img_mask(img_model, img, configs['image'])
            if picture['status'] != 1:
                return {'status': -1}
            collected.append(picture['mask'])
        else:
            collected.append(empty_mask(img))

        # Tables are filled in to the convex hull of each instance.
        if cls_exists(labels, 3):
            table_rows = torch.where(labels == 3)
            collected.append(tableConvexHull(img, seg[table_rows]))
        else:
            collected.append(empty_mask(img))

    # When 'rm' is off for 'paratext', resize every mask to the image size.
    if not configs['paratext']['rm']:
        height, width, _ = img.shape
        collected = [img_as_bool(resize(m, (height, width))) for m in collected]

    return {'status': 1, 'masks': collected}
def overlay(image, mask, color, alpha, resize=None):
    """Combines image and its segmentation mask into a single image.
    Adapted from
    https://www.kaggle.com/code/purplejester/showing-samples-with-segmentation-mask-overlay
    Params:
        image: Image to draw on, channels-last with 3 channels (the mask is
            expanded to (H, W, 3) and must match it). np.ndarray
        mask: Segmentation mask of shape (H, W). np.ndarray
        color: Color for segmentation mask rendering, e.g. (255, 0, 0).
            The tuple is reversed before use.
        alpha: Weight of the colored layer in the blend, e.g. 0.5; the
            original image gets 1 - alpha.
        resize: If provided, both image and its overlay are resized before
            blending them together. Passed to cv2.resize as the target
            size, e.g. (1024, 1024).
    Returns:
        image_combined: The combined image. np.ndarray
    """
    color = color[::-1]
    # Repeat the 2-D mask along a new last axis so it matches the image.
    colored_mask = np.repeat(mask[..., np.newaxis], 3, axis=-1)
    masked = np.ma.MaskedArray(image, mask=colored_mask, fill_value=color)
    image_overlay = masked.filled()
    if resize is not None:
        # The image is already channels-last here, so it is resized as-is;
        # the former transpose(1, 2, 0) assumed a channels-first layout and
        # scrambled the axes.
        image = cv2.resize(image, resize)
        image_overlay = cv2.resize(image_overlay, resize)
    image_combined = cv2.addWeighted(image, 1 - alpha, image_overlay, alpha, 0)
    return image_combined
# Weight files and the models loaded from them at import time.
general_model_path, image_model_path = 'e50_aug.pt', 'e100_img.pt'
general_model = YOLO(general_model_path)
image_model = YOLO(image_model_path)

# Example input offered in the UI.
sample_path = ['0040da34-25c8-4a5a-a6aa-36733ea3b8eb.png']

flags = {'hist': False, 'bz': False}

# Per-pass prediction settings: 'sz' -> imgsz, 'conf' -> confidence
# threshold, 'rm' -> retina_masks, 'classes' -> class ids to keep.
configs = {
    'paratext': {'sz': 640, 'conf': 0.25, 'rm': True, 'classes': [0, 1]},
    'imgtab': {'sz': 640, 'conf': 0.35, 'rm': True, 'classes': [2, 3]},
    'image': {'sz': 640, 'conf': 0.35, 'rm': True, 'classes': [0]},
}
def evaluate(img_path, model=general_model, img_model=image_model,
             configs=configs, flags=flags):
    """Segment the document image at `img_path` and draw the masks on it.

    Params:
        img_path: Path of the image file, read with cv2.imread.
        model: General model passed to `get_masks`.
        img_model: Image-only model passed to `get_masks`.
        configs: Prediction settings passed to `get_masks`; not modified.
        flags: Passed through to `get_masks`.
    Returns:
        img: The input image with one colored overlay per mask. np.ndarray
    Raises:
        ValueError: The file could not be read as an image.
        RuntimeError: Prediction failed even with 'rm' switched off.
    """
    print('starting')
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread reports failure by returning None, not by raising.
        raise ValueError(f'could not read image: {img_path!r}')

    res = get_masks(img, model, img_model, flags, configs)
    if res['status'] == -1:
        # Retry once with 'rm' switched off, on a copy so the shared
        # settings stay untouched for later calls. The earlier recursive
        # retry passed the decoded array as the path and swapped
        # flags/configs, so it could never succeed.
        fallback = {name: {**cfg, 'rm': False} for name, cfg in configs.items()}
        res = get_masks(img, model, img_model, flags, fallback)
        if res['status'] == -1:
            raise RuntimeError('prediction failed even with retina masks disabled')
    masks = res['masks']

    color_map = {
        0: (255, 0, 0),
        1: (0, 255, 0),
        2: (0, 0, 255),
        3: (255, 255, 0),
    }
    # NOTE(review): cv2.imread yields BGR; confirm whether the consumer of
    # the returned array expects RGB and needs a final color conversion.
    for i, mask in enumerate(masks):
        img = overlay(image=img, mask=mask, color=color_map[i], alpha=0.4)
    print('finishing')
    return img
+# output = evaluate(img_path=sample_path, model=general_model, img_model=image_model,\
+#           configs=configs, flags=flags)
# Video-input component lists; the interface below does not use them.
inputs_img = [gr.components.Video(type="filepath", label="Input Video")]
outputs_img = [gr.components.Image(type="numpy", label="Output Image")]

# Components of the image interface: a file path in, a numpy image out.
inputs_image = [gr.components.Image(type="filepath", label="Input Image")]
outputs_image = [gr.components.Image(type="numpy", label="Output Image")]

interface_image = gr.Interface(
    title="Document Layout Segmentor",
    fn=evaluate,
    inputs=inputs_image,
    outputs=outputs_image,
    examples=sample_path,
    cache_examples=True,
)

e100_img.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7424265a528fd1a2f741bb48a3586e69496de55f14e4a4c5ba867e83c2d159f8
+size 54786656

e50_aug.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:12dba7a7156750342fb35ef2305a0bffa31615258aced63811e9220990f1f0a3
+size 54792992

epoch50hgeq2.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:40c00f2b620f539f9054bd17f4fbda064782aa64c089f1c366a607189a112acf
+size 218670661

raytuneYolo50epoch.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:971d22657b3a263a44150bbcb9a2a0726e15c3460a0f6a4810ae949c623bc5fa
+size 54793056

requirements.txt ADDED Viewed

Binary file (2.46 kB). View file