Spaces:

Anirban0011
/

multimodal-shopee-finetune

Sleeping

App Files Files Community

Anirban0011 committed on Sep 26

Commit

fe0eb36

1 Parent(s): 9d563c0

init

Browse files

Files changed (14) hide show

.gitignore +2 -0
Dockerfile +16 -0
app.py +52 -0
inference.py +56 -0
pp/__init__.py +0 -0
pp/albu.py +8 -0
requirements.txt +13 -0
utils/__init__.py +0 -0
utils/ckpts.py +15 -0
utils/dataset.py +51 -0
utils/filterfunc.py +57 -0
utils/knn.py +49 -0
utils/predict.py +77 -0
utils/utilfuncs.py +86 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ env
2	+ *.pyc

Dockerfile ADDED Viewed

	@@ -0,0 +1,16 @@

+# Read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
+# you will also find guides on how best to write your Dockerfile
+FROM python:3.9
+# Create an account named "user" with UID 1000 and run every later step as that account.
+RUN useradd -m -u 1000 user
+USER user
+# Put the user's ~/.local/bin ahead of the existing PATH entries.
+ENV PATH="/home/user/.local/bin:$PATH"
+WORKDIR /app
+# requirements.txt is copied and installed before the rest of the sources are copied in.
+COPY --chown=user ./requirements.txt requirements.txt
+RUN pip install --no-cache-dir --upgrade -r requirements.txt
+COPY --chown=user . /app
+# Serve the `app` object from the `app` module with uvicorn on all interfaces, port 7860.
+CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]

app.py ADDED Viewed

	@@ -0,0 +1,52 @@

+from fastapi import FastAPI, File, UploadFile, Form
+import uvicorn
+import torch
+import os
+import nltk
+nltk.download("stopwords")
+import numpy as np
+from typing import List
+from inference import inference
+from code_base.utils import CFG
+# Tokenizer identifiers; the /predict handler forwards this list to inference() as TKN_PATH.
+TKN_PATH= ["bert-base-uncased"]
+# Image size and batch size forwarded to inference() by the /predict handler.
+IMG_SIZE = 256
+BATCH_SIZE = 32
+# NOTE(review): `img` is not read anywhere else in app.py as shown — confirm it is still needed.
+img = True
+# Use CUDA when torch reports it as available, otherwise CPU; the choice is stored on CFG.
+CFG.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+app = FastAPI(title="shopee-test-app")
+@app.get("/")
+async def root():
+    return {"status": "ok", "message": "Space is running"}
+@app.post("/predict")
+async def predict_image(files: List[UploadFile] = File(...),
+                          texts: List[str] = Form(...)):
+    li, lt= [], []
+    for file, text in zip(files, texts):
+        contents = await file.read()
+        li.append(contents)
+        lt.append(text)
+    res = inference(li=li,
+                    lt=lt,
+                    IMG_SIZE=IMG_SIZE,
+                    TKN_PATH=TKN_PATH,
+                    BATCH_SIZE=BATCH_SIZE
+                    )
+    msg = "products matched" if res else "products not matched"
+    return {"message" : f"{msg}"}
+# Entry point used when app.py is executed directly rather than launched by an external uvicorn command.
+# NOTE(review): this binds port 8000 while the Dockerfile CMD uses 7860 — confirm the difference is intended.
+if __name__ == "__main__":
+    uvicorn.run("app:app", host="0.0.0.0", port=8000)

inference.py ADDED Viewed

	@@ -0,0 +1,56 @@

+import gc
+import torch
+from utils.predict import predict
+from utils.filterfunc import filter_match_titles
+from utils.ckpts import img_ckpt, txt_ckpt
+from utils.utilfuncs import gen_data, load_model, return_feas
+# Backbone identifiers. inference() indexes img_backbone for each image model and uses
+# len(txt_backbone) as the number of text models to load.
+img_backbone = ["timm/eca_nfnet_l1.ra2_in1k"]
+txt_backbone = ["google-bert/bert-base-uncased"]
+def clean():
+    gc.collect()
+def inference(li, lt, IMG_SIZE,
+              TKN_PATH,
+              BATCH_SIZE,
+              num_workers = 4,
+              ):
+    dataloader_img, dataloader_txt = gen_data(li,
+                                              lt,
+                                              IMG_SIZE,
+                                              BATCH_SIZE,
+                                              TKN_PATH[0],
+                                              num_workers)
+    img_model = [load_model(backbone=img_backbone[i],
+                            ckpt_path=img_ckpt[i],
+                            img=True)
+             for i in range(len(img_backbone))]
+    img_feas = torch.cat([return_feas(
+                img_model[i],
+                dataloader_img, img=True)
+            for i in range(len(img_backbone))], dim=1)
+    txt_model = [load_model(backbone=TKN_PATH[i], ckpt_path=txt_ckpt[i])
+             for i in range(len(txt_backbone))]
+    txt_feas = torch.cat([return_feas(
+                txt_model[i],
+                dataloader_txt)
+            for i in range(len(txt_backbone))], dim=1)
+    match_final =  predict(img_feas=img_feas,
+                   txt_feas=txt_feas)
+    match_final = filter_match_titles(match_final, title_list=lt)
+    assert len(match_final == 2)
+    return set(match_final[0]) == set(match_final[1])

pp/__init__.py ADDED Viewed

File without changes

pp/albu.py ADDED Viewed

	@@ -0,0 +1,8 @@

+import albumentations as A
+def transform(size):
+    transforms = A.Compose([
+        A.Resize(size, size),
+        A.Normalize()
+        ])
+    return transforms

requirements.txt ADDED Viewed

	@@ -0,0 +1,13 @@

+torch
+fastapi
+uvicorn[standard]
+huggingface_hub
+albumentations
+transformers
+scikit-learn
+unidecode
+nltk
+timm
+faiss-cpu
+hf-xet
+git+https://github.com/Anirban0011/shopee-product-matching.git

utils/__init__.py ADDED Viewed

File without changes

utils/ckpts.py ADDED Viewed

	@@ -0,0 +1,15 @@

+from huggingface_hub import hf_hub_download
+REPO_ID = "Anirban0011/multimodal-shopee-finetune"
+def get_path(filename, repo):
+    path = hf_hub_download(repo_id=repo, filename=filename)
+    return path
+# Both get_path() calls run at import time of this module.
+img_path = get_path(repo=REPO_ID,
+                            filename="img_model_eca_nfnet_l1.ra2_in1k.pth")
+txt_path = get_path(repo=REPO_ID,
+                            filename="txt_model_bert-base-uncased_35.pth")
+# Single-element lists; inference.py indexes them by model position.
+img_ckpt = [img_path]
+txt_ckpt = [txt_path]

utils/dataset.py ADDED Viewed

	@@ -0,0 +1,51 @@

+import io
+import torch
+import numpy as np
+from PIL import Image
+from transformers import AutoTokenizer
+from torch.utils.data import Dataset
+class ImageDataset(Dataset):
+    def __init__(self, li, transform=None):
+        self.li = li
+        self.transform = transform
+    def __len__(self):
+        return len(self.li)
+    def __getitem__(self, index):
+        img_byte = self.li[index]
+        img = Image.open(io.BytesIO(img_byte)).convert("RGB")
+        img = np.array(img)
+        img = img.copy()
+        if self.transform is not None:
+            img = self.transform(image=img)
+            img = img["image"]
+        img = img.astype(np.float32)
+        img = img.transpose(2, 0, 1)
+        return torch.tensor(img).float()
+class TextDataset(Dataset):
+    def __init__(self, li, tokenizer=None):
+        self.li = li
+        self.to = AutoTokenizer.from_pretrained(tokenizer)
+    def __len__(self):
+        return len(self.li)
+    def __getitem__(self, index):
+        text = self.li[index]
+        text = self.tokenizer(
+            text,
+            padding="max_length",
+            truncation=True,
+            max_length=35,
+            return_tensors="pt",
+        )
+        input_ids = text["input_ids"][0]
+        attention_mask = text["attention_mask"][0]
+        return input_ids, attention_mask

utils/filterfunc.py ADDED Viewed

	@@ -0,0 +1,57 @@

+import regex
+# adapted from kaggle.com/code/slawekbiel/resnet18-0-772-public-lb/notebook
+# Unit table: category -> list of (unit name, multiplier) pairs. extract() multiplies each
+# parsed number by the multiplier, so values in one category are expressed in the unit
+# whose multiplier is 1.
+measurements = {
+    'weight': [('mg',1), ('g', 1000), ('gr', 1000), ('gram', 1000), ('kg', 1000000)],
+    'length': [('mm',1), ('cm', 10), ('m',1000), ('meter', 1000)],
+    'pieces': [ ('pc',1)],
+    'memory': [('gb', 1)],
+    'volume': [('ml', 1), ('l', 1000), ('liter',1000)]
+}
+def to_num(x, mult=1):
+    x = x.replace(',','.')
+    return int(float(x)*mult)
+def extract_unit(tit, m):
+    pat = rf'\W(\d+(?:[\,\.]\d+)?) ?{m}s?\W'
+    matches = regex.findall(pat, tit, overlapped=True)
+    return set(matches)
+def extract(tit):
+    res =dict()
+    tit = ' '+tit.lower()+' '
+    for cat, units in measurements.items():
+        cat_values=set()
+        for unit_name, mult in units:
+            values = extract_unit(tit, unit_name)
+            values = {to_num(v, mult) for v in values}
+            cat_values = cat_values.union(values)
+        if cat_values:
+            res[cat] = cat_values
+    return res
+def match_measures(m1, m2):
+    k1,k2 = set(m1.keys()), set(m2.keys())
+    common = k1.intersection(k2)
+    if not common:
+        return True
+    for key in common:
+        s1,s2 = m1[key], m2[key]
+        if s1.intersection(s2):
+            return True
+    return False
+def filter_match_titles(matches : list, title_list): # filter matches override
+    for i in range(len(matches)):
+        item_title = extract(title_list)
+        l=[]
+        for match in matches[i]:
+            if match == i:
+                l.append(i)
+                continue
+            match_title = extract(title_list)
+            if (match_measures(item_title, match_title)):
+                l.append(match)
+        matches[i] = l
+    return matches

utils/knn.py ADDED Viewed

	@@ -0,0 +1,49 @@

+from code_base.utils import CFG
+import faiss
+import torch
+def build_faiss(feas, dim):
+    if CFG.device.type == "cpu":
+        index = faiss.IndexFlatIP(dim)
+    else :
+        res = faiss.StandardGpuResources()
+        index = faiss.GpuIndexFlatIP(res, dim)
+    index.add(feas)
+    return index
+def get_batches(bs, n_batch, feas):
+    batches = []
+    for i in range(n_batch):
+        left = bs * i
+        right = bs * (i+1)
+        if i == n_batch - 1:
+            right = feas.shape[0]
+        batches.append(feas[left:right,:])
+    return batches
+def get_matches(bs, n_batch, feas, dim, k=51):
+    index = build_faiss(feas, dim)
+    m=[]
+    s=[]
+    for batch in get_batches(bs, n_batch, feas):
+        batch = batch.to(CFG.device)
+        sims, matches = index.search(batch, k)
+        m.append(matches)
+        s.append(sims)
+    m = torch.cat(m, dim=0).to(torch.int32)
+    s = torch.cat(s, dim=0)
+    return m,s
+def th_matches(bs, n_batch, matches, sims, th):
+    matches = get_batches(bs, n_batch, matches)
+    sims = get_batches(bs, n_batch, sims)
+    m = []
+    s=[]
+    for (batch_m, batch_s) in zip(matches, sims):
+        batch_m = batch_m.cpu().numpy()
+        batch_s = batch_s.cpu().numpy()
+        mask = (batch_s > th)
+        for row in range(len(mask)):
+            m.append(batch_m[row][mask[row]].tolist())
+            s.append(batch_s[row][mask[row]].tolist())
+    return m, s

utils/predict.py ADDED Viewed

	@@ -0,0 +1,77 @@

+import torch
+import numpy as np
+from code_base.utils import CFG
+import torch.nn.functional as F
+from functools import reduce
+from utils.knn import get_matches, th_matches
+# Number of neighbours requested in every get_matches() call made by predict().
+K = 51
+# Index widths passed to get_matches() for image, text and concatenated features.
+# NOTE(review): presumably these equal the encoders' output widths — confirm against the models.
+di = 1792
+dt = 1024
+dc = di+dt
+# Number of slices; predict() derives the slice size as len(features) // n_batch.
+n_batch = 10
+def filter_embeddings(feas, matches, sims):
+    feas = feas.detach().cpu()
+    new_feas = feas.clone()
+    for i in range(feas.shape[0]):
+        cur_feas = feas[matches[i]]
+        weights = torch.unsqueeze(torch.Tensor(sims[i]), 1)
+        new_feas[i] = weights.T@cur_feas
+    new_feas = F.normalize(new_feas)
+    return new_feas.to(CFG.device)
+def filter_matches(matches, sims, th=1.0, k=3, dist=1e-2):
+    top_matches = [row[:k] for row in matches]
+    top_sims = [row[:k] for row in sims]
+    for i in range(len(matches)):
+        if len(matches[i]) < k+1:
+            continue
+        dist_1 = sims[i][k-2] - sims[i][k-1]
+        dist_2 = sims[i][k-1] - sims[i][k]
+        if dist_2 < dist:
+            continue
+        if th*dist_1 < dist_2:
+            matches[i] = top_matches[i]
+            sims[i] = top_sims[i]
+    return matches, sims
+def union_matches(*lists):
+    matches = []
+    for group in zip(*lists):
+        matches.append(reduce(np.union1d, group).tolist())
+    return matches
+def predict(img_feas, txt_feas):
+    """Combine image, text and concatenated-feature neighbour searches into one match list.
+
+    Args:
+        img_feas: image features, one row per item.
+        txt_feas: text features, one row per item.
+
+    Returns:
+        A list with one entry per item: the union of the indices kept by the
+        image, text and combined searches.
+    """
+    # Normalize each modality, then normalize their concatenation again.
+    img_feas, txt_feas = F.normalize(img_feas).to(CFG.device) , F.normalize(txt_feas).to(CFG.device)
+    comb_feas = F.normalize(torch.cat([img_feas, txt_feas], dim=1)).to(CFG.device)
+    # Slice size; integer division gives 0 when there are fewer than n_batch rows.
+    bs  = len(comb_feas) // n_batch
+    # Nearest-neighbour search per feature set.
+    img_matches, img_sims = get_matches(bs, n_batch, img_feas, di, k=K)
+    text_matches, text_sims = get_matches(bs, n_batch, txt_feas, dt, k=K)
+    comb_matches, comb_sims = get_matches(bs, n_batch, comb_feas, dc, k=K)
+    # Keep only neighbours above a per-feature-set score threshold.
+    img_final, img_sims = th_matches(bs, n_batch, img_matches, img_sims, 0.704)
+    text_final, text_sims = th_matches(bs, n_batch, text_matches, text_sims, 0.764)
+    comb_final, comb_sims = th_matches(bs, n_batch, comb_matches, comb_sims, 0.52)
+    # Blend each combined feature with its kept neighbours, then search and threshold again at 0.9.
+    comb_feas = filter_embeddings(comb_feas, comb_final, comb_sims)
+    comb_matches, comb_sims = get_matches(bs, n_batch, comb_feas, dc, k=K)
+    comb_final, comb_sims = th_matches(bs, n_batch, comb_matches, comb_sims, 0.9)
+    # filter_matches edits its list arguments in place; only the match lists are used afterwards.
+    img_final,_ = filter_matches(img_final, img_sims, 1.1, 4, 2e-2)
+    text_final,_ = filter_matches(text_final, text_sims, 1.2, 4, 2e-2)
+    comb_final,_ = filter_matches(comb_final, comb_sims, 1.0, 3, 2e-2)
+    match_final = union_matches(img_final, text_final, comb_final)
+    return match_final

utils/utilfuncs.py ADDED Viewed

	@@ -0,0 +1,86 @@

+import torch
+from pp.albu import transform
+from code_base.utils import CFG
+from torch.utils.data import DataLoader
+from utils.dataset import ImageDataset, TextDataset
+from code_base.pipeline import ImgEncoder, TextEncoder
+def gen_data(li,
+                 lt,
+                 IMG_SIZE,
+                 BATCH_SIZE,
+                 TKN_PATH,
+                 num_workers):
+    data_img = ImageDataset(li=li, transform=transform(size=IMG_SIZE))
+    data_txt = TextDataset(li=lt, tokenizer=TKN_PATH)
+    dataloader_img = DataLoader(data_img, batch_size=BATCH_SIZE, shuffle=False,
+                            num_workers=num_workers)
+    dataloader_txt = DataLoader(data_txt, batch_size=BATCH_SIZE, shuffle= False,
+                            num_workers=num_workers)
+    return dataloader_img, dataloader_txt
+def load_model(backbone, ckpt_path, num_classes=11014, img = False):
+    if img:
+        model = ImgEncoder(num_classes, backbone = backbone, pretrained = False, p=4)
+    else:
+        model = TextEncoder(num_classes, backbone = backbone, eval_model=True)
+    ckpt = torch.load(ckpt_path, weights_only=True, map_location = CFG.device)
+    new_state_dict = {}
+    for k, v in ckpt.items():
+        new_key = k.replace("module.", "")  # remove module. prefix
+        new_state_dict[new_key] = v
+    model.load_state_dict(new_state_dict)
+    model = model.to(CFG.device)
+    print(f"model {backbone} loaded successfully")
+    return model
+class gen_feas:
+    def __init__(self, model, dataloader):
+        self.model = model
+        self.dataloader = dataloader
+    def gen_img_feas(self):
+        self.model.eval()
+        FEAS = []
+        with torch.no_grad():
+            for batch_idx, (images) in enumerate(self.dataloader):
+                images = images.to(CFG.device)
+                logits = self.model(images)
+                FEAS += [logits.detach().cpu()]
+        FEAS = torch.cat(FEAS).cpu().numpy()
+        return FEAS
+    def gen_txt_feas(self):
+        self.model.eval()
+        FEAS = []
+        with torch.no_grad():
+            for batch_idx, (inp_ids, att_masks) in enumerate(self.dataloader):
+                inp_ids, att_masks = inp_ids.to(CFG.device), att_masks.to(CFG.device)
+                logits = self.model(inp_ids, att_masks)
+                FEAS += [logits.detach().cpu()]
+        FEAS = torch.cat(FEAS).cpu().numpy()
+        return FEAS
+def return_feas(model, dataloader, img=False):
+    if img:
+        feas = gen_feas(model, dataloader).gen_img_feas()
+    else:
+        feas = gen_feas(model, dataloader).gen_txt_feas()
+    feas = torch.tensor(feas).to(CFG.device)
+    return feas