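"""Build TSV finetuning data for referring expression segmentation.

For each of refcoco / refcoco+ / refcocog, read the REFER annotations, encode
each image and its object mask as base64, convert the segmentation polygons to
coordinate strings, and write one TSV per split, plus a single shuffled TSV
that combines the train splits of all three datasets (val/test images excluded).
"""
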
from refer.refer import REFER
import numpy as np
from PIL import Image
import random
import os
from tqdm import tqdm
import pickle

from poly_utils import is_clockwise, revert_direction, check_length, reorder_points, \
    approximate_polygons, interpolate_polygons, image_to_base64, polygons_to_string

max_length = 400  # length cap passed to approximate_polygons below

data_root = './refer/data'
datasets = ['refcoco', 'refcoco+', 'refcocog']
image_dir = './datasets/images/mscoco/train2014'
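
# Image ids pickled from the val/test splits; used below to keep those images
# out of the combined training file.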
val_test_files = pickle.load(open("data/val_test_files.p", "rb"))

combined_train_data = []

for dataset in datasets:
    # refcoco and refcoco+ use the UNC splits; refcocog uses the UMD split
    if dataset == 'refcoco':
        splits = ['train', 'val', 'testA', 'testB']
        splitBy = 'unc'
    elif dataset == 'refcoco+':
        splits = ['train', 'val', 'testA', 'testB']
        splitBy = 'unc'
    elif dataset == 'refcocog':
        splits = ['train', 'val']
        splitBy = 'umd'

    save_dir = f'datasets/finetune/{dataset}'
    os.makedirs(save_dir, exist_ok=True)

    for split in splits:
        num_pts = []
        max_num_pts = 0
        file_name = os.path.join(save_dir, f"{dataset}_{split}.tsv")
        print("creating ", file_name)

        # per-split accumulators; only num_pts / max_num_pts are updated below
        uniq_ids = []
        image_ids = []
        sents = []
        coeffs_strings = []
        img_strings = []

        writer = open(file_name, 'w')
        refer = REFER(data_root, dataset, splitBy)
        ref_ids = refer.getRefIds(split=split)

        for this_ref_id in tqdm(ref_ids):
            this_img_id = refer.getImgIds(this_ref_id)
            this_img = refer.Imgs[this_img_id[0]]

            # COCO filenames look like COCO_train2014_000000123456.jpg;
            # keep the zero-padded numeric id
            fn = this_img['file_name']
            img_id = fn.split(".")[0].split("_")[-1]

            # load image
            img = Image.open(os.path.join(image_dir, this_img['file_name'])).convert("RGB")
            # convert image to string
            img_base64 = image_to_base64(img, format='jpeg')

            # load mask
            ref = refer.loadRefs(this_ref_id)
            ref_mask = np.array(refer.getMask(ref[0])['mask'])
            annot = np.zeros(ref_mask.shape)
            annot[ref_mask == 1] = 1  # 255
            annot_img = Image.fromarray(annot.astype(np.uint8), mode="P")
            annot_base64 = image_to_base64(annot_img, format='png')

            polygons = refer.getPolygon(ref[0])['polygon']
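
            # canonicalize each polygon so that equivalent annotations map to
            # the same point sequence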
            polygons_processed = []
            for polygon in polygons:
                # make the polygon clockwise
                if not is_clockwise(polygon):
                    polygon = revert_direction(polygon)
                # reorder the polygon so that the first vertex is the one closest to image origin
                polygon = reorder_points(polygon)
                polygons_processed.append(polygon)
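
            # with multiple polygons per referent, order them deterministically:
            # by squared distance of the first vertex from the origin, then x, then y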
            polygons = sorted(polygons_processed, key=lambda x: (x[0] ** 2 + x[1] ** 2, x[0], x[1]))
            polygons_interpolated = interpolate_polygons(polygons)
            polygons = approximate_polygons(polygons, 5, max_length)

            pts_string = polygons_to_string(polygons)
            pts_string_interpolated = polygons_to_string(polygons_interpolated)

            # load box and convert it from x,y,w,h to x1,y1,x2,y2
            box = refer.getRefBox(this_ref_id)  # x,y,w,h
            x, y, w, h = box
            box_string = f'{x},{y},{x + w},{y + h}'

            max_num_pts = max(max_num_pts, check_length(polygons))
            num_pts.append(check_length(polygons))

            # load text
            ref_sent = refer.Refs[this_ref_id]
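
            # one TSV row per referring expression:
            # uniq_id, image_id, sentence, box, polygon points,
            # image (base64 jpeg), mask (base64 png), interpolated polygon points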
            for i, (sent, sent_id) in enumerate(zip(ref_sent['sentences'], ref_sent['sent_ids'])):
                uniq_id = f"{this_ref_id}_{i}"
                instance = '\t'.join(
                    [uniq_id, str(this_img_id[0]), sent['sent'], box_string, pts_string, img_base64, annot_base64,
                     pts_string_interpolated]) + '\n'
                writer.write(instance)

                if img_id not in val_test_files and split == 'train':  # skip images that appear in any val/test split
                    combined_train_data.append(instance)
        writer.close()
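
# Combine the train splits of all three datasets into one shuffled file.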
random.shuffle(combined_train_data)
file_name = os.path.join("datasets/finetune", "refcoco+g_train_shuffled.tsv")
print("creating ", file_name)
writer = open(file_name, 'w')
writer.writelines(combined_train_data)
writer.close()
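
# A minimal read-back sketch (an assumption, not part of this script: it takes
# image_to_base64 to emit standard base64 of the encoded image bytes, and uses
# the column order written above):
#
#   import base64, io
#   from PIL import Image
#   with open(file_name) as f:
#       for line in f:
#           (uniq_id, image_id, sent, box, pts, img_b64, mask_b64,
#            pts_interp) = line.rstrip('\n').split('\t')
#           img = Image.open(io.BytesIO(base64.b64decode(img_b64)))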