Spaces:
Runtime error
Runtime error
"""Create a directory of numbered symlinks pointing at a shuffled mix of .tar shards.

Shards are collected with glob from four source directories (ccs, coco, vg,
laion), combined into one list, shuffled, and exposed in OUT_DIR as
000000.tar, 000001.tar, ... symlinks.
"""
import os
import shutil
import glob
import random
from pprint import pprint

DIR_COCO_VG = "/gpfs/u/home/LMCG/LMCGljnn/scratch-shared/junyan/raw"
DIR = "/gpfs/u/home/LMCG/LMCGljnn/scratch-shared/junyan/raw/blip2_pretraining"
OUT_DIR = "/gpfs/u/home/LMCG/LMCGljnn/scratch-shared/junyan/raw/blip2_all_data_ground"

# Number of times the COCO shard list is added to the mix.
COCO_REPEAT = 5
# NOTE(review): the source this file was recovered from had lost its
# indentation, so it is not certain whether the VG shards were inside the
# original `for _ in range(5):` loop together with COCO. The minimal reading
# (COCO only) is used here; set VG_REPEAT = 5 if VG was repeated as well.
VG_REPEAT = 1


def build_shard_list(ccs_tars, coco_tars, vg_tars, laion_tars,
                     coco_repeat=COCO_REPEAT, vg_repeat=VG_REPEAT):
    """Combine the per-source shard lists into one (unshuffled) list.

    Args:
        ccs_tars: Paths of the ccs shards, included once.
        coco_tars: Paths of the coco shards, included ``coco_repeat`` times.
        vg_tars: Paths of the vg shards, included ``vg_repeat`` times.
        laion_tars: Paths of the laion shards, included once.
        coco_repeat: Multiplicity of the coco shards.
        vg_repeat: Multiplicity of the vg shards.

    Returns:
        A new list; the input sequences are not modified.
    """
    tars = list(ccs_tars)
    tars.extend(list(coco_tars) * coco_repeat)
    tars.extend(list(vg_tars) * vg_repeat)
    tars.extend(laion_tars)
    return tars


def link_shards(tars, out_dir):
    """Expose ``tars`` in ``out_dir`` as zero-padded, numbered symlinks.

    Any ``*.tar`` symlinks already present in ``out_dir`` are removed first,
    so the script can be rerun without ``os.symlink`` failing on an existing
    link and without links from an earlier, larger run lingering in the
    directory. Only symlinks are removed; regular files are left alone.

    Args:
        tars: Shard paths, in the order they should be numbered.
        out_dir: Directory to create the links in (created if missing).

    Returns:
        The number of links created.

    Raises:
        FileExistsError: If a destination name is taken by a non-symlink.
    """
    os.makedirs(out_dir, exist_ok=True)

    # Collect first, then unlink, so the directory is not mutated while
    # it is being scanned.
    with os.scandir(out_dir) as entries:
        stale = [
            entry.path
            for entry in entries
            if entry.name.endswith(".tar") and entry.is_symlink()
        ]
    for path in stale:
        os.unlink(path)

    for i, tar in enumerate(tars):
        dst = os.path.join(out_dir, f"{i:06d}.tar")
        os.symlink(tar, dst)
    return len(tars)


if __name__ == "__main__":
    sources = {
        "ccs": glob.glob(os.path.join(DIR, "ccs_synthetic_filtered_large_ground", "*.tar")),
        "coco": glob.glob(os.path.join(DIR_COCO_VG, "karpathy_coco_wds_full_ground", "*.tar")),
        "vg": glob.glob(os.path.join(DIR_COCO_VG, "vg_wds_full_ground", "*.tar")),
        "laion": glob.glob(os.path.join(DIR, "laion_synthetic_filtered_large", "all_ground", "*.tar")),
    }
    # An empty glob would otherwise silently drop a whole source from the mix.
    for name, found in sources.items():
        if not found:
            print(f"WARNING: no .tar shards found for {name}")

    tars = build_shard_list(
        sources["ccs"], sources["coco"], sources["vg"], sources["laion"]
    )
    random.shuffle(tars)
    print(len(tars))
    pprint(tars[:20])
    link_shards(tars, OUT_DIR)