#!/bin/bash
#SBATCH --job-name=dna_mdns
#SBATCH --partition=coe-gpu
#SBATCH --gres=gpu:H100:1
#SBATCH --time=16:00:00        # max 16 GPU hours, i.e., time <= 16h / num of GPUs
#SBATCH --mem-per-gpu=60G      # maximum GPU RAM, 141G for H200, 94G for H100
                               # in the current setting, 40G is enough for num_replicates=2
                               # and 80G is enough for num_replicates=4
#SBATCH --cpus-per-task=2
#SBATCH --wait-all-nodes=1
#SBATCH --output=../outputs/%j.%x/.log

# Finetuning launcher for the DNA MDNS experiment.
# Fill in the three path variables below before submitting with sbatch.

set -euo pipefail

# --- User-specific paths (must be filled in before submitting) ---
HOME_LOC=""   # directory of the repo
SAVE_PATH=""  # directory to save the checkpoints
BASE_PATH=""  # directory of the pretrained checkpoints, e.g., "...../data_and_model/"

# Fail fast with a clear message if the placeholders above were not filled in
# (the original broken assignments silently left these empty).
: "${HOME_LOC:?Fill in HOME_LOC (repo directory) before submitting}"
: "${SAVE_PATH:?Fill in SAVE_PATH (checkpoint output directory) before submitting}"
: "${BASE_PATH:?Fill in BASE_PATH (pretrained checkpoint directory) before submitting}"

SCRIPT_LOC="$HOME_LOC/tr2d2/dna"
LOG_LOC="$HOME_LOC/tr2d2/dna/logs"
# shellcheck disable=SC2034  # DATE is currently unused; kept for log naming
DATE=$(date +%m_%d)

mkdir -p "$LOG_LOC"

# set 3 have skip connection
# ===================================================================
python "$SCRIPT_LOC/finetune.py" \
  --base_path "$BASE_PATH" \
  --device "cuda:0" \
  --noise_removal \
  --save_path_dir "$SAVE_PATH" \
  --wdce_num_replicates 16 \
  --buffer_size 160 \
  --batch_size 160 \
  --seq_length 200 \
  --num_children 32 \
  --total_num_steps 128 \
  --num_iter 5 \
  --resample_every_n_step 5 \
  --eval_every_n_epochs 10 \
  --num_epochs 60000 \
  --exploration 0.1 \
  --save_every_n_epoch 2000 \
  --alpha 0.1 \
  --centering \
  --grad_clip \
  --reward_clip \
  --reward_clip_value 15.0 \
  --reset_tree