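# NOTE(review): the next three lines look like page-status text captured along
# with the script rather than shell code; kept as comments — confirm and drop.
# Spaces:
# Runtime error
# Runtime error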
# Disabled earlier variant of the polling loop: it watched for the job named
# "audio-gen-train_audiogen" and, when none was found, passed the last
# checkpoint path (by version sort) to submit_job.sh.
# CHECK_EVERY=900
# DURATION_DAYS=10
# CHECK_TOTAL=$((DURATION_DAYS*86400/CHECK_EVERY))
# NEPOCH_PRE=99
# NEPOCH_SFT=159
# NAME="audio-gen-train_audiogen"
# for (( i = 1; i <= $CHECK_TOTAL; i++ ))
# do
#   RUNNING_JOBS=$(sacct -o JobName%-150,JobID,Partition%-15,State | grep -v inference | grep RUNNING | grep polar | sort)
#   PENDING_JOBS=$(sacct -o JobName%-150,JobID,Partition%-15,State | grep -v inference | grep PENDING | grep polar | sort)
#   for STATE in "RUNNING" "PENDING" "NOT-RUN"
#   do
#     echo "===========${STATE}=========="
#     if [[ ${STATE} == "RUNNING" && ${RUNNING_JOBS} =~ "${NAME}" ]]; then
#       echo ${NAME}
#     elif [[ ${STATE} == "PENDING" && ${PENDING_JOBS} =~ "${NAME}" ]]; then
#       echo ${NAME}
#     elif [[ ${STATE} == "NOT-RUN" && ! ${RUNNING_JOBS} =~ "${NAME}" && ! ${PENDING_JOBS} =~ "${NAME}" ]]; then
#       base_path="/lustre/fsw/portfolios/adlr/users/sreyang/ckpts/stable_llm/harmonai_train/"
#       # Find the last subfolder
#       last_subfolder=$(ls -d "$base_path"*/ | sort -V | tail -n 1)
#       # Find the last checkpoint in the subfolder
#       last_ckpt=$(ls "$last_subfolder/checkpoints/"*.ckpt | sort -V | tail -n 1)
#       echo $last_ckpt
#       sh submit_job.sh "True" $last_ckpt
#       sleep 1
#     fi
#   done
#   echo "============================"
#   sleep $CHECK_EVERY
# done
# Job watchdog.
#
# Every CHECK_EVERY seconds, for DURATION_DAYS days, list jobs with sacct and
# run submit.sh whenever no listed job whose line contains NAME is RUNNING or
# PENDING. Only sacct lines that contain "polar" and do not contain
# "inference" are considered.
#
# Requires bash ([[ ]] and (( )) are used), sacct on PATH, and submit.sh in
# the current working directory.

CHECK_EVERY=900   # seconds between two checks
DURATION_DAYS=10  # how long to keep checking
CHECK_TOTAL=$((DURATION_DAYS * 86400 / CHECK_EVERY))  # number of checks
# Not referenced in this script and not exported; kept in case the file is
# sourced by something that reads them.
NEPOCH_PRE=99
NEPOCH_SFT=159
NAME="eval"       # substring searched for in the filtered sacct lines

for ((i = 1; i <= CHECK_TOTAL; i++)); do
  # Query sacct once per cycle so both lists come from the same snapshot.
  # With two separate calls, a job that moves PENDING -> RUNNING between them
  # appears in neither list and would be submitted a second time.
  if ALL_JOBS=$(sacct -o JobName%-150,JobID,Partition%-15,State); then
    CANDIDATES=$(grep -v inference <<<"${ALL_JOBS}" | grep polar | sort)
    RUNNING_JOBS=$(grep RUNNING <<<"${CANDIDATES}")
    PENDING_JOBS=$(grep PENDING <<<"${CANDIDATES}")

    for STATE in "RUNNING" "PENDING" "NOT-RUN"; do
      echo "===========${STATE}=========="
      # Literal substring tests (same meaning as =~ with a quoted right side).
      if [[ ${STATE} == "RUNNING" && ${RUNNING_JOBS} == *"${NAME}"* ]]; then
        echo "${NAME}"
      elif [[ ${STATE} == "PENDING" && ${PENDING_JOBS} == *"${NAME}"* ]]; then
        echo "${NAME}"
      elif [[ ${STATE} == "NOT-RUN" \
        && ${RUNNING_JOBS} != *"${NAME}"* \
        && ${PENDING_JOBS} != *"${NAME}"* ]]; then
        # Report a failed submission instead of ignoring it; the next cycle
        # will try again.
        sh submit.sh || echo "submit.sh exited with status $?" >&2
        sleep 1
      fi
    done
  else
    # An empty listing from a failed query would look like "nothing running"
    # and trigger a blind resubmission, so skip this cycle instead.
    echo "sacct failed; skipping this check" >&2
  fi
  echo "============================"
  sleep "${CHECK_EVERY}"
done