Spaces:
Runtime error
Runtime error
# Encode a contiguous range of corpus shard directories with
# `python -m pyserini.encode`, running one encoder process per shard index.
#
# Every setting below can be overridden from the environment; the defaults are
# the values this script was originally hard-coded with. Example:
#   START_IDX=0 END_IDX=3 GPU_ID=0 bash <this-script>
#
# Exit status: 0 when every shard in the range was encoded, 1 as soon as one
# encoder run fails, 2 when a numeric setting is malformed.

# Parent directory holding the shard_<N> corpus inputs.
SHARDED_CORPUS_PREFIX="${SHARDED_CORPUS_PREFIX:-/root/Corpus/CAsT22_msmarcov2_kilt_flattened_8shards}"
# Parent directory under which shard_<N> embedding outputs are written.
SHARDED_EMBEDDING_PREFIX="${SHARDED_EMBEDDING_PREFIX:-/ssd3/geonminkim/indexes/CAsT_21_22_msmarcov2_kilt/dense}"
START_IDX="${START_IDX:-4}" # first shard index to encode (inclusive)
END_IDX="${END_IDX:-5}"     # last shard index to encode (inclusive)
SHARD_NUM="${SHARD_NUM:-8}" # forwarded verbatim as --shard-num
GPU_ID="${GPU_ID:-2}"       # forwarded as --device cuda:<GPU_ID>

# Validate the numeric settings up front so a typo fails here rather than
# inside the arithmetic loop or deep inside the encoder. Leading zeros are
# rejected because bash arithmetic would read them as octal.
for setting_name in START_IDX END_IDX SHARD_NUM GPU_ID; do
  if [[ ! "${!setting_name}" =~ ^(0|[1-9][0-9]*)$ ]]; then
    printf 'error: %s must be a non-negative integer, got "%s"\n' \
      "$setting_name" "${!setting_name}" >&2
    exit 2
  fi
done

for ((SHARD_IDX = START_IDX; SHARD_IDX <= END_IDX; SHARD_IDX++)); do
  printf '%s\n' "encoding for SHARD_IDX = $SHARD_IDX"

  # Build the command as an array so prefixes containing spaces or glob
  # characters reach the encoder as single arguments.
  #
  # NOTE(review): --corpus already points at one shard directory, and
  # --shard-id/--shard-num are passed as well. If the encoder slices its input
  # by these flags, only part of each shard_<N> directory would be encoded.
  # Confirm against the pyserini.encode documentation before relying on the
  # output being complete.
  encode_cmd=(
    python -m pyserini.encode
    input
    --corpus "$SHARDED_CORPUS_PREFIX/shard_$SHARD_IDX"
    --fields text
    --shard-id "$SHARD_IDX"
    --shard-num "$SHARD_NUM"
    output
    --embeddings "$SHARDED_EMBEDDING_PREFIX/shard_$SHARD_IDX"
    --to-faiss
    encoder
    --encoder castorini/tct_colbert-v2-msmarco-cqe
    --fields text
    --device "cuda:$GPU_ID"
    --batch 128
    --fp16
  )

  # Stop at the first failure instead of silently moving on to the next shard
  # and leaving a missing or partial embedding directory behind.
  if ! "${encode_cmd[@]}"; then
    printf 'error: encoding failed for SHARD_IDX = %s\n' "$SHARD_IDX" >&2
    exit 1
  fi
done