Search-TTA-VLN
Collection
Test-Time Adaptation Framework for Multimodal Visual Navigation and Search (https://search-tta.github.io/)
•
8 items
•
Updated
Fine-tuned on laion/clap-htsat-fused.
For more information on usage, please refer to the Search-TTA-VLN GitHub repository here.
% Search-TTA paper (CoRL 2025). Authors are separated with " and " so BibTeX parses each name individually.
@inproceedings{tan2025searchtta,
  author        = {Tan, Derek Ming Siang and {Shailesh} and Liu, Boyang and Raj, Alok and Ang, Qi Xuan and Dai, Weiheng and Duhan, Tanishq and Chiun, Jimmy and Cao, Yuhong and Shkurti, Florian and Sartoretti, Guillaume},
  title         = {{Search-TTA}: A Multimodal Test-Time Adaptation Framework for Visual Search in the Wild},
  booktitle     = {Conference on Robot Learning},
  year          = {2025},
  eprint        = {2505.11350},
  archivePrefix = {arXiv},
  url           = {https://arxiv.org/abs/2505.11350},
}
% CLAP (LAION) arXiv preprint describing the base audio-language model.
@misc{wu2024largescalecontrastivelanguageaudiopretraining,
  author        = {Wu, Yusong and Chen, Ke and Zhang, Tianyu and Hui, Yuchen and Nezhurina, Marianna and Berg-Kirkpatrick, Taylor and Dubnov, Shlomo},
  title         = {Large-scale Contrastive Language-Audio Pretraining with Feature Fusion and Keyword-to-Caption Augmentation},
  year          = {2024},
  eprint        = {2211.06687},
  archivePrefix = {arXiv},
  primaryClass  = {cs.SD},
  url           = {https://arxiv.org/abs/2211.06687},
}