Selected Publications

Please see my Google Scholar profile for a full list of publications.

📚 Data-Centric Computer Vision (Dataset Curation, Data Generation, Benchmarking)

Publication thumbnail
MM-WLAuslan: Multi-View Multi-Modal Word-Level Australian Sign Language Recognition Dataset
Xin Shen, Heming Du, Hongwei Sheng, Shuyun Wang, Hui Chen, Huiqiang Chen, Zhuojie Wu, Xiaobiao Du, Jiaying Ying, Ruihan Lu, Qingzheng Xu, Xin Yu: NeurIPS, 2024.
PDF Project Page BibTeX
@inproceedings{shen2024mm,
  author    = {Shen, Xin and Du, Heming and Sheng, Hongwei and Wang, Shuyun and Chen, Hui and Chen, Huiqiang and Wu, Zhuojie and Du, Xiaobiao and Ying, Jiaying and Lu, Ruihan and Xu, Qingzheng and Yu, Xin},
  title     = {{MM-WLAuslan}: Multi-View Multi-Modal Word-Level {Australian} Sign Language Recognition Dataset},
  booktitle = {Advances in Neural Information Processing Systems},
  volume    = {37},
  pages     = {69700--69715},
  year      = {2024},
}
        
Publication thumbnail
LDPose: Towards Inclusive Human Pose Estimation for Limb-Deficient Individuals in the Wild
Jiaying Ying, Heming Du, Kaihao Zhang, Lincheng Li, Xin Yu: ICCV, 2025.
PDF Project Page BibTeX
@inproceedings{ying2025ldpose,
  author    = {Ying, Jiaying and Du, Heming and Zhang, Kaihao and Li, Lincheng and Yu, Xin},
  title     = {{LDPose}: Towards Inclusive Human Pose Estimation for Limb-Deficient Individuals in the Wild},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision},
  pages     = {9865--9875},
  year      = {2025},
}
        
Publication thumbnail
3DRealCar: An in-the-wild RGB-D Car Dataset with 360-Degree Views
Xiaobiao Du, Yida Wang, Haiyang Sun, Zhuojie Wu, Hongwei Sheng, Shuyun Wang, Jiaying Ying, Ming Lu, Tianqing Zhu, Kun Zhan, Xin Yu: ICCV, 2025.
PDF Project Page BibTeX
@inproceedings{du20253drealcar,
  author    = {Du, Xiaobiao and Wang, Yida and Sun, Haiyang and Wu, Zhuojie and Sheng, Hongwei and Wang, Shuyun and Ying, Jiaying and Lu, Ming and Zhu, Tianqing and Zhan, Kun and Yu, Xin},
  title     = {{3DRealCar}: An In-the-Wild {RGB-D} Car Dataset with 360-Degree Views},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision},
  pages     = {26488--26498},
  year      = {2025},
}
        
Publication thumbnail
Plantseg: A Large-scale in-the-wild Dataset for Plant Disease Segmentation
Tianqi Wei, Zhi Chen, Xin Yu, Scott Chapman, Paul Melloy, Zi Huang: Nature Scientific Data, 2026.
PDF Project Page BibTeX
@misc{wei2024plantseg,
  author        = {Wei, Tianqi and Chen, Zhi and Yu, Xin and Chapman, Scott and Melloy, Paul and Huang, Zi},
  title         = {Plantseg: A Large-Scale In-the-Wild Dataset for Plant Disease Segmentation},
  year          = {2024},
  eprint        = {2409.04038},
  archiveprefix = {arXiv},
}
        
Publication thumbnail
Benchmarking in-the-wild Multimodal Disease Recognition and a Versatile Baseline
Tianqi Wei, Zhi Chen, Zi Huang, Xin Yu: ACM International Conference on Multimedia, 2024.
PDF Project Page BibTeX
@inproceedings{wei2024benchmarking,
  author    = {Wei, Tianqi and Chen, Zhi and Huang, Zi and Yu, Xin},
  title     = {Benchmarking in-the-wild multimodal disease recognition and a versatile baseline},
  booktitle = {Proceedings of the 32nd ACM International Conference on Multimedia},
  pages     = {1593--1601},
  year      = {2024},
}
        
Publication thumbnail
M3GYM: A Large-Scale Multimodal Multi-view Multi-person Pose Dataset for Fitness Activity Understanding in Real-world Settings
Qingzheng Xu, Ru Cao, Xin Shen, Heming Du, Sen Wang, Xin Yu: CVPR, 2025.
PDF Project Page BibTeX
@inproceedings{xu2025m3gym,
  author    = {Xu, Qingzheng and Cao, Ru and Shen, Xin and Du, Heming and Wang, Sen and Yu, Xin},
  title     = {{M3GYM}: A Large-Scale Multimodal Multi-view Multi-person Pose Dataset for Fitness Activity Understanding in Real-world Settings},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference},
  pages     = {12289--12300},
  year      = {2025},
}
        
Publication thumbnail
Benchmarking Audio-Visual Segmentation for Long-Untrimmed Videos
Chen Liu, Patrick Li, Qingtao Yu, Hongwei Sheng, Dadong Wang, Lincheng Li, Xin Yu: CVPR, 2024.
PDF Project Page BibTeX
@inproceedings{liu2024benchmarking,
  author    = {Liu, Chen and Li, Peike Patrick and Yu, Qingtao and Sheng, Hongwei and Wang, Dadong and Li, Lincheng and Yu, Xin},
  title     = {Benchmarking audio visual segmentation for long-untrimmed videos},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
  pages     = {22712--22722},
  year      = {2024},
}
        
Publication thumbnail
Auslan-daily: Australian Sign Language Translation for Daily Communication and News
Xin Shen, Shaozu Yuan, Hongwei Sheng, Heming Du, Xin Yu: NeurIPS, 2023.
PDF Project Page BibTeX
@inproceedings{shen2023auslan,
  author    = {Shen, Xin and Yuan, Shaozu and Sheng, Hongwei and Du, Heming and Yu, Xin},
  title     = {{Auslan-daily}: {Australian} Sign Language Translation for Daily Communication and News},
  booktitle = {Advances in Neural Information Processing Systems},
  volume    = {36},
  pages     = {80455--80469},
  year      = {2023},
}
        
Publication thumbnail
RVD: A Handheld Device-Based Fundus Video Dataset for Retinal Vessel Segmentation
Hongwei Sheng*, Hu Zhang*, Wahiduzzaman MD Khan*, Heming Du, Sen Wang, Minas Theodore Coroneo, Farshid Hajati, Sahar Shariflou, Michael Kalloniatis, Jack Phu, Ashish Agar, Zi Huang, Mojtaba Golzan, Xin Yu: NeurIPS, 2023. (*Equal Contribution)
PDF Project Page BibTeX
@inproceedings{khan2023rvd,
  author    = {Khan, MD Wahiduzzaman and Sheng, Hongwei and Zhang, Hu and Du, Heming and Wang, Sen and Coroneo, Minas and Hajati, Farshid and Shariflou, Sahar and Kalloniatis, Michael and Phu, Jack and Agar, Ashish and Huang, Zi and Golzan, Mojtaba and Yu, Xin},
  title     = {{RVD}: A Handheld Device-Based Fundus Video Dataset for Retinal Vessel Segmentation},
  booktitle = {Advances in Neural Information Processing Systems},
  volume    = {36},
  pages     = {18203--18224},
  year      = {2023},
}
        
Publication thumbnail
Word-level Deep Sign Language Recognition from Video: A New Large-scale Dataset and Methods Comparison
Dongxu Li, Cristian Rodriguez, Xin Yu, Hongdong Li: WACV, 2020. (Best Paper Honourable Mention Award)
PDF Project Page BibTeX
@inproceedings{li2020word,
  author    = {Li, Dongxu and Rodriguez, Cristian and Yu, Xin and Li, Hongdong},
  title     = {Word-level deep sign language recognition from video: A new large-scale dataset and methods comparison},
  booktitle = {Proceedings of the IEEE/CVF Winter Conference on Applications of Computer Vision},
  pages     = {1459--1469},
  year      = {2020},
}
        

📚 Human-Centric Visual Intelligence (Face, Body, Action, Emotion)

Publication thumbnail
Cross-View Isolated Sign Language Recognition via View Synthesis and Feature Disentanglement
Xin Shen, Xinyu Wang, Lei Shen, Kaihao Zhang, Xin Yu: ICCV, 2025.
PDF Project Page BibTeX
@inproceedings{shen2025cross,
  author    = {Shen, Xin and Wang, Xinyu and Shen, Lei and Zhang, Kaihao and Yu, Xin},
  title     = {Cross-View Isolated Sign Language Recognition via View Synthesis and Feature Disentanglement},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision},
  year      = {2025},
}
      
Publication thumbnail
Styletalk++: A Unified Framework for Controlling the Speaking Styles of Talking Heads
Suzhen Wang, Yifeng Ma, Yu Ding, Zhipeng Hu, Changjie Fan, Tangjie Lv, Zhidong Deng, Xin Yu: IEEE Transactions on Pattern Analysis and Machine Intelligence, 2024.
PDF Project Page BibTeX
@article{wang2024styletalk++,
  author  = {Wang, Suzhen and Ma, Yifeng and Ding, Yu and Hu, Zhipeng and Fan, Changjie and Lv, Tangjie and Deng, Zhidong and Yu, Xin},
  title   = {Styletalk++: A unified framework for controlling the speaking styles of talking heads},
  journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
  volume  = {46},
  number  = {6},
  pages   = {4331--4347},
  year    = {2024},
}
@inproceedings{ma2023styletalk,
  author    = {Ma, Yifeng and Wang, Suzhen and Hu, Zhipeng and Fan, Changjie and Lv, Tangjie and Ding, Yu and Deng, Zhidong and Yu, Xin},
  title     = {Styletalk: One-shot talking head generation with controllable speaking styles},
  booktitle = {Proceedings of the AAAI Conference on Artificial Intelligence},
  volume    = {37},
  number    = {2},
  pages     = {1896--1904},
  year      = {2023},
}
@inproceedings{wang2021audio2head,
  author       = {Wang, Suzhen and Li, Lincheng and Ding, Yu and Fan, Changjie and Yu, Xin},
  title        = {{Audio2Head}: Audio-driven One-shot Talking-head Generation with Natural Head Motion},
  booktitle    = {Proceedings of the Thirtieth International Joint Conference on Artificial Intelligence, {IJCAI} 2021},
  pages        = {1098--1105},
  year         = {2021},
  organization = {International Joint Conferences on Artificial Intelligence Organization},
}
      
Publication thumbnail
Diverse 3D Hand Gesture Prediction From Body Dynamics by Bilateral Hand Disentanglement
Xingqun Qi, Chen Liu, Muyi Sun, Lincheng Li, Changjie Fan, Xin Yu: CVPR, 2023.
PDF Project Page BibTeX
@inproceedings{qi2023diverse,
  author    = {Qi, Xingqun and Liu, Chen and Sun, Muyi and Li, Lincheng and Fan, Changjie and Yu, Xin},
  title     = {Diverse {3D} Hand Gesture Prediction from Body Dynamics by Bilateral Hand Disentanglement},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
  pages     = {4616--4626},
  year      = {2023},
}
      
Publication thumbnail
Transferring Cross-domain Knowledge for Video Sign Language Recognition
Dongxu Li, Xin Yu, Chenchen Xu, Lars Petersson, Hongdong Li: CVPR, 2020. (Nomination for Best Paper Award)
PDF Project Page BibTeX
@inproceedings{li2020transferring,
  author    = {Li, Dongxu and Yu, Xin and Xu, Chenchen and Petersson, Lars and Li, Hongdong},
  title     = {Transferring cross-domain knowledge for video sign language recognition},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
  pages     = {6205--6214},
  year      = {2020},
}
      
Publication thumbnail
Copy and Paste GAN: Face Hallucination from Shaded Thumbnails
Yang Zhang, Ivor Tsang, Yawei Luo, Changhui Hu, Xiaobo Lu, Xin Yu: CVPR, 2020. (Oral)
PDF Project Page BibTeX
@inproceedings{zhang2020copy,
  author    = {Zhang, Yang and Tsang, Ivor W and Luo, Yawei and Hu, Changhui and Lu, Xiaobo and Yu, Xin},
  title     = {Copy and Paste {GAN}: Face Hallucination from Shaded Thumbnails},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
  pages     = {7355--7364},
  year      = {2020},
}
@article{zhang2021recursive,
  author  = {Zhang, Yang and Tsang, Ivor W and Luo, Yawei and Hu, Changhui and Lu, Xiaobo and Yu, Xin},
  title   = {Recursive Copy and Paste {GAN}: Face Hallucination from Shaded Thumbnails},
  journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
  volume  = {44},
  number  = {8},
  pages   = {4321--4338},
  year    = {2022},
}
      
Publication thumbnail
Gait Recognition via Effective Global-Local Feature Representation and Local Temporal Aggregation
Beibei Lin, Shunli Zhang, Xin Yu: ICCV, 2021.
PDF Project Page BibTeX
@inproceedings{lin2021gait,
  author    = {Lin, Beibei and Zhang, Shunli and Yu, Xin},
  title     = {Gait recognition via effective global-local feature representation and local temporal aggregation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision},
  pages     = {14648--14656},
  year      = {2021},
}
      

📚 Foundation Models and Multi-modality Models (MLLM, VLM)

Publication thumbnail
TPR: Topology-preserving Reservoirs for Generalized Zero-shot Learning
Hui Chen, Yanbin Liu, Yongqiang Ma, Nanning Zheng, Xin Yu: NeurIPS, 2024.
PDF Project Page BibTeX
@inproceedings{chen2024tpr,
  author    = {Chen, Hui and Liu, Yanbin and Ma, Yongqiang and Zheng, Nanning and Yu, Xin},
  title     = {{TPR}: Topology-Preserving Reservoirs for Generalized Zero-Shot Learning},
  booktitle = {Advances in Neural Information Processing Systems},
  volume    = {37},
  pages     = {102229--102254},
  year      = {2024},
}
        
Publication thumbnail
MDAM3: A Misinformation Detection and Analysis Framework for Multitype Multimodal Media
Qingzheng Xu, Heming Du, Szymon Łukasik, Tianqing Zhu, Sen Wang, Xin Yu: ACM on Web Conference (WWW), 2025.
PDF Project Page BibTeX
@inproceedings{xu2025mdam3,
  author    = {Xu, Qingzheng and Du, Heming and {\L}ukasik, Szymon and Zhu, Tianqing and Wang, Sen and Yu, Xin},
  title     = {{MDAM3}: A Misinformation Detection and Analysis Framework for Multitype Multimodal Media},
  booktitle = {Proceedings of the ACM on Web Conference 2025},
  pages     = {5285--5296},
  year      = {2025},
}
        
Publication thumbnail
Blind Bitstream-corrupted Video Recovery via Metadata-guided Diffusion Model
Shuyun Wang, Hu Zhang, Xin Shen, Dadong Wang, Xin Yu: CVPR, 2025.
PDF Project Page BibTeX
@inproceedings{wang2025blind,
  author    = {Wang, Shuyun and Zhang, Hu and Shen, Xin and Wang, Dadong and Yu, Xin},
  title     = {Blind Bitstream-corrupted Video Recovery via Metadata-guided Diffusion Model},
  booktitle = {Proceedings of the Computer Vision and Pattern Recognition Conference},
  pages     = {22975--22984},
  year      = {2025},
}
        

📚 3D Vision and Augmented Reality (3D Reconstruction, 3D Understanding)

Publication thumbnail
EfficientDreamer: High-Fidelity and Robust 3D Creation via Orthogonal-View Diffusion Priors
Zhipeng Hu, Minda Zhao, Chaoyi Zhao, Xinyue Liang, Lincheng Li, Zeng Zhao, Changjie Fan, Xiaowei Zhou, Xin Yu: CVPR, 2024.
PDF Project Page BibTeX
@inproceedings{hu2024efficientdreamer,
  author    = {Hu, Zhipeng and Zhao, Minda and Zhao, Chaoyi and Liang, Xinyue and Li, Lincheng and Zhao, Zeng and Fan, Changjie and Zhou, Xiaowei and Yu, Xin},
  title     = {{EfficientDreamer}: High-Fidelity and Robust {3D} Creation via Orthogonal-View Diffusion Priors},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
  pages     = {4949--4958},
  year      = {2024},
}
      
Publication thumbnail
CPT-VR: Improving Surface Rendering via Closest Point Transform with View-Reflection Appearance
Zhipeng Hu, Yongqiang Zhang, Chen Liu, Lincheng Li, Sida Peng, Xiaowei Zhou, Changjie Fan, Xin Yu: ECCV, 2024.
PDF Project Page BibTeX
@inproceedings{hu2024cpt,
  author    = {Hu, Zhipeng and Zhang, Yongqiang and Liu, Chen and Li, Lincheng and Peng, Sida and Zhou, Xiaowei and Fan, Changjie and Yu, Xin},
  title     = {{CPT-VR}: Improving Surface Rendering via Closest Point Transform with View-Reflection Appearance},
  booktitle = {European Conference on Computer Vision},
  pages     = {223--239},
  year      = {2024},
}
      
Publication thumbnail
FreeAvatar: Robust 3D Facial Animation Transfer by Learning an Expression Foundation Model
Feng Qiu, Wei Zhang, Chen Liu, Rudong An, Lincheng Li, Yu Ding, Changjie Fan, Zhipeng Hu, Xin Yu: SIGGRAPH Asia, 2024.
PDF Project Page BibTeX
@inproceedings{qiu2024freeavatar,
  author    = {Qiu, Feng and Zhang, Wei and Liu, Chen and An, Rudong and Li, Lincheng and Ding, Yu and Fan, Changjie and Hu, Zhipeng and Yu, Xin},
  title     = {{FreeAvatar}: Robust {3D} Facial Animation Transfer by Learning an Expression Foundation Model},
  booktitle = {{SIGGRAPH Asia} 2024 Conference Papers},
  pages     = {1--11},
  year      = {2024},
}
      
Publication thumbnail
NeFII: Inverse Rendering for Reflectance Decomposition with Near-Field Indirect Illumination
Haoqian Wu, Zhipeng Hu, Lincheng Li, Yongqiang Zhang, Changjie Fan, Xin Yu: CVPR, 2023.
PDF Project Page BibTeX
@inproceedings{wu2023nefii,
  author    = {Wu, Haoqian and Hu, Zhipeng and Li, Lincheng and Zhang, Yongqiang and Fan, Changjie and Yu, Xin},
  title     = {{NeFII}: Inverse Rendering for Reflectance Decomposition with Near-Field Indirect Illumination},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
  pages     = {4295--4304},
  year      = {2023},
}
      
Publication thumbnail
Accurate 3-DoF Camera Geo-Localization via Ground-to-Satellite Image Matching
Yujiao Shi*, Xin Yu*, Liu Liu, Dylan Campbell, Piotr Koniusz, Hongdong Li: IEEE Transactions on Pattern Analysis and Machine Intelligence, 2023. (*Equal Contribution)
PDF Project Page BibTeX
@article{shi2022accurate,
  author  = {Shi, Yujiao and Yu, Xin and Liu, Liu and Campbell, Dylan and Koniusz, Piotr and Li, Hongdong},
  title   = {Accurate {3-DoF} Camera Geo-Localization via Ground-to-Satellite Image Matching},
  journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
  volume  = {45},
  number  = {3},
  pages   = {2682--2697},
  year    = {2023},
}
@inproceedings{shi2020looking,
  author    = {Shi, Yujiao and Yu, Xin and Campbell, Dylan and Li, Hongdong},
  title     = {Where Am {I} Looking At? {Joint} Location and Orientation Estimation by Cross-View Matching},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
  pages     = {4064--4072},
  year      = {2020},
}
@inproceedings{shi2020optimal,
  author    = {Shi, Yujiao and Yu, Xin and Liu, Liu and Zhang, Tong and Li, Hongdong},
  title     = {Optimal feature transport for cross-view image geo-localization},
  booktitle = {Proceedings of the AAAI Conference on Artificial Intelligence},
  volume    = {34},
  number    = {07},
  pages     = {11990--11997},
  year      = {2020},
}
@inproceedings{shi2019spatial,
  author    = {Shi, Yujiao and Liu, Liu and Yu, Xin and Li, Hongdong},
  title     = {Spatial-aware feature aggregation for image based cross-view geo-localization},
  booktitle = {Advances in Neural Information Processing Systems},
  volume    = {32},
  year      = {2019},
}
      
Publication thumbnail
Deep Hierarchical Representation of Point Cloud Videos via Spatio-Temporal Decomposition
Hehe Fan, Xin Yu, Yi Yang, Mohan Kankanhalli: IEEE Transactions on Pattern Analysis and Machine Intelligence, 2022.
PDF Project Page BibTeX
@article{fan2021deep,
  author    = {Fan, Hehe and Yu, Xin and Yang, Yi and Kankanhalli, Mohan},
  title     = {Deep hierarchical representation of point cloud videos via spatio-temporal decomposition},
  journal   = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
  volume    = {44},
  number    = {12},
  pages     = {9918--9930},
  year      = {2022},
  publisher = {IEEE},
}
@misc{fan2022pstnet,
  author        = {Fan, Hehe and Yu, Xin and Ding, Yuhang and Yang, Yi and Kankanhalli, Mohan},
  title         = {{PSTNet}: Point Spatio-Temporal Convolution on Point Cloud Sequences},
  year          = {2022},
  eprint        = {2205.13713},
  archiveprefix = {arXiv},
}
      
Publication thumbnail
6DoF Object Pose Estimation via Differentiable Proxy Voting Regularizer
Xin Yu, Zheyu Zhuang, Piotr Koniusz, Hongdong Li: BMVC, 2020. (Oral)
PDF Project Page BibTeX
@inproceedings{yu20206dof,
  author    = {Yu, Xin and Zhuang, Zheyu and Koniusz, Piotr and Li, Hongdong},
  title     = {{6DoF} Object Pose Estimation via Differentiable Proxy Voting Regularizer},
  booktitle = {Proceedings of the British Machine Vision Conference ({BMVC})},
  pages     = {1--13},
  year      = {2020},
}
      
Publication thumbnail
SOSNet: Second Order Similarity Regularization for Local Descriptor Learning
Yurun Tian, Xin Yu, Bin Fan, Fuchao Wu, Huub Heijnen, Vassileios Balntas: CVPR, 2019. (Oral)
PDF Project Page BibTeX
@inproceedings{Tian_2019_CVPR,
  author    = {Tian, Yurun and Yu, Xin and Fan, Bin and Wu, Fuchao and Heijnen, Huub and Balntas, Vassileios},
  title     = {{SOSNet}: Second Order Similarity Regularization for Local Descriptor Learning},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
  month     = jun,
  year      = {2019},
}