% Conference paper (ICME 2003, per the DOI) on using computer vision for
% HRTF / room-response customization and head tracking in spatial audio.
@inproceedings{18456,
  author       = {Mohan, A. and Duraiswami, Ramani and Zotkin, Dmitry N. and DeMenthon, D. and Davis, Larry S.},
  title        = {Using computer vision to generate customized spatial audio},
  booktitle    = {Multimedia and Expo, {IEEE} International Conference on},
  volume       = {3},
  pages        = {57--60},
  year         = {2003},
  publisher    = {IEEE Computer Society},
  organization = {IEEE Computer Society},
  address      = {Los Alamitos, CA, USA},
  isbn         = {0-7803-7965-9},
  doi          = {10.1109/ICME.2003.1221247},
  abstract     = {Creating high quality virtual spatial audio over headphones requires real-time head tracking, personalized head-related transfer functions (HRTFs) and customized room response models. While there are expensive solutions to address these issues based on costly head trackers, measured personalized HRTFs and room responses, these are not suitable for widespread or easy deployment and use. We report on the development of a system that uses computer vision to produce customizable models for both the HRTF and the room response, and to achieve head-tracking. The system uses relatively inexpensive cameras and widely available personal computers. Computer-vision based anthropometric measurements of the head, torso, and the external ears are used for HRTF customization. For low-frequency HRTF customization we employ a simple head-and-torso model developed recently [V. R. Algazi et al., 2002]. For high frequency customization we employ measured pinna characteristics as an index into a database of HRTFs [D. N. Zotkin et al., 2002]. For head tracking we employ an online implementation of the POSIT algorithm [D. DeMenthon and L. Davis, 1995] along with active markers to compute head pose in real-time. The system provides an enhanced virtual listening experience at low cost.},
}