@conference {14192, title = {Active scene recognition with vision and language}, booktitle = {2011 IEEE International Conference on Computer Vision (ICCV)}, year = {2011}, month = {2011/11/06/13}, pages = {810 - 817}, publisher = {IEEE}, organization = {IEEE}, abstract = {This paper presents a novel approach to utilizing high level knowledge for the problem of scene recognition in an active vision framework, which we call active scene recognition. In traditional approaches, high level knowledge is used in the post-processing to combine the outputs of the object detectors to achieve better classification performance. In contrast, the proposed approach employs high level knowledge actively by implementing an interaction between a reasoning module and a sensory module (Figure 1). Following this paradigm, we implemented an active scene recognizer and evaluated it with a dataset of 20 scenes and 100+ objects. We also extended it to the analysis of dynamic scenes for activity recognition with attributes. Experiments demonstrate the effectiveness of the active paradigm in introducing attention and additional constraints into the sensing process.}, keywords = {accuracy, active scene recognition, classification performance, Cognition, Computer vision, Detectors, Equations, high level knowledge utilization, HUMANS, image classification, inference mechanisms, object detectors, Object recognition, reasoning module, sensing process, sensory module, support vector machines, Training}, isbn = {978-1-4577-1101-5}, doi = {10.1109/ICCV.2011.6126320}, author = {Yu,Xiaodong and Ferm{\"u}ller, Cornelia and Ching Lik Teo and Yezhou Yang and Aloimonos, J.} } @conference {12479, title = {Moving vistas: Exploiting motion for describing scenes}, booktitle = {2010 IEEE Conference on Computer Vision and Pattern Recognition (CVPR)}, year = {2010}, month = {2010/06/13/18}, pages = {1911 - 1918}, publisher = {IEEE}, organization = {IEEE}, abstract = {Scene recognition in an unconstrained setting is an open and challenging problem with wide applications. In this paper, we study the role of scene dynamics for improved representation of scenes. We subsequently propose dynamic attributes which can be augmented with spatial attributes of a scene for semantically meaningful categorization of dynamic scenes. We further explore accurate and generalizable computational models for characterizing the dynamics of unconstrained scenes. The large intra-class variation due to unconstrained settings and the complex underlying physics present challenging problems in modeling scene dynamics. Motivated by these factors, we propose using the theory of chaotic systems to capture dynamics. Due to the lack of a suitable dataset, we compiled a dataset of {\textquoteleft}in-the-wild{\textquoteright} dynamic scenes. Experimental results show that the proposed framework leads to the best classification rate among other well-known dynamic modeling techniques. We also show how these dynamic features provide a means to describe dynamic scenes with motion-attributes, which then leads to meaningful organization of the video data.}, keywords = {Application software, Automation, Chaos, chaotic system, Computational modeling, Computer vision, dynamic scene categorization, Educational institutions, HUMANS, image recognition, in the wild dynamic scene, Layout, motion attribute, natural scenes, Physics, probability, scene recognition, Snow, video data}, isbn = {978-1-4244-6984-0}, doi = {10.1109/CVPR.2010.5539864}, author = {Shroff, N. and Turaga,P. 
and Chellapa, Rama} } @article {12476, title = {Online Empirical Evaluation of Tracking Algorithms}, journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence}, volume = {32}, year = {2010}, month = {2010/08//}, pages = {1443 - 1458}, abstract = {Evaluation of tracking algorithms in the absence of ground truth is a challenging problem. There exist a variety of approaches for this problem, ranging from formal model validation techniques to heuristics that look for mismatches between track properties and the observed data. However, few of these methods scale up to the task of visual tracking, where the models are usually nonlinear and complex and typically lie in a high-dimensional space. Further, scenarios that cause track failures and/or poor tracking performance are also quite diverse for the visual tracking problem. In this paper, we propose an online performance evaluation strategy for tracking systems based on particle filters using a time-reversed Markov chain. The key intuition of our proposed methodology relies on the time-reversible nature of physical motion exhibited by most objects, which in turn should be possessed by a good tracker. In the presence of tracking failures due to occlusion, low SNR, or modeling errors, this reversible nature of the tracker is violated. We use this property for detection of track failures. To evaluate the performance of the tracker at time instant t, we use the posterior of the tracking algorithm to initialize a time-reversed Markov chain. We compute the posterior density of track parameters at the starting time t = 0 by filtering back in time to the initial time instant. The distance between the posterior density of the time-reversed chain (at t = 0) and the prior density used to initialize the tracking algorithm forms the decision statistic for evaluation. It is observed that when the data are generated by the underlying models, the decision statistic takes a low value. We provide a thorough experimental analysis of the evaluation methodology. Specifically, we demonstrate the effectiveness of our approach for tackling common challenges such as occlusion, pose, and illumination changes and provide the Receiver Operating Characteristic (ROC) curves. Finally, we also show the applicability of the core ideas of the paper to other tracking algorithms such as the Kanade-Lucas-Tomasi (KLT) feature tracker and the mean-shift tracker.}, keywords = {Back, Biomedical imaging, Computer vision, Filtering, formal model validation techniques, formal verification, ground truth, Kanade Lucas Tomasi feature tracker, Karhunen-Loeve transforms, lighting, Markov processes, mean shift tracker, model validation, online empirical evaluation, particle filtering (numerical methods), Particle filters, Particle tracking, performance evaluation, receiver operating characteristic curves, Robustness, SNR, Statistics, Surveillance, time reversed Markov chain, tracking, tracking algorithms, visual tracking}, isbn = {0162-8828}, doi = {10.1109/TPAMI.2009.135}, author = {Wu,Hao and Sankaranarayanan,A.
C and Chellapa, Rama} } @conference {12480, title = {Tracking via object reflectance using a hyperspectral video camera}, booktitle = {2010 IEEE Computer Society Conference on Computer Vision and Pattern Recognition Workshops (CVPRW)}, year = {2010}, month = {2010/06/13/18}, pages = {44 - 51}, publisher = {IEEE}, organization = {IEEE}, abstract = {Recent advances in electronics and sensor design have enabled the development of a hyperspectral video camera that can capture hyperspectral datacubes at near video rates. The sensor offers the potential for novel and robust methods for surveillance by combining methods from computer vision and hyperspectral image analysis. Here, we focus on the problem of tracking objects through challenging conditions, such as rapid illumination and pose changes, occlusions, and the presence of confusers. A new framework that incorporates radiative transfer theory to estimate object reflectance and the mean shift algorithm to simultaneously track the object based on its reflectance spectra is proposed. The combination of spectral detection and motion prediction enables the tracker to be robust against abrupt motions, and facilitates fast convergence of the mean shift tracker. In addition, the system achieves good computational efficiency by using random projection to reduce spectral dimension. The tracker has been evaluated on real hyperspectral video data.}, keywords = {Computer vision, electronic design, hyperspectral datacubes, hyperspectral image analysis, Hyperspectral imaging, Hyperspectral sensors, hyperspectral video camera, Image motion analysis, Image sensors, lighting, Motion estimation, motion prediction, Object detection, object reflectance tracking, random projection, Reflectivity, robust methods, Robustness, sensor design, spectral detection, Surveillance, tracking, Video surveillance}, isbn = {978-1-4244-7029-7}, doi = {10.1109/CVPRW.2010.5543780}, author = {Nguyen,Hien Van and Banerjee, A. and Chellapa, Rama} } @conference {13086, title = {Web-scale computer vision using MapReduce for multimedia data mining}, booktitle = {Proceedings of the Tenth International Workshop on Multimedia Data Mining}, series = {MDMKDD {\textquoteright}10}, year = {2010}, month = {2010///}, pages = {9:1{\textendash}9:10}, publisher = {ACM}, organization = {ACM}, address = {New York, NY, USA}, abstract = {This work explores computer vision applications of the MapReduce framework that are relevant to the data mining community. An overview of MapReduce and common design patterns is provided for those with limited MapReduce background. We discuss both the high level theory and the low level implementation for several computer vision algorithms: classifier training, sliding windows, clustering, bag-of-features, background subtraction, and image registration.
Experiments with the k-means clustering and single Gaussian background subtraction algorithms were performed on a 410 node Hadoop cluster.}, keywords = {background subtraction, bag-of-features, Cloud computing, clustering, Computer vision, image registration, MapReduce}, isbn = {978-1-4503-0220-3}, doi = {10.1145/1814245.1814254}, url = {http://doi.acm.org/10.1145/1814245.1814254}, author = {White,Brandyn and Tom Yeh and Jimmy Lin and Davis, Larry S.} } @conference {18352, title = {Searching documentation using text, OCR, and image}, booktitle = {Proceedings of the 32nd international ACM SIGIR conference on Research and development in information retrieval}, series = {SIGIR {\textquoteright}09}, year = {2009}, month = {2009///}, pages = {776 - 777}, publisher = {ACM}, organization = {ACM}, address = {New York, NY, USA}, abstract = {We describe a mixed-modality method to index and search software documentation in three ways: plain text, OCR text of embedded figures, and visual features of these figures. Using a corpus of 102 computer books with a total of 62,943 pages and 75,800 figures, we empirically demonstrate that our method achieves better precision/recall than do alternatives based on single modalities.}, keywords = {Computer vision, content-based image retrieval, multimodal search}, isbn = {978-1-60558-483-6}, doi = {10.1145/1571941.1572123}, url = {http://doi.acm.org/10.1145/1571941.1572123}, author = {Tom Yeh and Katz,Boris} } @conference {14205, title = {Bilateral symmetry of object silhouettes under perspective projection}, booktitle = {19th International Conference on Pattern Recognition, 2008. ICPR 2008}, year = {2008}, month = {2008/12/08/11}, pages = {1 - 4}, publisher = {IEEE}, organization = {IEEE}, abstract = {Symmetry is an important property of objects and is exhibited in different forms, e.g., bilateral, rotational, etc. This paper presents an algorithm for computing the bilateral symmetry of silhouettes of shallow objects under perspective distortion, exploiting the invariance of the cross ratio to projective transformations. The basic idea is to use the cross ratio to compute a number of midpoints of cross sections and then fit a straight line through them. The goodness-of-fit determines the likelihood of the line to be the axis of symmetry. We analytically estimate the midpoint{\textquoteright}s location as a function of the vanishing point for a given object silhouette. Hence finding the symmetry axis amounts to a 2D search in the space of vanishing points. We present experiments on two datasets as well as Internet images of symmetric objects that validate our approach.}, keywords = {Automation, bilateral symmetry, Computer vision, Frequency, Image analysis, Image coding, Image reconstruction, Internet, Internet images, Object detection, object silhouettes, perspective distortion, perspective projection, SHAPE, symmetric objects}, isbn = {978-1-4244-2174-9}, doi = {10.1109/ICPR.2008.4761501}, author = {Bitsakos,K. and Yi,H. and Yi,L. and Ferm{\"u}ller, Cornelia} } @conference {18477, title = {Imaging concert hall acoustics using visual and audio cameras}, booktitle = {IEEE International Conference on Acoustics, Speech and Signal Processing, 2008.
ICASSP 2008}, year = {2008}, month = {2008///}, pages = {5284 - 5287}, publisher = {IEEE}, organization = {IEEE}, abstract = {Using a real-time audio camera that uses the output of a spherical microphone array beamformer steered in all directions to create central-projection acoustic intensity images, we present a technique to measure the acoustics of rooms and halls. A panoramic mosaiced visual image of the space is also created. Since both the visual and the audio camera images are central projection, registration of the acquired audio and video images can be performed using standard computer vision techniques. We describe the technique, and apply it to examine the relation between acoustical features and architectural details of the Dekelbaum concert hall at the Clarice Smith Performing Arts Center in College Park, MD.}, keywords = {Acoustic imaging, acoustic intensity images, acoustic measurement, Acoustic measurements, Acoustic scattering, acoustic signal processing, acoustical camera, acoustical scene analysis, acquired audio registration, audio cameras, audio signal processing, CAMERAS, central projection, Computer vision, Educational institutions, HUMANS, image registration, Image segmentation, imaging concert hall acoustics, Layout, microphone arrays, panoramic mosaiced visual image, Raman scattering, reverberation, room acoustics, spherical microphone array beamformer, spherical microphone arrays, video image registration, visual cameras}, isbn = {978-1-4244-1483-3}, doi = {10.1109/ICASSP.2008.4518852}, author = {O{\textquoteright}Donovan,A. and Duraiswami, Ramani and Zotkin,Dmitry N} } @conference {18350, title = {Photo-based question answering}, booktitle = {Proceedings of the 16th ACM international conference on Multimedia}, series = {MM {\textquoteright}08}, year = {2008}, month = {2008///}, pages = {389 - 398}, publisher = {ACM}, organization = {ACM}, address = {New York, NY, USA}, abstract = {Photo-based question answering is a useful way of finding information about physical objects. Current question answering (QA) systems are text-based and can be difficult to use when a question involves an object with distinct visual features. A photo-based QA system allows direct use of a photo to refer to the object. We develop a three-layer system architecture for photo-based QA that brings together recent technical achievements in question answering and image matching. The first, template-based QA layer matches a query photo to online images and extracts structured data from multimedia databases to answer questions about the photo. To simplify image matching, it exploits the question text to filter images based on categories and keywords. The second, information retrieval QA layer searches an internal repository of resolved photo-based questions to retrieve relevant answers. The third, human-computation QA layer leverages community experts to handle the most difficult cases. A series of experiments performed on a pilot dataset of 30,000 images of books, movie DVD covers, grocery items, and landmarks demonstrate the technical feasibility of this architecture.
We present three prototypes to show how photo-based QA can be built into an online album, a text-based QA system, and a mobile application.}, keywords = {Computer vision, Information retrieval, Question answering}, isbn = {978-1-60558-303-7}, doi = {10.1145/1459359.1459412}, url = {http://doi.acm.org/10.1145/1459359.1459412}, author = {Tom Yeh and Lee,John J and Darrell,Trevor} } @book {13327, title = {Shape Analysis and Structuring}, year = {2008}, month = {2008///}, publisher = {Springer}, organization = {Springer}, abstract = {With a lot of recent developments in the field, this much-needed book has come at just the right time. It covers a variety of topics related to preserving and enhancing shape information at a geometric level. The contributors also cover subjects that are relevant to effectively capturing the structure of a shape by identifying relevant shape components and their mutual relationships.}, keywords = {Computer Graphics, Computer vision, Computers / Computer Graphics, Computers / Image Processing, Geometrical models, Geometry, Geometry, Analytic, Image analysis, IMAGE PROCESSING, Mathematics / Functional Analysis, Mathematics / Geometry / General, Mathematics / Graphic Methods, Mathematics / Mathematical Analysis, shapes, Technology \& Engineering / Engineering (General), Visualization}, isbn = {9783540332640}, author = {De Floriani, Leila and Spagnuolo,Michela} } @conference {14207, title = {Combining motion from texture and lines for visual navigation}, booktitle = {IEEE/RSJ International Conference on Intelligent Robots and Systems, 2007. IROS 2007}, year = {2007}, month = {2007/11/29/Oct. }, pages = {232 - 239}, publisher = {IEEE}, organization = {IEEE}, abstract = {Two novel methods for computing 3D structure information from video for a piecewise planar scene are presented. The first method is based on a new line constraint, which clearly separates the estimation of distance from the estimation of slant. The second method exploits the concepts of phase correlation to compute distance and slant information from the change of image frequencies of a textured plane. The two different estimates together with structure estimates from classical image motion are combined and integrated over time using an extended Kalman filter. The estimation of the scene structure is demonstrated experimentally in a motion control algorithm that allows the robot to move along a corridor. We demonstrate the efficacy of each individual method and their combination and show that the method allows for visual navigation in textured as well as un-textured environments.}, keywords = {3D structure information, CAMERAS, Computer vision, extended Kalman filter, Frequency, image frequencies, Image motion analysis, Image texture, Kalman filters, Layout, motion control, Motion estimation, Navigation, Optical computing, phase correlation, piecewise planar scene, Robustness, Simultaneous localization and mapping, Speech processing, textured plane, video signal processing, visual navigation}, isbn = {978-1-4244-0912-9}, doi = {10.1109/IROS.2007.4399568}, author = {Bitsakos,K.
and Li Yi and Ferm{\"u}ller, Cornelia} } @inbook {12640, title = {Computer Vision, Statistics in}, booktitle = {Encyclopedia of Statistical Sciences}, year = {2006}, month = {2006///}, publisher = {John Wiley \& Sons, Inc.}, organization = {John Wiley \& Sons, Inc.}, abstract = {Computer Vision (CV) broadly refers to the discipline where extraction of useful 2D and/or 3D information from one or more images is of interest. Useful information could consist of features such as edges, lines, curves and textures, or information about depth, motion, object descriptions, etc. CV problems are usually ill-posed inverse problems. Since the image data is usually obtained from sensors such as video cameras, infrared, radar, etc., the information extraction processes often have to deal with data that is corrupted by noise from the sensors and the environment. Statistics can help in obtaining robust and accurate solutions to these inverse problems by modeling the noise processes. We present here a broad overview of the applications of statistics to different computer vision problems and explain in detail two particular applications, tracking and motion analysis, where statistical approaches have been used very successfully.}, keywords = {Computer vision, sampling, Statistics, structure from motion, tracking}, isbn = {9780471667193}, url = {http://onlinelibrary.wiley.com/doi/10.1002/0471667196.ess3129/abstract}, author = {Chellapa, Rama and Chowdhury,Amit K. Roy} } @article {19661, title = {How Multirobot Systems Research will Accelerate our Understanding of Social Animal Behavior}, journal = {Proceedings of the IEEE}, volume = {94}, year = {2006}, month = {2006/07//}, pages = {1445 - 1463}, abstract = {Our understanding of social insect behavior has significantly influenced artificial intelligence (AI) and multirobot systems{\textquoteright} research (e.g., ant algorithms and swarm robotics). In this work, however, we focus on the opposite question: "How can multirobot systems research contribute to the understanding of social animal behavior?" As we show, we are able to contribute at several levels. First, using algorithms that originated in the robotics community, we can track animals under observation to provide essential quantitative data for animal behavior research. Second, by developing and applying algorithms originating in speech recognition and computer vision, we can automatically label the behavior of animals under observation. In some cases the automatic labeling is more accurate and consistent than manual behavior identification. Our ultimate goal, however, is to automatically create, from observation, executable models of behavior. An executable model is a control program for an agent that can run in simulation (or on a robot). The representation for these executable models is drawn from research in multirobot systems programming.
In this paper we present the algorithms we have developed for tracking, recognizing, and learning models of social animal behavior, details of their implementation, and quantitative experimental results using them to study social insects.}, keywords = {Acceleration, Animal behavior, ant movement tracking, Artificial intelligence, biology computing, Computer vision, control engineering computing, Insects, Intelligent robots, Labeling, monkey movement tracking, multi-robot systems, multirobot systems, robotics algorithms, Robotics and automation, social animal behavior, social animals, social insect behavior, Speech recognition, tracking}, isbn = {0018-9219}, author = {Balch, T. and Dellaert, F. and Feldman, A. and Guillory, A. and Isbell, C.L. and Zia Khan and Pratt, S.C. and Stein, A.N. and Wilde, H.} } @conference {12014, title = {Integration of visual and inertial information for egomotion: a stochastic approach}, booktitle = {Proceedings 2006 IEEE International Conference on Robotics and Automation, 2006. ICRA 2006}, year = {2006}, month = {2006/05/15/19}, pages = {2053 - 2059}, publisher = {IEEE}, organization = {IEEE}, abstract = {We present a probabilistic framework for visual correspondence, inertial measurements and egomotion. First, we describe a simple method based on Gabor filters to produce correspondence probability distributions. Next, we generate a noise model for inertial measurements. Probability distributions over the motions are then computed directly from the correspondence distributions and the inertial measurements. We investigate combining the inertial and visual information for a single distribution over the motions. We find that with smaller amounts of correspondence information, fusion of the visual data with the inertial sensor results in much better egomotion estimation. This is essentially because inertial measurements decrease the "translation-rotation" ambiguity. However, when more correspondence information is used, this ambiguity is reduced to such a degree that the inertial measurements provide negligible improvement in accuracy. This suggests that inertial and visual information are more closely integrated in a compositional sense.}, keywords = {Computer vision, data mining, Distributed computing, egomotion estimation, Gabor filters, Gravity, inertial information, inertial sensor, Laboratories, Motion estimation, Noise measurement, Probability distribution, probability distributions, Rotation measurement, stochastic approach, Stochastic processes, visual information}, isbn = {0-7803-9505-0}, doi = {10.1109/ROBOT.2006.1642007}, author = {Domke, J. and Aloimonos, J.} } @conference {14187, title = {A Projective Invariant for Textures}, booktitle = {2006 IEEE Computer Society Conference on Computer Vision and Pattern Recognition}, volume = {2}, year = {2006}, month = {2006///}, pages = {1932 - 1939}, publisher = {IEEE}, organization = {IEEE}, abstract = {Image texture analysis has received a lot of attention in the past years. Researchers have developed many texture signatures based on texture measurements, for the purpose of uniquely characterizing the texture. Existing texture signatures, in general, are not invariant to 3D transforms such as view-point changes and non-rigid deformations of the texture surface, which is a serious limitation for many applications. In this paper, we introduce a new texture signature, called the multifractal spectrum (MFS). It provides an efficient framework combining global spatial invariance and local robust measurements.
The MFS is invariant under the bi-Lipschitz map, which includes view-point changes and non-rigid deformations of the texture surface, as well as local affine illumination changes. Experiments demonstrate that the MFS captures the essential structure of textures with quite low dimension.}, keywords = {Computer science, Computer vision, Educational institutions, Fractals, Geometry, Image texture, Level set, lighting, Robustness, Surface texture}, isbn = {0-7695-2597-0}, doi = {10.1109/CVPR.2006.38}, author = {Yong Xu and Hui Ji and Ferm{\"u}ller, Cornelia} } @conference {12026, title = {A Sensory-Motor Language for Human Activity Understanding}, booktitle = {2006 6th IEEE-RAS International Conference on Humanoid Robots}, year = {2006}, month = {2006/12/04/6}, pages = {69 - 75}, publisher = {IEEE}, organization = {IEEE}, abstract = {We have empirically discovered that the space of human actions has a linguistic framework. This is a sensory-motor space consisting of the evolution of the joint angles of the human body in movement. The space of human activity has its own phonemes, morphemes, and sentences. We present a human activity language (HAL) for symbolic non-arbitrary representation of visual and motor information. In phonology, we define atomic segments (kinetemes) that are used to compose human activity. We introduce the concept of a kinetological system and propose five basic properties for such a system: compactness, view-invariance, reproducibility, selectivity, and reconstructivity. In morphology, we extend sequential language learning to incorporate associative learning with our parallel learning approach. Parallel learning is effective in identifying the kinetemes and active joints in a particular action. In syntax, we suggest four lexical categories for our human activity language (noun, verb, adjective, and adverb). These categories are combined into sentences through syntax for human movement.}, keywords = {Actuators, associative learning, atomic segments, computational linguistics, Computer science, Computer vision, Educational institutions, grammars, human activity language, human activity understanding, human movement syntax, Humanoid robots, HUMANS, joint angles, kinetemes, kinetological system, Laboratories, learning (artificial intelligence), Morphology, motor information, parallel learning, Reproducibility of results, Robot kinematics, Robot programming, robot vision, sensory-motor language, sequential language learning, symbolic nonarbitrary representation, visual information}, isbn = {1-4244-0200-X}, doi = {10.1109/ICHR.2006.321365}, author = {Guerra-Filho,G. and Aloimonos, J.} } @book {12621, title = {Unconstrained face recognition}, year = {2006}, month = {2006///}, publisher = {Springer Science \& Business}, organization = {Springer Science \& Business}, abstract = {Face recognition has been actively studied over the past decade and continues to be a big research challenge. Just recently, researchers have begun to investigate face recognition under unconstrained conditions. Unconstrained Face Recognition provides a comprehensive review of this biometric, especially face recognition from video, assembling a collection of novel approaches that are able to recognize human faces under various unconstrained situations.
The underlying basis of these approaches is that, unlike conventional face recognition algorithms, they exploit the inherent characteristics of the unconstrained situation and thus improve the recognition performance when compared with conventional algorithms. Unconstrained Face Recognition is structured to meet the needs of a professional audience of researchers and practitioners in industry. This volume is also suitable for advanced-level students in computer science.}, keywords = {Computer science, Computer vision, Computers / Computer Graphics, Computers / Computer Science, Computers / Computer Vision \& Pattern Recognition, Computers / General, Computers / Image Processing, Computers / Information Technology, Computers / Information Theory, Computers / Interactive \& Multimedia, Computers / Security / Cryptography, Computers / User Interfaces, Data encryption (Computer science), Data structures (Computer science), Electronic books, Human face recognition (Computer science), IMAGE PROCESSING, Image processing - Digital techniques, multimedia systems, Optical pattern recognition, Science / Life Sciences / Biology}, isbn = {9780387264073}, author = {Zhou,S. Kevin and Chellapa, Rama and Zhao,Wenyi} } @conference {17887, title = {Comparing the Performance of High-Level Middleware Systems in Shared and Distributed Memory Parallel Environments}, booktitle = {Parallel and Distributed Processing Symposium, 2005. Proceedings. 19th IEEE International}, year = {2005}, month = {2005/04//}, pages = {30 - 30}, publisher = {IEEE}, organization = {IEEE}, abstract = {The utilization of toolkits for writing parallel and/or distributed applications has been shown to greatly enhance developer{\textquoteright}s productivity. Such an approach hides many of the complexities associated with writing these applications, rather than relying solely on programming language aids and parallel library support, such as MPI or PVM. In this work, we evaluate three different middleware systems that have been used to implement a computation and I/O-intensive data analysis application from the domain of computer vision. This study shows the benefits and overheads associated with each of the middleware systems, in different homogeneous computational environments and with different workloads. Our results lead the way toward being able to make better decisions for tuning the application environment, for selecting the appropriate middleware, and also for designing more powerful middleware systems to efficiently build and run highly complex applications in both parallel and distributed computing environments.}, keywords = {Application software, Computer science, Computer vision, Data analysis, Distributed computing, distributed computing environment, distributed memory parallel environment, distributed shared memory systems, Educational institutions, high-level middleware system, I/O-intensive data analysis application, Libraries, Middleware, parallel computing environment, parallel library support, parallel memories, programming language, programming languages, Runtime environment, shared memory parallel environment, Writing}, isbn = {0-7695-2312-9}, doi = {10.1109/IPDPS.2005.144}, author = {Kim,Jik-Soo and Andrade,H.
and Sussman, Alan} } @conference {18336, title = {Doubleshot: an interactive user-aided segmentation tool}, booktitle = {Proceedings of the 10th international conference on Intelligent user interfaces}, series = {IUI {\textquoteright}05}, year = {2005}, month = {2005///}, pages = {287 - 289}, publisher = {ACM}, organization = {ACM}, address = {New York, NY, USA}, abstract = {In this paper, we describe an intelligent user interface designed for camera phones to allow mobile users to specify the object of interest in the scene simply by taking two pictures: one with the object and one without the object. By comparing these two images, the system can reliably extract the visual appearance of the object, which can be useful to a wide range of applications such as content-based image retrieval and object recognition.}, keywords = {Computer vision, mobile application, Object recognition}, isbn = {1-58113-894-6}, doi = {10.1145/1040830.1040901}, url = {http://doi.acm.org/10.1145/1040830.1040901}, author = {Tom Yeh and Darrell,Trevor} } @conference {14224, title = {Integration of motion fields through shape}, booktitle = {IEEE Computer Society Conference on Computer Vision and Pattern Recognition, 2005. CVPR 2005}, volume = {2}, year = {2005}, month = {2005/06/20/25}, pages = {663 - 669 vol. 2}, publisher = {IEEE}, organization = {IEEE}, abstract = {Structure from motion from single flow fields has been studied intensively, but the integration of information from multiple flow fields has not received much attention. Here we address this problem by enforcing constraints on the shape (surface normals) of the scene in view, as opposed to constraints on the structure (depth). The advantage of integrating shape is two-fold. First, we do not need to estimate feature correspondences over multiple frames, but we only need to match patches. Second, the shape vectors in the different views are related only by rotation. This constraint on shape can be combined easily with motion estimation, thus formulating motion and structure estimation from multiple views as a practical constrained minimization problem using a rank-3 constraint. Based on this constraint, we develop a 3D motion technique, which locates, through color and motion segmentation, planar patches in the scene, matches patches over multiple frames, and estimates the motion between multiple frames and the shape of the selected scene patches using the image gradients. Experiments evaluate the accuracy of the 3D motion estimation and demonstrate the motion and shape estimation of the technique by super-resolving an image sequence.}, keywords = {3D motion estimation, Automation, CAMERAS, computational geometry, Computer vision, constrained minimization problem, decoupling translation from rotation, Educational institutions, image colour analysis, image gradients, image resolution, Image segmentation, image sequence, Image sequences, integration of motion fields, Layout, minimisation, Motion estimation, motion field integration, motion segmentation, parameter estimation, planar patches, rank-3 constraint, scene patches, SHAPE, shape and rotation, shape estimation, structure estimation}, isbn = {0-7695-2372-2}, doi = {10.1109/CVPR.2005.190}, author = {Ji,H.
and Ferm{\"u}ller, Cornelia} } @article {19663, title = {MCMC-based particle filtering for tracking a variable number of interacting targets}, journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence}, volume = {27}, year = {2005}, month = {2005/11//}, pages = {1805 - 1819}, abstract = {We describe a particle filter that effectively deals with interacting targets, targets that are influenced by the proximity and/or behavior of other targets. The particle filter includes a Markov random field (MRF) motion prior that helps maintain the identity of targets throughout an interaction, significantly reducing tracker failures. We show that this MRF prior can be easily implemented by including an additional interaction factor in the importance weights of the particle filter. However, the computational requirements of the resulting multitarget filter render it unusable for large numbers of targets. Consequently, we replace the traditional importance sampling step in the particle filter with a novel Markov chain Monte Carlo (MCMC) sampling step to obtain a more efficient MCMC-based multitarget filter. We also show how to extend this MCMC-based filter to address a variable number of interacting targets. Finally, we present both qualitative and quantitative experimental results, demonstrating that the resulting particle filters deal efficiently and effectively with complicated target interactions.}, keywords = {algorithms, Animals, Artificial intelligence, Computer simulation, Computer vision, Filtering, filtering theory, HUMANS, Image Enhancement, Image Interpretation, Computer-Assisted, Index Terms- Particle filters, Information Storage and Retrieval, Insects, interacting targets, Markov chain Monte Carlo sampling step, Markov chain Monte Carlo., Markov chains, Markov processes, Markov random field motion, Markov random fields, Models, Biological, Models, Statistical, Monte Carlo Method, Monte Carlo methods, MOTION, Movement, multitarget filter, multitarget tracking, particle filtering, Particle filters, Particle tracking, Pattern Recognition, Automated, Sampling methods, Subtraction Technique, target tracking, Video Recording}, isbn = {0162-8828}, author = {Zia Khan and Balch, T. and Dellaert, F.} } @article {14230, title = {Motion segmentation using occlusions}, journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence}, volume = {27}, year = {2005}, month = {2005/06//}, pages = {988 - 992}, abstract = {We examine the key role of occlusions in finding independently moving objects instantaneously in a video obtained by a moving camera with a restricted field of view. In this problem, the image motion is caused by the combined effect of camera motion (egomotion), structure (depth), and the independent motion of scene entities. For a camera with a restricted field of view undergoing a small motion between frames, there exists, in general, a set of 3D camera motions compatible with the observed flow field even if only a small amount of noise is present, leading to ambiguous 3D motion estimates. If separable sets of solutions exist, motion-based clustering can detect one category of moving objects. Even if a single inseparable set of solutions is found, we show that occlusion information can be used to find ordinal depth, which is critical in identifying a new class of moving objects. In order to find ordinal depth, occlusions must not only be known, but they must also be filled (grouped) with optical flow from neighboring regions. 
We present a novel algorithm for filling occlusions and deducing ordinal depth under general circumstances. Finally, we describe another category of moving objects which is detected using cardinal comparisons between structure from motion and structure estimates from another source (e.g., stereo).}, keywords = {3D motion estimation, algorithms, Artificial intelligence, CAMERAS, Computer vision, Filling, hidden feature removal, Image Enhancement, Image Interpretation, Computer-Assisted, image motion, Image motion analysis, Image segmentation, Layout, MOTION, Motion detection, Motion estimation, motion segmentation, Movement, Object detection, occlusion, occlusions, optical flow, ordinal depth, Pattern Recognition, Automated, Photography, Reproducibility of results, segmentation, Semiconductor device modeling, Sensitivity and Specificity, video analysis, Video Recording}, isbn = {0162-8828}, doi = {10.1109/TPAMI.2005.123}, author = {Ogale, A. S and Ferm{\"u}ller, Cornelia and Aloimonos, J.} } @conference {19665, title = {Multitarget tracking with split and merged measurements}, booktitle = {IEEE Computer Society Conference on Computer Vision and Pattern Recognition, 2005. CVPR 2005}, volume = {1}, year = {2005}, month = {2005/06//}, pages = {605 - 610 vol. 1}, abstract = {In many multitarget tracking applications in computer vision, a detection algorithm provides locations of potential targets. Subsequently, the measurements are associated with previously estimated target trajectories in a data association step. The output of the detector is often imperfect and the detection data may include multiple, split measurements from a single target or a single merged measurement from several targets. To address this problem, we introduce a multiple hypothesis tracker for interacting targets that generate split and merged measurements. The tracker is based on an efficient Markov chain Monte Carlo (MCMC) based auxiliary variable particle filter. The particle filter is Rao-Blackwellized such that the continuous target state parameters are estimated analytically, and an MCMC sampler generates samples from the large discrete space of data associations. In addition, we include experimental results in a scenario where we track several interacting targets that generate these split and merged measurements.}, keywords = {Application software, Computer vision, Detection algorithms, Detectors, filtering theory, Markov chain Monte Carlo based auxiliary variable particle filter, Markov processes, merged measurements, Monte Carlo methods, multiple hypothesis tracker, multitarget tracking, parameter estimation, Particle filters, Particle tracking, Rao-Blackwellized filter, split measurements, target tracking, Trajectory}, author = {Zia Khan and Balch, T. and Dellaert, F.} } @conference {19670, title = {What Are the Ants Doing? Vision-Based Tracking and Reconstruction of Control Programs}, booktitle = {Proceedings of the 2005 IEEE International Conference on Robotics and Automation, 2005. ICRA 2005}, year = {2005}, month = {2005/04//}, pages = {4182 - 4187}, abstract = {In this paper, we study the problem of going from a real-world, multi-agent system to the generation of control programs in an automatic fashion. In particular, a computer vision system is presented, capable of simultaneously tracking multiple agents, such as social insects. Moreover, the data obtained from this system is fed into a mode-reconstruction module that generates low-complexity control programs, i.e.
strings of symbolic descriptions of control-interrupt pairs, consistent with the empirical data. The result is a mechanism for going from the real system to an executable implementation that can be used for controlling multiple mobile robots.}, keywords = {Animals, Automatic generation control, Biological information theory, Computer vision, Control systems, Mobile robots, Probability distribution, Robot control, target tracking, Trajectory}, author = {Egerstedt, M. and Balch, T. and Dellaert, F. and Delmotte, F. and Zia Khan} } @conference {14209, title = {Compound eye sensor for 3D ego motion estimation}, booktitle = {2004 IEEE/RSJ International Conference on Intelligent Robots and Systems, 2004. (IROS 2004). Proceedings}, volume = {4}, year = {2004}, month = {2004/10/28/Sept.}, pages = {3712 - 3717 vol.4}, publisher = {IEEE}, organization = {IEEE}, abstract = {We describe a compound eye vision sensor for 3D ego motion computation. Inspired by eyes of insects, we show that the compound eye sampling geometry is optimal for 3D camera motion estimation. This optimality allows us to estimate the 3D camera motion in a scene-independent and robust manner by utilizing linear equations. The mathematical model of the new sensor can be implemented in analog networks resulting in a compact computational sensor for instantaneous 3D ego motion measurements in full six degrees of freedom.}, keywords = {3D camera motion estimation, CAMERAS, compound eye vision sensor, Computer vision, Equations, Eyes, Geometry, Image sensors, Insects, linear equations, Motion estimation, robot vision, Robustness, sampling geometry, Sampling methods, Sensor phenomena and characterization}, isbn = {0-7803-8463-6}, doi = {10.1109/IROS.2004.1389992}, author = {Neumann, J. and Ferm{\"u}ller, Cornelia and Aloimonos, J. and Brajovic,V.} } @conference {16944, title = {A Rank-by-Feature Framework for Unsupervised Multidimensional Data Exploration Using Low Dimensional Projections}, booktitle = {IEEE Symposium on Information Visualization, 2004. INFOVIS 2004}, year = {2004}, month = {2004///}, pages = {65 - 72}, publisher = {IEEE}, organization = {IEEE}, abstract = {Exploratory analysis of multidimensional data sets is challenging because of the difficulty in comprehending more than three dimensions. Two fundamental statistical principles for the exploratory analysis are (1) to examine each dimension first and then find relationships among dimensions, and (2) to try graphical displays first and then find numerical summaries (D. S. Moore, 1999). We implement these principles in a novel conceptual framework called the rank-by-feature framework. In the framework, users can choose a ranking criterion interesting to them and sort 1D or 2D axis-parallel projections according to the criterion. We introduce the rank-by-feature prism that is a color-coded lower-triangular matrix that guides users to desired features.
Statistical graphs (histogram, boxplot, and scatterplot) and information visualization techniques (overview, coordination, and dynamic query) are combined to help users effectively traverse 1D and 2D axis-parallel projections, and finally to help them interactively find interesting features.}, keywords = {axis-parallel projections, boxplot, color-coded lower-triangular matrix, computational complexity, computational geometry, Computer displays, Computer science, Computer vision, Data analysis, data mining, data visualisation, Data visualization, Displays, dynamic query, Educational institutions, exploratory data analysis, feature detection, feature detection/selection, Feature extraction, feature selection, graph theory, graphical displays, histogram, Information Visualization, interactive systems, Laboratories, Multidimensional systems, Principal component analysis, rank-by-feature prism, scatterplot, statistical analysis, statistical graphics, statistical graphs, unsupervised multidimensional data exploration, very large databases}, isbn = {0-7803-8779-3}, doi = {10.1109/INFVIS.2004.3}, author = {Seo,J. and Shneiderman, Ben} } @conference {19649, title = {A Rao-Blackwellized particle filter for EigenTracking}, booktitle = {Proceedings of the 2004 IEEE Computer Society Conference on Computer Vision and Pattern Recognition, 2004. CVPR 2004}, volume = {2}, year = {2004}, month = {2004/06//}, pages = {II-980 - II-986 Vol.2}, abstract = {Subspace representations have been a popular way to model appearance in computer vision. In Jepson and Black{\textquoteright}s influential paper on EigenTracking, they were successfully applied in tracking. For noisy targets, optimization-based algorithms (including EigenTracking) often fail catastrophically after losing track. Particle filters have recently emerged as a robust method for tracking in the presence of multi-modal distributions. To use subspace representations in a particle filter, the number of samples increases exponentially as the state vector includes the subspace coefficients. We introduce an efficient method for using subspace representations in a particle filter by applying Rao-Blackwellization to integrate out the subspace coefficients in the state vector. Fewer samples are needed since part of the posterior over the state vector is analytically calculated. We use probabilistic principal component analysis to obtain analytically tractable integrals. We show experimental results in a scenario in which we track a target in clutter.}, keywords = {analytically tractable integrals, Computer vision, EigenTracking, Filters, Gaussian processes, modal analysis, multi-modal distributions, NOISE, noisy targets, optimisation, optimization-based algorithms, Particle filters, Particle measurements, Particle tracking, Principal component analysis, probabilistic principal component analysis, Rao-Blackwellized particle filter, Robustness, SHAPE, State estimation, state vector, subspace coefficients, Subspace representations, target tracking, vectors}, author = {Zia Khan and Balch, T. and Dellaert, F.} } @conference {12000, title = {Eye design in the plenoptic space of light rays}, booktitle = {Ninth IEEE International Conference on Computer Vision, 2003. Proceedings}, year = {2003}, month = {2003/10/13/16}, pages = {1160 - 1167 vol.2}, publisher = {IEEE}, organization = {IEEE}, abstract = {Natural eye designs are optimized with regard to the tasks the eye-carrying organism has to perform for survival.
This optimization has been performed by the process of natural evolution over many millions of years. Every eye captures a subset of the space of light rays. The information contained in this subset and the accuracy to which the eye can extract the necessary information determines an upper limit on how well an organism can perform a given task. In this work we propose a new methodology for camera design. By interpreting eyes as sample patterns in light ray space we can phrase the problem of eye design in a signal processing framework. This allows us to develop mathematical criteria for optimal eye design, which in turn enables us to build the best eye for a given task without the trial and error phase of natural evolution. The principle is evaluated on the task of 3D ego-motion estimation.}, keywords = {3D ego-motion estimation, Assembly, B-splines, Camera design, CAMERAS, captured image, compound eyes, Computer vision, data mining, eye, eye-carrying organism, Eyes, filter optimization, image representation, image resolution, Information geometry, Laboratories, light field reconstruction, light gathering power, light rays, mixed spherical-Cartesian coordinate system, Motion estimation, natural evolution process, natural eye designs, natural image statistics, optical nanotechnology, Optical signal processing, optimal eye design mathematical criteria, Organisms, plenoptic image formation, plenoptic space, plenoptic video geometry, sampling operators, sensory ecology, Signal design, Signal processing, signal processing framework, signal processing tool, square-summable sequences, visual acuity}, isbn = {0-7695-1950-4}, doi = {10.1109/ICCV.2003.1238623}, author = {Neumann, J. and Ferm{\"u}ller, Cornelia and Aloimonos, J.} } @conference {11905, title = {Non-parametric expectation maximization: a learning automata approach}, booktitle = {IEEE International Conference on Systems, Man and Cybernetics, 2003}, volume = {3}, year = {2003}, month = {2003/10//}, pages = {2996 - 3001 vol.3}, publisher = {IEEE}, organization = {IEEE}, abstract = {The famous expectation maximization technique suffers from two major drawbacks. First, the number of components has to be specified a priori. Second, expectation maximization is sensitive to initialization. In this paper, we present a new stochastic technique for estimating the mixture parameters. A Parzen window is used to obtain a discrete estimate of the PDF of the given data. Stochastic learning automata are then used to select the mixture parameters that minimize the distance between the discrete estimate of the PDF and the estimate of the expectation maximization. The validity of the proposed approach is verified using bivariate simulation data.}, keywords = {Automatic speech recognition, bivariate simulation data, Computational modeling, Computer vision, Density functional theory, expectation maximization technique, learning automata, mixture parameters estimation, nonparametric expectation maximization, nonparametric statistics, optimisation, parameter estimation, Parzen Window, PDF, probability, probability density function, Speech processing, Speech recognition, stochastic learning automata, Stochastic processes, stochastic technique}, isbn = {0-7803-7952-7}, doi = {10.1109/ICSMC.2003.1244347}, author = {Abd-Almageed, Wael and El-Osery,A. and Smith,C.E.} } @conference {12055, title = {Polydioptric camera design and 3D motion estimation}, booktitle = {2003 IEEE Computer Society Conference on Computer Vision and Pattern Recognition, 2003.
Proceedings}, volume = {2}, year = {2003}, month = {2003/06/18/20}, pages = {II-294-301 vol.2}, publisher = {IEEE}, organization = {IEEE}, abstract = {Most cameras used in computer vision applications are still based on the pinhole principle inspired by our own eyes. It has been found though that this is not necessarily the optimal image formation principle for processing visual information using a machine. We describe how to find the optimal camera for 3D motion estimation by analyzing the structure of the space formed by the light rays passing through a volume of space. Every camera corresponds to a sampling pattern in light ray space, thus the question of camera design can be rephrased as finding the optimal sampling pattern with regard to a given task. This framework suggests that large field-of-view multi-perspective (polydioptric) cameras are the optimal image sensors for 3D motion estimation. We conclude by proposing design principles for polydioptric cameras and describe an algorithm for such a camera that estimates its 3D motion in a scene independent and robust manner.}, keywords = {3D motion estimation, Algorithm design and analysis, Application software, CAMERAS, Computer vision, Eyes, field-of-view camera, Image motion analysis, image sampling, image sensor, Image sensors, Layout, light ray, Motion estimation, multiperspective camera, optimal camera, optimal image formation, optimal sampling pattern, pinhole principle, polydioptric camera design, ray space, scene independent estimation, space structure analysis, stereo image processing, visual information processing}, isbn = {0-7695-1900-8}, doi = {10.1109/CVPR.2003.1211483}, author = {Neumann, J. and Ferm{\"u}ller, Cornelia and Aloimonos, J.} } @article {15541, title = {Algorithmic issues in modeling motion}, journal = {ACM Comput. Surv.}, volume = {34}, year = {2002}, month = {2002/12//}, pages = {550 - 572}, abstract = {This article is a survey of research areas in which motion plays a pivotal role. The aim of the article is to review current approaches to modeling motion together with related data structures and algorithms, and to summarize the challenges that lie ahead in producing a more unified theory of motion representation that would be useful across several disciplines.}, keywords = {computational geometry, Computer vision, mobile networks, modeling, molecular biology, motion modeling, physical simulation, robotics, spatio-temporal databases}, isbn = {0360-0300}, doi = {10.1145/592642.592647}, url = {http://doi.acm.org/10.1145/592642.592647}, author = {Agarwal,Pankaj K. and Guibas,Leonidas J. and Edelsbrunner,Herbert and Erickson,Jeff and Isard,Michael and Har-Peled,Sariel and Hershberger,John and Jensen,Christian and Kavraki,Lydia and Koehl,Patrice and Lin,Ming and Manocha,Dinesh and Metaxas,Dimitris and Mirtich,Brian and Mount, Dave and Muthukrishnan,S. and Pai,Dinesh and Sacks,Elisha and Snoeyink,Jack and Suri,Subhash and Wolfson,Ouri} } @conference {11904, title = {Contour migration: solving object ambiguity with shape-space visual guidance}, booktitle = {IEEE/RSJ International Conference on Intelligent Robots and Systems, 2002}, volume = {1}, year = {2002}, month = {2002///}, pages = {330 - 335 vol.1}, publisher = {IEEE}, organization = {IEEE}, abstract = {A fundamental problem in computer vision is the issue of shape ambiguity.
Simply stated, a silhouette cannot uniquely identify an object or an object{\textquoteright}s classification since many unique objects can present identical occluding contours. This problem has no solution in the general case for a monocular vision system. This paper presents a method for disambiguating objects during silhouette matching using a visual servoing system. This method identifies the camera motion(s) that gives disambiguating views of the objects. These motions are identified through a new technique called contour migration. The occluding contour{\textquoteright}s shape is used to identify objects or object classes that are potential matches for that shape. A contour migration is then determined that disambiguates the possible matches by purposive viewpoint adjustment. The technique is demonstrated using an example set of objects.}, keywords = {Artificial intelligence, camera motion, CAMERAS, Computer vision, contour migration, Databases, edge detection, Intelligent robots, Laboratories, Machine vision, object ambiguity, Object recognition, pattern matching, Robot vision systems, servomechanisms, SHAPE, shape matching, shape-space visual guidance, silhouette matching, visual servoing}, isbn = {0-7803-7398-7}, doi = {10.1109/IRDS.2002.1041410}, author = {Abd-Almageed, Wael and Smith,C.E.} } @conference {11894, title = {Hidden Markov models for silhouette classification}, booktitle = {Automation Congress, 2002 Proceedings of the 5th Biannual World}, volume = {13}, year = {2002}, month = {2002///}, pages = {395 - 402}, publisher = {IEEE}, organization = {IEEE}, abstract = {In this paper, a new technique for object classification from silhouettes is presented. Hidden Markov models are used as a classification mechanism. Through a set of experiments, we show the validity of our approach and show its invariance under severe rotation conditions. Also, a comparison with other techniques that use hidden Markov models for object classification from silhouettes is presented.}, keywords = {Computer vision, Feature extraction, Fourier transforms, hidden Markov models, HMM, image classification, Neural networks, object classification, Object recognition, parameter estimation, pattern recognition, Probability distribution, Shape measurement, silhouette classification, Wavelet transforms}, isbn = {1-889335-18-5}, doi = {10.1109/WAC.2002.1049575}, author = {Abd-Almageed, Wael and Smith,C.} } @conference {11899, title = {Mixture models for dynamic statistical pressure snakes}, booktitle = {16th International Conference on Pattern Recognition, 2002. Proceedings}, volume = {2}, year = {2002}, month = {2002///}, pages = {721 - 724 vol.2}, publisher = {IEEE}, organization = {IEEE}, abstract = {This paper introduces a new approach to statistical pressure snakes. It uses statistical modeling for both object and background to obtain a more robust pressure model. The Expectation Maximization (EM) algorithm is used to model the data into a Mixture of Gaussians (MoG). Bayesian theory is then employed as a decision making mechanism.
@conference {11901, title = {A non-intrusive Kalman filter-based tracker for pursuit eye movement}, booktitle = {Proceedings of the 2002 American Control Conference}, volume = {2}, year = {2002}, month = {2002///}, pages = {1443 - 1447}, publisher = {IEEE}, organization = {IEEE}, abstract = {In this paper, we introduce a new non-intrusive approach to estimating the eye position during pursuit motion of the eye. We introduce a new characterization of pursuit eye movement, based on the decomposition of the pursuit eye motion into a deterministic component and a random component. We use a discrete Kalman filter to estimate the random component and calculate the deterministic component, then add the two components to obtain an estimate of the eye position. Simulation results are provided to illustrate the eye position estimation.}, keywords = {Application software, characterization, Computer vision, Current measurement, deterministic component, Electric variables measurement, eye position estimation, eye tracking, gaze tracking, Human computer interaction, Kalman filter, Kalman filters, Lenses, Motion estimation, Optical reflection, pursuit eye movement, pursuit motion, random component, Skin, tracking}, isbn = {0-7803-7298-0}, doi = {10.1109/ACC.2002.1023224}, url = {http://ieeexplore.ieee.org/ielx5/7965/22015/01023224.pdf?tp=\&arnumber=1023224\&isnumber=22015}, author = {Abd-Almageed, Wael and Fadali,M. S and Bebis,G.} }
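Editorial note: a minimal sketch of the decomposition described in the entry above, assuming a scalar random-walk model for the random component and a sinusoidal stand-in for the deterministic pursuit trajectory; the paper's actual models and noise parameters are not reproduced here.

import numpy as np

def deterministic_component(t):
    # Hypothetical smooth-pursuit target; an assumption for illustration only.
    return 10.0 * np.sin(2.0 * np.pi * 0.5 * t)

def track_pursuit(measurements, dt=0.01, q=2.5e-3, r=1.0):
    # Eye position estimate = deterministic part + Kalman-filtered random part.
    x, p = 0.0, 1.0          # state (random component) and its variance
    estimates = []
    for k, z in enumerate(measurements):
        t = k * dt
        p += q               # predict: random walk x_k = x_{k-1} + w, w ~ N(0, q)
        innovation = (z - deterministic_component(t)) - x
        gain = p / (p + r)   # update with the deviation from the deterministic part
        x += gain * innovation
        p *= (1.0 - gain)
        estimates.append(deterministic_component(t) + x)
    return np.array(estimates)

if __name__ == "__main__":
    rng = np.random.default_rng(0)
    t = np.arange(0, 2, 0.01)
    truth = deterministic_component(t) + np.cumsum(rng.normal(0, 0.05, t.size))
    noisy = truth + rng.normal(0, 1.0, t.size)
    est = track_pursuit(noisy)
    print("RMS error:", np.sqrt(np.mean((est - truth) ** 2)))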
@conference {18470, title = {Virtual audio system customization using visual matching of ear parameters}, booktitle = {16th International Conference on Pattern Recognition, 2002. Proceedings}, volume = {3}, year = {2002}, month = {2002///}, pages = {1003 - 1006}, publisher = {IEEE}, organization = {IEEE}, abstract = {Applications in the creation of virtual auditory spaces (VAS) and sonification require individualized head related transfer functions (HRTFs) for perceptual fidelity. HRTFs exhibit significant variation from person to person due to differences between their pinnae and their body sizes. We propose and preliminarily implement a simple HRTF customization based on a published database of HRTFs (Algazi et al., 2001) that also contains geometrical measurements of subject pinnae. We measure some of these features via simple image processing and select the HRTF whose features most closely correspond to the individual{\textquoteright}s features. This selection procedure is implemented along with the virtual auditory system described in (Zotkin et al., 2002), and listener tests were conducted comparing the customized HRTF and a fixed HRTF. Despite the simplicity of the method, tests reveal an average improvement in localization accuracy of about 25 percent, though the improvement varies with source location and individuals.}, keywords = {acoustic signal processing, Audio systems, Auditory system, Computer vision, database, Ear, ear parameter matching, geometrical measurements, Head, head related transfer functions, HRTF customization, Image databases, IMAGE PROCESSING, medical image processing, performance improvement, Position measurement, sonification, Spatial databases, System testing, Transfer functions, virtual audio system customization, virtual auditory spaces, virtual auditory system, visual matching}, isbn = {0-7695-1695-X}, doi = {10.1109/ICPR.2002.1048207}, author = {Zotkin,Dmitry N and Duraiswami, Ramani and Davis, Larry S. and Mohan,A. and Raykar,V.} } @article {12060, title = {Visual space-time geometry - A tool for perception and the imagination}, journal = {Proceedings of the IEEE}, volume = {90}, year = {2002}, month = {2002/07//}, pages = {1113 - 1135}, abstract = {Although the fundamental ideas underlying research efforts in the field of computer vision have not radically changed in the past two decades, there has been a transformation in the way work in this field is conducted. This is primarily due to the emergence of a number of tools, of both a practical and a theoretical nature. One such tool, celebrated throughout the nineties, is the geometry of visual space-time. It is known under a variety of headings, such as multiple view geometry, structure from motion, and model building. It is a mathematical theory relating multiple views (images) of a scene taken at different viewpoints to three-dimensional models of the (possibly dynamic) scene. This mathematical theory gave rise to algorithms that take as input images (or video) and provide as output a model of the scene. Such algorithms are one of the biggest successes of the field, and they have many applications in other disciplines, such as graphics (image-based rendering, motion capture) and robotics (navigation). One of the difficulties, however, is that the current tools cannot yet be fully automated, and they do not provide very accurate results. More research is required for automation and high precision. During the past few years we have investigated a number of basic questions underlying the structure from motion problem. Our investigations resulted in a small number of principles that characterize the problem. These principles, which give rise to automatic procedures and point to new avenues for studying the next level of the structure from motion problem, are the subject of this paper.}, keywords = {3-D motion estimation, Buildings, Computer vision, Geometry, Graphics, Image sequences, Layout, Mathematical model, mathematical theory, model building, Motion estimation, multiple view geometry, multiple views, Navigation, optical flow, optical illusions, patch correspondence, Rendering (computer graphics), Robotics and automation, Solid modeling, structure from motion, three-dimensional models, visual space-time}, isbn = {0018-9219}, doi = {10.1109/JPROC.2002.801440}, author = {Ferm{\"u}ller, Cornelia and Baker, P. and Aloimonos, J.} }
@conference {14188, title = {A spherical eye from multiple cameras (makes better models of the world)}, booktitle = {Proceedings of the 2001 IEEE Computer Society Conference on Computer Vision and Pattern Recognition, 2001. CVPR 2001}, volume = {1}, year = {2001}, month = {2001///}, pages = {I-576 - I-583}, publisher = {IEEE}, organization = {IEEE}, abstract = {The paper describes an imaging system that has been designed specifically for the purpose of recovering egomotion and structure from video. The system consists of six cameras in a network, arranged so that they sample different parts of the visual sphere. This geometric configuration has provable advantages compared to small field-of-view cameras for the estimation of the system{\textquoteright}s own motion and, consequently, the estimation of shape models from the individual cameras. The reason is that the inherent ambiguities of confusion between translation and rotation disappear. We provide algorithms for the calibration of the system and 3D motion estimation. The calibration is based on a new geometric constraint that relates the images of lines parallel in space to the rotation between the cameras. The 3D motion estimation uses a constraint relating structure directly to image gradients.}, keywords = {3D motion estimation, Calibration, camera network, CAMERAS, Computer vision, egomotion recovery, geometric configuration, geometric constraint, image gradients, image sampling, imaging system, Laboratories, Layout, Motion estimation, multiple cameras, Pixel, Robot vision systems, SHAPE, shape models, Space technology, spherical eye, system calibration, video, video cameras, video signal processing, visual sphere sampling}, isbn = {0-7695-1272-0}, doi = {10.1109/CVPR.2001.990525}, author = {Baker, P. and Ferm{\"u}ller, Cornelia and Aloimonos, J. and Pless, R.} } @conference {18463, title = {An audio-video front-end for multimedia applications}, booktitle = {2000 IEEE International Conference on Systems, Man, and Cybernetics}, volume = {2}, year = {2000}, month = {2000///}, pages = {786 - 791}, publisher = {IEEE}, organization = {IEEE}, abstract = {Applications such as video gaming, virtual reality, multimodal user interfaces and videoconferencing require systems that can locate and track persons in a room through a combination of visual and audio cues, enhance the sound that they produce, and perform identification. We describe the development of a particular multimodal sensor fusion system that is portable, runs in real time, and achieves these objectives. The system employs novel algorithms for acoustical source location, video-based person tracking, and overall system control, which are also described.}, keywords = {Acoustic noise, acoustical source location, Application software, audio cues, audio-video front-end, CAMERAS, Computer vision, Microphones, multimedia applications, multimedia systems, multimodal sensor fusion system, multimodal user interfaces, Position measurement, REAL TIME, Real time systems, real-time systems, sensor fusion, sound, Speech recognition, User interfaces, video cameras, video gaming, video-based person tracking, Videoconference, videoconferencing, Virtual reality, visual cues, Working environment noise}, isbn = {0-7803-6583-6}, doi = {10.1109/ICSMC.2000.885945}, author = {Zotkin,Dmitry N and Duraiswami, Ramani and Davis, Larry S. and Haritaoglu,I.} }
@conference {14232, title = {Multi-camera networks: eyes from eyes}, booktitle = {IEEE Workshop on Omnidirectional Vision, 2000. Proceedings}, year = {2000}, month = {2000///}, pages = {11 - 18}, publisher = {IEEE}, organization = {IEEE}, abstract = {Autonomous or semi-autonomous intelligent systems, in order to function appropriately, need to create models of their environment, i.e., models of space-time. These are descriptions of objects and scenes and descriptions of changes of space over time, that is, events and actions. Despite the large amount of research on this problem, as a community we are still far from developing robust descriptions of a system{\textquoteright}s spatiotemporal environment using video input (image sequences). Undoubtedly, some progress has been made regarding the understanding of estimating the structure of visual space, but it has not led to solutions to specific applications. There is, however, an alternative approach which is in line with today{\textquoteright}s {\textquotedblleft}zeitgeist.{\textquotedblright} The vision of artificial systems can be enhanced by providing them with new eyes. If conventional video cameras are put together in various configurations, new sensors can be constructed that have much more power, and the way they {\textquotedblleft}see{\textquotedblright} the world makes it much easier to solve problems of vision. This research is motivated by examining the wide variety of eye design in the biological world and obtaining inspiration for an ensemble of computational studies that relate how a system sees to what that system does (i.e., relating perception to action). This, coupled with the geometry of multiple views that has flourished in terms of theoretical results in the past few years, points to new ways of constructing powerful imaging devices which suit particular tasks in robotics, visualization, video processing, virtual reality and various computer vision applications better than conventional cameras. This paper presents a number of new sensors that we built using common video cameras and shows their superiority with regard to developing models of space and motion.}, keywords = {Biosensors, CAMERAS, Computer vision, Eyes, Image sequences, intelligent systems, Layout, Machine vision, Robot vision systems, Robustness, Spatiotemporal phenomena, video cameras, Virtual reality}, isbn = {0-7695-0704-2}, doi = {10.1109/OMNVIS.2000.853797}, author = {Ferm{\"u}ller, Cornelia and Aloimonos, J. and Baker, P. and Pless, R. and Neumann, J. and Stuart, B.} } @conference {14267, title = {The statistics of optical flow: implications for the process of correspondence in vision}, booktitle = {15th International Conference on Pattern Recognition, 2000. Proceedings}, volume = {1}, year = {2000}, month = {2000///}, pages = {119 - 126}, publisher = {IEEE}, organization = {IEEE}, abstract = {This paper studies the three major categories of flow estimation methods: gradient-based, energy-based, and correlation methods; it analyzes different ways of compounding 1D motion estimates (image gradients, spatio-temporal frequency triplets, local correlation estimates) into 2D velocity estimates, including linear and nonlinear methods. All of these estimators are biased in the presence of noise. Correcting for the bias would require knowledge of the noise parameters. In many situations, however, these are difficult to estimate accurately, as they change with the dynamic imagery in unpredictable and complex ways. Thus, the bias really is a problem inherent to optical flow estimation. We argue that the bias is also integral to the human visual system.
It is the cause of the illusory perception of motion in the Ouchi pattern and also explains various psychophysical studies of the perception of moving plaids. Finally, the implication of the analysis is that flow or correspondence can be estimated very accurately only when feedback is utilized.}, keywords = {Bias, Computer vision, correlation, correlation methods, energy-based method, flow estimation, Frequency estimation, gradient method, gradient methods, Image analysis, Image motion analysis, Image sequences, least squares, least squares approximations, Motion estimation, Nonlinear optics, Optical feedback, optical flow, Optical harmonic generation, Optical noise, Statistics, Visual perception}, isbn = {0-7695-0750-6}, doi = {10.1109/ICPR.2000.905288}, author = {Ferm{\"u}ller, Cornelia and Aloimonos, J.} } @conference {18311, title = {Estimation of composite object and camera image motion}, booktitle = {The Proceedings of the Seventh IEEE International Conference on Computer Vision, 1999}, volume = {1}, year = {1999}, month = {1999///}, pages = {190 - 197}, publisher = {IEEE}, organization = {IEEE}, abstract = {An approach for estimating composite independent object and camera image motions is proposed. The approach employs spatio-temporal flow models, learned through observing typical movements of the object, to decompose image motion into independent object and camera motions. The spatio-temporal flow models of the object motion are represented as a set of orthogonal flow bases that are learned using principal component analysis of instantaneous flow measurements from a stationary camera. These models are then employed in scenes with a moving camera to extract motion trajectories relative to those learned. The performance of the algorithm is demonstrated on several image sequences of rigid and articulated bodies in motion.}, keywords = {articulated bodies, camera image motion, camera image motions, CAMERAS, composite independent object, composite object estimation, Computer vision, Educational institutions, Image sequences, instantaneous flow measurements, Laboratories, Layout, Motion analysis, Motion estimation, motion trajectories, orthogonal flow bases, Principal component analysis, spatio-temporal flow models, tracking, Vehicles}, isbn = {0-7695-0164-8}, doi = {10.1109/ICCV.1999.791217}, author = {Yacoob,Yaser and Davis, Larry S.} } @conference {12047, title = {Which shape from motion?}, booktitle = {Sixth International Conference on Computer Vision, 1998}, year = {1998}, month = {1998/01/04/7}, pages = {689 - 695}, publisher = {IEEE}, organization = {IEEE}, abstract = {In a practical situation, the rigid transformation relating different views is recovered with errors. In such a case, the recovered depth of the scene contains errors, and consequently a distorted version of visual space is computed. What, then, are meaningful shape representations that can be computed from the images? The result presented in this paper states that if the rigid transformation between different views is estimated in a way that gives rise to a minimum number of negative depth values, then at the center of the image affine shape can be correctly computed. This result is obtained by exploiting properties of the distortion function.
The distortion model turns out to be a very powerful tool in the analysis and design of 3D motion and shape estimation algorithms, and as a byproduct of our analysis we present a computational explanation of psychophysical results demonstrating human visual space distortion from motion information.}, keywords = {3D motion estimation, affine shape, Algorithm design and analysis, Computer vision, distorted version, distortion function, human visual space distortion, HUMANS, Image motion analysis, image representation, Information analysis, Layout, Motion analysis, Motion estimation, motion information, Psychology, rigid transformation, SHAPE, shape estimation, shape representations, State estimation, visual space}, isbn = {81-7319-221-9}, doi = {10.1109/ICCV.1998.710792}, author = {Ferm{\"u}ller, Cornelia and Aloimonos, J.} } @article {18317, title = {Recognizing human facial expressions from long image sequences using optical flow}, journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence}, volume = {18}, year = {1996}, month = {1996/06//}, pages = {636 - 642}, abstract = {An approach to the analysis and representation of facial dynamics for recognition of facial expressions from image sequences is presented. The algorithms utilize optical flow computation to identify the direction of rigid and nonrigid motions that are caused by human facial expressions. A mid-level symbolic representation motivated by psychological considerations is developed. Recognition of six facial expressions, as well as eye blinking, is demonstrated on a large set of image sequences.}, keywords = {Computer vision, Eyebrows, face recognition, facial dynamics, Facial features, human facial expression recognition, HUMANS, Image motion analysis, image recognition, image representation, Image sequences, Motion analysis, Motion estimation, Optical computing, optical flow, symbolic representation, tracking}, isbn = {0162-8828}, doi = {10.1109/34.506414}, author = {Yacoob,Yaser and Davis, Larry S.} } @conference {12033, title = {3D motion representations in visual servo control}, booktitle = {Proceedings of International Symposium on Computer Vision, 1995}, year = {1995}, month = {1995/11/21/23}, pages = {61 - 66}, publisher = {IEEE}, organization = {IEEE}, abstract = {A new approach to visual servoing and vision-guided robotics is introduced. This approach uses visual information for autonomous behavior. It amounts to using robust, global spatiotemporal representations easily extracted from the dynamic imagery. Specifically, the geometrical patterns of normal flows are used as the input to the servo mechanism. It is shown that the positions of these patterns are related to the three-dimensional motion parameters.
By locating the positions of these patterns, we can solve a variety of navigational problems with little computational effort.}, keywords = {3D motion representations, autonomous behavior, CAMERAS, computational effort, Computer vision, dynamic imagery, global spatiotemporal representations, image representation, Mobile robots, motion control, Navigation, navigational problems, Optical computing, Robot kinematics, Robot sensing systems, robot vision, Robot vision systems, Robotics and automation, servomechanisms, Servosystems, Spatiotemporal phenomena, vision-guided robotics, visual information, visual servo control, visual servoing}, isbn = {0-8186-7190-4}, doi = {10.1109/ISCV.1995.476978}, author = {Ferm{\"u}ller, Cornelia and LoongFah Cheong and Aloimonos, J.} } @conference {14168, title = {Global rigidity constraints in image displacement fields}, booktitle = {Proceedings of Fifth International Conference on Computer Vision, 1995}, year = {1995}, month = {1995/06/20/23}, pages = {245 - 250}, publisher = {IEEE}, organization = {IEEE}, abstract = {Image displacement fields (optical flow fields, stereo disparity fields, normal flow fields) due to rigid motion possess a global geometric structure which is independent of the scene in view. Motion vectors of certain lengths and directions are constrained to lie on the imaging surface at particular loci whose location and form depend solely on the 3D motion parameters. If optical flow fields or stereo disparity fields are considered, then equal vectors are shown to lie on conic sections. Similarly, for normal motion fields, equal vectors lie within regions whose boundaries also constitute conics. By studying various properties of these curves and regions and their relationships, a characterization of the structure of rigid motion fields is given. The goal of this paper is to introduce a concept underlying the global structure of image displacement fields. This concept gives rise to various constraints that could form the basis of algorithms for the recovery of visual information from multiple views.}, keywords = {3D motion parameters, algorithms, Automation, Computer science, Computer vision, conic sections, curves, equal vectors, Fluid flow measurement, global geometric structure, global rigidity constraints, image displacement fields, Image motion analysis, Image segmentation, Image sequences, imaging surface, Laboratories, Layout, Motion estimation, Motion measurement, motion vectors, multiple views, normal flow fields, optical flow fields, regions, rigid motion, stereo disparity fields, Stereo vision, vectors, visual information recovery}, isbn = {0-8186-7042-8}, doi = {10.1109/ICCV.1995.466779}, author = {Ferm{\"u}ller, Cornelia and Aloimonos, J.} }
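Editorial note: the rigidity constraints in the entry above are stated over the standard instantaneous rigid-motion flow field. A small numerical companion (mine, not the authors'): the usual pinhole-camera parameterization of that field, with which one can scan an image grid for the loci where two candidate motions produce equal vectors. Sign conventions vary between texts and are an assumption here.

import numpy as np

def rigid_flow(x, y, Z, t, w, f=1.0):
    # Instantaneous image velocity (u, v) at image point (x, y) with depth Z,
    # for camera translation t = (tx, ty, tz) and rotation w = (wx, wy, wz).
    tx, ty, tz = t
    wx, wy, wz = w
    u = (-f * tx + x * tz) / Z + (x * y / f) * wx - (f + x**2 / f) * wy + y * wz
    v = (-f * ty + y * tz) / Z + (f + y**2 / f) * wx - (x * y / f) * wy - x * wz
    return u, v

The translational term scales with 1/Z while the rotational term is depth-independent, which is why equal-vector loci depend only on the 3D motion parameters and not on the scene.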
@book {13298, title = {Image Analysis and Processing: 8th International Conference, ICIAP {\textquoteright}95, San Remo, Italy, September 13-15, 1995: Proceedings}, year = {1995}, month = {1995/09/28/}, publisher = {Springer}, organization = {Springer}, abstract = {This book presents the proceedings of the 8th International Conference on Image Analysis and Processing, ICIAP {\textquoteright}95, held in Sanremo, Italy, in September 1995 under the sponsorship of the International Association for Pattern Recognition (IAPR). The volume presents 108 papers selected from more than 180 submissions, together with six invited contributions. The papers are written by a total of 265 contributing authors and give a comprehensive state-of-the-art report on all current issues of image analysis and processing. Theoretical aspects are addressed as well as systems design and advanced applications, particularly in medical imaging.}, keywords = {Artificial intelligence, COMPUTER AIDED DESIGN, Computer Graphics, Computer science, Computer vision, Computers / CAD-CAM, Computers / Computer Graphics, Computers / Computer Science, Computers / Computer Vision \& Pattern Recognition, Computers / Image Processing, Computers / Intelligence (AI) \& Semantics, Computers / Optical Data Processing, Computers / Software Development \& Engineering / General, Electronic books, IMAGE PROCESSING, Image processing/ Congresses, Imaging systems, Optical data processing, Optical pattern recognition, software engineering}, isbn = {9783540602989}, author = {Braccini,Carlo and De Floriani, Leila and Vernazza,Gianni} } @conference {14171, title = {The information in the direction of image flow}, booktitle = {Proceedings of International Symposium on Computer Vision, 1995}, year = {1995}, month = {1995/11/21/23}, pages = {461 - 466}, publisher = {IEEE}, organization = {IEEE}, abstract = {If, instead of the full motion field, we consider only the direction of the motion field due to a rigid motion, what can we say about the information regarding the three-dimensional motion? In this paper it is shown that, taking the whole sphere as the imaging surface, independently of the scene in view, two different rigid motions cannot give rise to the same directional motion field.
If we restrict the image to half of a sphere (or an infinitely large image plane), two different rigid motions with instantaneous translational and rotational velocities $(t_1, \omega_1)$ and $(t_2, \omega_2)$ cannot give rise to the same directional motion field unless the plane through $t_1$ and $t_2$ is perpendicular to the plane through $\omega_1$ and $\omega_2$, i.e., $(t_1 \times t_2) \cdot (\omega_1 \times \omega_2) = 0$. In addition, in order to give practical significance to these uniqueness results for the case of a limited field of view, we also characterize the locations on the image where the motion vectors due to the different motions must have different directions. If $(\omega_1 \times \omega_2) \cdot (t_1 \times t_2) = 0$ and certain additional constraints are met, then the two rigid motions could produce motion fields with the same direction. For this to happen, the depth of each corresponding surface has to be within a certain range, defined by a second- and a third-order surface.}, keywords = {Automation, CAMERAS, Computer vision, Educational institutions, image flow, Image motion analysis, Image sequences, imaging surface, Laboratories, Layout, Motion analysis, Motion estimation, motion field, motion vectors, Optical imaging, rigid motion, rigid motions, three-dimensional motion}, isbn = {0-8186-7190-4}, doi = {10.1109/ISCV.1995.477071}, author = {Brodsky, T. and Ferm{\"u}ller, Cornelia and Aloimonos, J.} } @conference {12031, title = {Iso-distortion contours and egomotion estimation}, booktitle = {Proceedings of International Symposium on Computer Vision, 1995}, year = {1995}, month = {1995/11/21/23}, pages = {55 - 60}, publisher = {IEEE}, organization = {IEEE}, abstract = {This paper introduces the framework of iso-distortion contours to deal with the problem of depth distortion due to erroneous motion estimates, and various related aspects such as the effectiveness of the visibility constraint. The framework can also be used to investigate the uniqueness aspect of normal flow. Future work will examine the implications of the iso-distortion contours for the problem of multiple-frame integration.}, keywords = {Automation, Computer vision, Degradation, depth distortion, Educational institutions, egomotion estimation, Equations, erroneous motion estimates, Error analysis, HUMANS, Image sequences, iso-distortion contours, Laboratories, Layout, Motion estimation, Robustness, visibility constraint}, isbn = {0-8186-7190-4}, doi = {10.1109/ISCV.1995.476977}, author = {LoongFah Cheong and Aloimonos, J.} }
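Editorial note: the directional-uniqueness condition reconstructed in the Brodsky, Ferm{\"u}ller and Aloimonos abstract above is a single triple product, so it is easy to check numerically. A tiny illustration with made-up motion parameters (mine, not from the paper):

import numpy as np

def ambiguity_condition(t1, w1, t2, w2, tol=1e-9):
    # Two rigid motions can share a directional motion field on a half-sphere
    # only if (t1 x t2) . (w1 x w2) = 0.
    val = float(np.dot(np.cross(t1, t2), np.cross(w1, w2)))
    return val, abs(val) < tol

t1, w1 = np.array([1.0, 0.0, 0.0]), np.array([0.0, 1.0, 0.0])
t2, w2 = np.array([0.0, 1.0, 0.0]), np.array([1.0, 0.0, 0.0])
print(ambiguity_condition(t1, w1, t2, w2))  # (-1.0, False): these two motions
# fail the condition, so their direction fields must differ somewhere.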
@article {14189, title = {A syntactic approach to scale-space-based corner description}, journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence}, volume = {16}, year = {1994}, month = {1994/07//}, pages = {748 - 751}, abstract = {Planar curves are described by information about corners integrated over various levels of resolution. The detection of corners takes place on a digital representation. To compensate for ambiguities arising from sampling problems due to the discreteness, results about the local behavior of curvature extrema in continuous scale-space are employed.}, keywords = {Computer vision, corner detection, curvature extrema, edge detection, IMAGE PROCESSING, image resolution, Image segmentation, Laboratories, Large-scale systems, PARALLEL PROCESSING, pattern recognition, planar curves, resolution, Sampling methods, sampling problems, scale space based corner description, SHAPE, Smoothing methods, syntactic approach}, isbn = {0162-8828}, doi = {10.1109/34.297957}, author = {Ferm{\"u}ller, Cornelia and Kropatsch,W.} } @conference {18297, title = {Early vision processing using a multi-stage diffusion process}, booktitle = {1993 IEEE Computer Society Conference on Computer Vision and Pattern Recognition, 1993. Proceedings CVPR {\textquoteright}93}, year = {1993}, month = {1993/06//}, pages = {41 - 46}, publisher = {IEEE}, organization = {IEEE}, abstract = {The use of a multistage diffusion process in the early processing of range data is examined. The input range data are interpreted as occupying a volume in 3-D space. Each diffusion stage simulates the process of diffusing part of the boundary of the volume into the volume. The outcome of the process can be used both for discontinuity detection and for segmentation into shape-homogeneous regions. The process is applied to synthetic noise-free and noisy step, roof, and valley edges, as well as to real range images.}, keywords = {3-D space, Computational modeling, Computer vision, Diffusion processes, discontinuity detection, early vision processing, Educational institutions, Image edge detection, Image segmentation, Laboratories, multistage diffusion process, Noise shaping, noise-free edges, noisy edges, Performance analysis, roof edges, segmentation, SHAPE, shape homogeneous regions, step edges, valley edges}, isbn = {0-8186-3880-X}, doi = {10.1109/CVPR.1993.341003}, author = {Yacoob,Yaser and Davis, Larry S.} } @conference {14174, title = {Global 3D motion estimation}, booktitle = {1993 IEEE Computer Society Conference on Computer Vision and Pattern Recognition, 1993. Proceedings CVPR {\textquoteright}93}, year = {1993}, month = {1993/06/15/17}, pages = {415 - 421}, publisher = {IEEE}, organization = {IEEE}, abstract = {It is shown how a monocular observer can estimate its 3D motion relative to the scene by using normal flow measurements in a global and qualitative way. It is proved that local normal flow measurements form global patterns in the image plane. The position of these patterns is related to the 3D motion parameters. By locating some of these patterns, which depend only on subsets of the motion parameters, through a simple search technique, the 3D motion parameters can be found. The proposed algorithmic procedure is very robust, since it is not affected by small perturbations in the normal flow measurements.
The direction of translation and the axis of rotation can be estimated even with up to 100\% error in the image measurements.}, keywords = {3D motion parameters, Automation, axis of rotation, Computer vision, direction of translation, Educational institutions, Equations, Fluid flow measurement, global 3D motion estimation, Laboratories, Layout, monocular observer, Motion estimation, Motion measurement, normal flow measurements, Robustness, Rotation measurement, search problems, search technique, State estimation}, isbn = {0-8186-3880-X}, doi = {10.1109/CVPR.1993.341097}, author = {Ferm{\"u}ller, Cornelia} } @conference {14176, title = {Motion constraint patterns}, booktitle = {Proceedings of IEEE Workshop on Qualitative Vision, 1993}, year = {1993}, month = {1993/06/14/}, pages = {128 - 139}, publisher = {IEEE}, organization = {IEEE}, abstract = {The problem of egomotion recovery has been treated by using as input local image motion, with the published algorithms utilizing the geometric constraint relating 2-D local image motion (optical flow, correspondence, derivatives of the image flow) to 3-D motion and structure. Since it has proved very difficult to achieve accurate input (local image motion), a lot of effort has been devoted to the development of robust techniques. Here, a new approach to the problem of egomotion estimation is taken, based on constraints of a global nature. It is proved that local normal flow measurements form global patterns in the image plane. The position of these patterns is related to the three-dimensional motion parameters. By locating some of these patterns, which depend only on subsets of the motion parameters, through a simple search technique, the 3-D motion parameters can be found. The proposed algorithmic procedure is very robust, since it is not affected by small perturbations in the normal flow measurements. As a matter of fact, since only the sign of the normal flow measurement is employed, the direction of translation and the axis of rotation can be estimated even with up to 100\% error in the image measurements.}, keywords = {3D motion parameters, Automation, computational geometry, Computer vision, correspondence, Educational institutions, egomotion recovery, Fluid flow measurement, geometric constraint, Geometrical optics, Image motion analysis, image plane, Laboratories, local image motion, local normal flow measurements, Motion estimation, Motion measurement, motion parameters, optical flow, Rotation measurement}, isbn = {0-8186-3692-0}, doi = {10.1109/WQV.1993.262942}, author = {Ferm{\"u}ller, Cornelia} } @conference {12020, title = {Exploratory active vision: theory}, booktitle = {Proceedings of 1992 IEEE Computer Society Conference on Computer Vision and Pattern Recognition, 1992}, year = {1992}, month = {1992/06/15/18}, pages = {10 - 15}, publisher = {IEEE}, organization = {IEEE}, abstract = {An active approach to the integration of shape-from-X modules (here, shape from shading and shape from texture) is proposed. The question of what constitutes a good motion for the active observer is addressed. Generally, the role of the visual system is to provide depth information to an autonomous robot; a trajectory module will then interpret it to determine a motion for the robot, which in turn will affect the visual information received.
It is suggested that the motion can also be chosen so as to improve the performance of the visual system.}, keywords = {active observer, Active shape model, active vision, Automation, autonomous robot, CAMERAS, Computer science, Computer vision, depth information, Laboratories, Layout, Mobile robots, Motion analysis, optical flow, Robots, shape from shading, shape from texture, shape from x modules, trajectory module, Visual system}, isbn = {0-8186-2855-3}, doi = {10.1109/CVPR.1992.223234}, author = {Herve,J. -Y and Aloimonos, J.} } @conference {14233, title = {Multi-resolution shape description by corners}, booktitle = {1992 IEEE Computer Society Conference on Computer Vision and Pattern Recognition, 1992. Proceedings CVPR {\textquoteright}92}, year = {1992}, month = {1992/06/15/18}, pages = {271 - 276}, publisher = {IEEE}, organization = {IEEE}, abstract = {A robust method for describing planar curves in multiple resolutions using curvature information is presented. The method is developed by taking into account the discrete nature of digital images as well as the discrete aspect of a multiresolution structure (pyramid). The main contribution lies in the robustness of the technique, which is due to the additional information that is extracted from observing the behavior of corners in the whole pyramid. Furthermore, the resulting algorithm is conceptually simple and easily parallelizable. Theoretical results are developed analyzing the curvature of continuous curves in scale-space and showing the behavior of curvature extrema under varying scale. The results are used to eliminate any ambiguities that might arise from sampling problems due to the discreteness of the representation. Experimental results demonstrate the potential of the method.}, keywords = {ambiguities, Automation, computational geometry, Computer vision, continuous curves, corners, curvature extrema, curvature information, curve fitting, digital images, Feature extraction, IMAGE PROCESSING, image resolution, Image segmentation, Laboratories, multiple resolution, multiresolution structure, parallelizable, planar curves, Robustness, Sampling methods, scale-space, SHAPE, Smoothing methods, varying scale}, isbn = {0-8186-2855-3}, doi = {10.1109/CVPR.1992.223264}, author = {Ferm{\"u}ller, Cornelia and Kropatsch,W.} } @conference {11990, title = {Relative depth from motion using normal flow: an active and purposive solution}, booktitle = {Proceedings of the IEEE Workshop on Visual Motion, 1991}, year = {1991}, month = {1991/10/07/9}, pages = {196 - 204}, publisher = {IEEE}, organization = {IEEE}, abstract = {The authors show how an active observer can compute the relative depth of (stationary or moving) objects in the field of view using only the spatiotemporal derivatives of the time-varying image intensity function. The solution they propose is purposive in the sense that it solves only the relative-depth-from-motion problem and cannot be used for other problems related to motion, and active in the sense that the activity of the observer is essential for the solution of the problem. Results indicate that exact computation of retinal motion does not appear to be a necessary first step for some problems related to visual motion. In addition, optic flow, whose computation is an ill-posed problem, is related to the motion of the scene only under very restrictive assumptions.
As a result, the use of optic flow in some quantitative motion analysis studies is questionable.}, keywords = {3D information, Automation, Computer vision, Educational institutions, Image motion analysis, image recognition, Image sequences, Laboratories, Layout, Motion analysis, Motion estimation, Nonlinear optics, normal flow, optic flow, Optical computing, Optical sensors, relative depth, retinal motion, spatiotemporal derivatives, time varying image intensity function, visual motion}, isbn = {0-8186-2153-2}, doi = {10.1109/WVM.1991.212807}, author = {Huang, Liuqing and Aloimonos, J.} } @conference {11960, title = {Purposive and qualitative active vision}, booktitle = {Proceedings of 10th International Conference on Pattern Recognition, 1990}, volume = {i}, year = {1990}, month = {1990/06/16/21}, pages = {346 - 360}, publisher = {IEEE}, organization = {IEEE}, abstract = {The traditional view of the problem of computer vision as a recovery problem is questioned, and the paradigm of purposive-qualitative vision is offered as an alternative. This paradigm considers vision as a general recognition problem (recognition of objects, patterns, or situations). To demonstrate the usefulness of the framework, the design of the Medusa of CVL is described. It is noted that this machine can perform complex visual tasks without reconstructing the world. If it is provided with intentions, knowledge of the environment, and planning capabilities, it can perform highly sophisticated navigational tasks. It is explained why the traditional structure from motion problem cannot be solved in some cases and why there is reason to be pessimistic about the optimal performance of a structure from motion module. New directions for future research on this problem in the recovery paradigm, e.g., research on stability or robustness, are suggested.}, keywords = {active vision, Automation, brain models, complex visual tasks, Computer vision, environmental knowledge, highly sophisticated navigational tasks, HUMANS, Image reconstruction, intentions, Kinetic theory, Laboratories, Medusa, Motion analysis, Navigation, planning, planning (artificial intelligence), purposive-qualitative vision, recovery problem, Robust stability, Robustness, SHAPE, stability}, isbn = {0-8186-2062-5}, doi = {10.1109/ICPR.1990.118128}, author = {Aloimonos, J.} } @conference {11974, title = {Optimal motion estimation}, booktitle = {Proceedings of the Workshop on Visual Motion, 1989}, year = {1989}, month = {1989/03/20/22}, pages = {229 - 237}, publisher = {IEEE}, organization = {IEEE}, abstract = {The problem of using feature correspondences to recover the structure and 3D motion of a moving object from its successive images is analyzed. The authors formulate the problem as a quadratic minimization problem with a nonlinear constraint. They then derive the condition for the solution to be optimal, under the assumption of Gaussian noise in the input, in the maximum-likelihood-principle sense. The authors present two efficient ways to approximate it and discuss some inherent limitations of the structure-from-motion problem when two frames are used that should be taken into account in robotics applications that involve dynamic imagery. Finally, it is shown that some of the difficulties inherent in the two-frame approach disappear when redundancy in the data is introduced. This is concluded from experiments using a structure-from-motion algorithm that is based on multiple frames and uses only the rigidity assumption.}, keywords = {3D motion interpretation, Automation, Computer vision, computerised pattern recognition, computerised picture processing, constraint minimization, dynamic imagery, Educational institutions, feature correspondences, Gaussian noise, Image motion analysis, Laboratories, maximum-likelihood-principle, Minimization methods, Motion analysis, Motion estimation, motion parameters, moving object, multiple frames, nonlinear constraint, Optical computing, optimal motion estimation, parameter estimation, quadratic minimization, quadratic programming, redundancy, rigidity assumption, robotics applications, structure-from-motion, successive images, two-frame}, isbn = {0-8186-1903-1}, doi = {10.1109/WVM.1989.47114}, author = {Spetsakis, M. E and Aloimonos, J.} }
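Editorial note: the entry above and the entry below both concern recovering 3D motion from point correspondences in two frames. For orientation only, here is the classical linear eight-point estimate of the essential matrix, which is standard textbook material and much simpler than the optimal (maximum-likelihood) formulation those papers develop; it is not the authors' algorithm.

import numpy as np

def essential_eight_point(x1, x2):
    # x1, x2: (N, 2) matched points in normalized camera coordinates, N >= 8,
    # satisfying the epipolar constraint x2^T E x1 = 0.
    a, b = x1[:, 0], x1[:, 1]
    c, d = x2[:, 0], x2[:, 1]
    ones = np.ones(len(x1))
    A = np.column_stack([c * a, c * b, c, d * a, d * b, d, a, b, ones])
    _, _, vt = np.linalg.svd(A)
    E = vt[-1].reshape(3, 3)   # null-space solution, E flattened row-major
    # Project onto the essential manifold: two equal singular values, one zero.
    u, s, vt = np.linalg.svd(E)
    sigma = (s[0] + s[1]) / 2.0
    return u.dot(np.diag([sigma, sigma, 0.0])).dot(vt)

With noisy correspondences this linear estimate is biased, which is one motivation for the constrained-minimization treatment in the entries here.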
@conference {11957, title = {Optimal Computing of Structure from Motion Using Point Correspondences in Two Frames}, booktitle = {Proceedings of Second International Conference on Computer Vision}, year = {1988}, month = {1988/12/05/8}, pages = {449 - 453}, publisher = {IEEE}, organization = {IEEE}, keywords = {Automation, Computer vision, Educational institutions, Gaussian noise, Image motion analysis, Laboratories, Least squares approximation, Least squares methods, Motion estimation, Optical computing}, isbn = {0-8186-0883-8}, doi = {10.1109/CCV.1988.590022}, author = {Spetsakis, M. E and Aloimonos, J.} } @article {11994, title = {Visual shape computation}, journal = {Proceedings of the IEEE}, volume = {76}, year = {1988}, month = {1988/08//}, pages = {899 - 916}, abstract = {Perceptual processes responsible for computing shape from several cues, including shading, texture, contour, and stereo, are examined. It is noted that these computational problems, as well as that of computing shape from motion, are ill-posed in the sense of Hadamard. It is suggested that regularization theory can be used along with a priori knowledge to restrict the space of possible solutions and thus restore the problem{\textquoteright}s well-posedness. Some alternative methods are outlined, and the idea of active vision is explored briefly in connection with the problem.}, keywords = {a priori knowledge, active vision, computational problems, Computer vision, computing shape from motion, contour, cues, Focusing, HUMANS, ill posed problems, Machine vision, Psychology, regularization theory, RETINA, sense of Hadamard, shading, SHAPE, space of possible solutions, Stereo vision, Surface texture, TEXTURE, visual shape computation, Visual system}, isbn = {0018-9219}, doi = {10.1109/5.5964}, author = {Aloimonos, J.} } @article {11984, title = {Determining the Translation of a Rigidly Moving Surface, without Correspondence}, year = {1986}, month = {1986/01//}, abstract = {A method is presented for the recovery of the three-dimensional translation of a rigidly moving textured object from its images. The novelty of the method lies in the fact that four cameras are used in order to avoid solving the correspondence problem. The method appears immune to small percentages of noise and behaves well as the noise increases.}
, keywords = {IMAGE PROCESSING, OPTICAL DETECTION, Translation (image processing), CAMERAS, Computer vision, COMPUTERS, IMMUNITY, MOTION, MOVING TARGETS, NOISE, OPTICAL DETECTION AND DETECTORS, SURFACES, THREE DIMENSIONAL, VISION}, url = {http://stinet.dtic.mil/oai/oai?\&verb=getRecord\&metadataPrefix=html\&identifier=ADA179409}, author = {Aloimonos, J. and Basu, Anup} }
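Editorial note: several entries above (Global 3D motion estimation; Motion constraint patterns; Relative depth from motion using normal flow) build on "normal flow", the component of image motion along the intensity gradient, computed directly from spatiotemporal derivatives. A minimal sketch of that computation, with simple finite differences standing in for the derivative filters a real system would use:

import numpy as np

def normal_flow(frame0, frame1, eps=1e-6):
    # From the brightness-constancy constraint Ix*u + Iy*v + It = 0, the
    # flow component along the gradient is -It / |grad I|.
    # Returns (nu, nx, ny): signed magnitude and unit gradient direction.
    Ix = np.gradient(frame0, axis=1)
    Iy = np.gradient(frame0, axis=0)
    It = frame1 - frame0
    mag = np.sqrt(Ix**2 + Iy**2) + eps
    nu = -It / mag
    return nu, Ix / mag, Iy / mag

Only the projection of the motion field onto the gradient is recoverable locally (the aperture problem); the papers cited above exploit the global patterns, and even just the signs, of these measurements rather than attempting to recover full optical flow.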