@conference {13070, title = {A novel feature descriptor based on the shearlet transform}, booktitle = {Image Processing (ICIP), 2011 18th IEEE International Conference on}, year = {2011}, month = {2011/09//}, pages = {1033 - 1036}, abstract = {Problems such as image classification, object detection and recognition rely on low-level feature descriptors to represent visual information. Several feature extraction methods have been proposed, including the Histograms of Oriented Gradients (HOG), which captures edge information by analyzing the distribution of intensity gradients and their directions. In addition to directions, the analysis of edges at different scales provides valuable information. Shearlet transforms provide a general framework for analyzing and representing data with anisotropic information at multiple scales. As a consequence, signal singularities, such as edges, can be precisely detected and located in images. Based on the idea of employing histograms to estimate the distribution of edge orientations and on the accurate multi-scale analysis provided by shearlet transforms, we propose a feature descriptor called Histograms of Shearlet Coefficients (HSC). Experimental results comparing HOG with HSC show that HSC provides significantly better results for the problems of texture classification and face identification.}, keywords = {edge detection;edge orientations;face identification;face recognition;feature descriptor;feature extraction;feature extraction methods;histograms of oriented gradients;histograms of shearlet coefficients;image classification;image texture;intensity gradient distribution;multiscale analysis;object detection;object recognition;shearlet transform;signal singularities;texture classification;transforms}, doi = {10.1109/ICIP.2011.6115600}, author = {Schwartz, W.R. and da Silva, R.D. and Davis, Larry S. and Pedrini, H.} }
@conference {12486, title = {Pose estimation in heavy clutter using a multi-flash camera}, booktitle = {Robotics and Automation (ICRA), 2010 IEEE International Conference on}, year = {2010}, month = {2010/05//}, pages = {2028 - 2035}, abstract = {We propose a novel solution to object detection, localization and pose estimation with applications in robot vision. The proposed method is especially applicable when the objects of interest may not be richly textured and are immersed in heavy clutter. We show that a multi-flash camera (MFC) provides accurate separation of depth edges and texture edges in such scenes. Then, we reformulate the problem as one of matching the depth edges obtained in one or more MFC images to the rendered depth edges that are computed offline using a 3D CAD model of the objects. In order to facilitate accurate matching of these binary depth edge maps, we introduce a novel cost function that respects both the position and the local orientation of each edge pixel. This cost function is significantly superior to the traditional Chamfer cost and leads to accurate matching even in heavily cluttered scenes where traditional methods are unreliable. We present a sub-linear time algorithm to compute the cost function using techniques from 3D distance transforms and integral images. Finally, we also propose a multi-view based pose-refinement algorithm to improve the estimated pose.
We implemented the algorithm on an industrial robot arm and obtained location and angular estimation accuracy on the order of 1 mm and 2{\textdegree}, respectively, for a variety of parts with minimal texture.}, keywords = {3D distance transforms;angular estimation;binary depth edge maps;cameras;cost function;depth edges;image matching;image texture;integral images;location estimation;multiflash camera;multiview based pose-refinement algorithm;object detection;object localization;pose estimation;robot vision;texture edges;transforms}, doi = {10.1109/ROBOT.2010.5509897}, author = {Liu, Ming-Yu and Tuzel, O. and Veeraraghavan, A. and Chellappa, Rama and Agrawal, A. and Okuda, H.} }
@conference {13098, title = {Object detection via boosted deformable features}, booktitle = {Image Processing (ICIP), 2009 16th IEEE International Conference on}, year = {2009}, month = {2009/11//}, pages = {1445 - 1448}, abstract = {It is a common practice to model an object for detection tasks as a boosted ensemble of many models built on features of the object. In this context, features are defined as subregions with fixed relative locations and extents with respect to the object{\textquoteright}s image window. We introduce the use of deformable features with boosted ensembles. A deformable feature adapts its location depending on the visual evidence in order to match the corresponding physical feature. Therefore, deformable features can better handle deformable objects. We empirically show that boosted ensembles of deformable features perform significantly better than boosted ensembles of fixed features for human detection.}, keywords = {boosted ensembles;deformable features;feature extraction;human detection;object detection;statistics;visual evidence}, doi = {10.1109/ICIP.2009.5414561}, author = {Hussein, M. and Porikli, F. and Davis, Larry S.} }
@article {12545, title = {Object Detection, Tracking and Recognition for Multiple Smart Cameras}, journal = {Proceedings of the IEEE}, volume = {96}, year = {2008}, month = {2008/10//}, pages = {1606 - 1624}, abstract = {Video cameras are among the most commonly used sensors in a large number of applications, ranging from surveillance to smart rooms for videoconferencing. There is a need to develop algorithms for tasks such as detection, tracking, and recognition of objects, specifically using distributed networks of cameras. The projective nature of imaging sensors provides ample challenges for data association across cameras. We first discuss the nature of these challenges in the context of visual sensor networks. Then, we show how real-world constraints can be favorably exploited in order to tackle these challenges. Examples of real-world constraints are (a) the presence of a world plane, (b) the presence of a three-dimensional scene model, (c) consistency of motion across cameras, and (d) color and texture properties. In this regard, the main focus of this paper is on highlighting the efficient use of the geometric constraints induced by the imaging devices to derive distributed algorithms for target detection, tracking, and recognition. Our discussions are supported by several examples drawn from real applications.
Lastly, we describe several potential research problems that remain to be addressed.}, keywords = {cameras;data association;distributed algorithm;distributed camera network;distributed sensors;geometric constraints;geometry;image colour analysis;image sensors;image texture;imaging sensor;intelligent sensors;multiple smart cameras;object detection;object recognition;object tracking;sensor fusion;target detection;target tracking;three-dimensional scene model;video cameras;video sensor;visual sensor network}, isbn = {0018-9219}, doi = {10.1109/JPROC.2008.928758}, author = {Sankaranarayanan, A. C. and Veeraraghavan, A. and Chellappa, Rama} }
@article {14889, title = {Robust and efficient detection of salient convex groups}, journal = {Pattern Analysis and Machine Intelligence, IEEE Transactions on}, volume = {18}, year = {1996}, month = {1996/01//}, pages = {23 - 37}, abstract = {This paper describes an algorithm that robustly locates salient convex collections of line segments in an image. The algorithm is guaranteed to find all convex sets of line segments in which the length of the gaps between segments is smaller than some fixed proportion of the total length of the lines. This enables the algorithm to find convex groups whose contours are partially occluded or missing due to noise. We give an expected-case analysis of the algorithm{\textquoteright}s performance. This demonstrates that salient convexity is unlikely to occur at random, and hence is a strong clue that grouped line segments reflect underlying structure in the scene. We also show that the run time of our algorithm is $O(n^2 \log(n) + nm)$ when we wish to find the $m$ most salient groups in an image with $n$ line segments. We support this analysis with experiments on real data, and demonstrate the grouping system as part of a complete recognition system.}, keywords = {computational complexity;computer vision;contours;edge detection;feature extraction;image recognition;line segment detection;object recognition;perceptual organisation;proximity;salient convex groups}, isbn = {0162-8828}, doi = {10.1109/34.476008}, author = {Jacobs, David W.} }