% Journal article on visualizing out-of-core data sets with the Active Data
% Repository (ADR). The month range is reconstructed from the exporter string
% "2001/08//Jul"; confirm the issue months against the publisher record.
@article{17842,
  author   = {Kurc, T. and Catalyurek, U. and Chang, Chialin and Sussman, Alan and Saltz, J.},
  title    = {Visualization of large data sets with the {Active Data Repository}},
  journal  = {IEEE Computer Graphics and Applications},
  volume   = {21},
  year     = {2001},
  month    = jul # "--" # aug,
  pages    = {24--33},
  issn     = {0272-1716},
  doi      = {10.1109/38.933521},
  keywords = {active data repository, ADR runtime system, Algorithm design and analysis, application-specific processing, C++, data mining, data retrieval, data visualisation, Data visualization, distributed-memory parallel machines, Indexing, Information retrieval, isosurface rendering, Isosurfaces, large data sets, large-scale multidimensional data, Memory management, modular services, out-of-core data sets, parallel machine, Parallel machines, Partitioning algorithms, ray-casting-based volume rendering, Rendering (computer graphics), Runtime, software libraries, storage management},
  abstract = {We implement ray-casting-based volume rendering and isosurface rendering methods using the Active Data Repository (ADR) for visualizing out-of-core data sets. We have developed the ADR object-oriented framework to provide support for applications that employ range queries with user-defined mapping and aggregation operations on large-scale multidimensional data. ADR targets distributed-memory parallel machines with one or more disks attached to each node. It is designed as a set of modular services implemented in C++, which can be customized for application-specific processing. The ADR runtime system supports common operations such as memory management, data retrieval, and scheduling of processing across a parallel machine},
}

% Conference paper on selecting a range-query processing strategy within ADR.
% The exported month value "2000///" carried no month information and was
% dropped; only the year is recorded.
@inproceedings{17880,
  author       = {Chang, Chialin and Kurc, T. and Sussman, Alan and Saltz, J.},
  title        = {Optimizing retrieval and processing of multi-dimensional scientific datasets},
  booktitle    = {Parallel and Distributed Processing Symposium, 2000. IPDPS 2000. Proceedings. 14th International},
  year         = {2000},
  pages        = {405--410},
  publisher    = {IEEE},
  organization = {IEEE},
  isbn         = {0-7695-0574-0},
  doi          = {10.1109/IPDPS.2000.846013},
  keywords     = {active data repository, Area measurement, Computer science, Data analysis, distributed memory parallel machines, Educational institutions, Information retrieval, Information Storage and Retrieval, infrastructure, Microscopy, Microwave integrated circuits, multi-dimensional scientific datasets retrieval, PARALLEL PROCESSING, Pathology, range queries, regular d-dimensional array, Satellites, Tomography},
  abstract     = {We have developed the Active Data Repository (ADR), an infrastructure that integrates storage, retrieval, and processing of large multi-dimensional scientific datasets on distributed memory parallel machines with multiple disks attached to each node. In earlier work, we proposed three strategies for processing range queries within the ADR framework. Our experimental results show that the relative performance of the strategies changes under varying application characteristics and machine configurations. In this work we investigate approaches to guide and automate the selection of the best strategy for a given application and machine configuration. We describe analytical models to predict the relative performance of the strategies where input data elements are uniformly distributed in the attribute space of the output dataset, restricting the output dataset to be a regular d-dimensional array},
}