% G. W. Stewart, "Block Gram-Schmidt Orthogonalization",
% SIAM Journal on Scientific Computing 31 (2008), pp. 761-775.
% The month field is omitted on purpose: the exported value was the
% placeholder "2008///", which carries no month information.
@article{17693,
  author   = {Stewart, G. W.},
  title    = {Block {Gram{\textendash}Schmidt} Orthogonalization},
  journal  = {SIAM Journal on Scientific Computing},
  volume   = {31},
  pages    = {761--775},
  year     = {2008},
  doi      = {10.1137/070682563},
  url      = {https://doi.org/10.1137/070682563},
  keywords = {blocked algorithm, Gram{\textendash}Schmidt algorithm, orthogonalization, QR factorization},
  abstract = {The classical Gram{\textendash}Schmidt algorithm for computing the QR factorization of a matrix $X$ requires at least one pass over the current orthogonalized matrix $Q$ as each column of $X$ is added to the factorization. When $Q$ becomes so large that it must be maintained on a backing store, each pass involves the costly transfer of data from the backing store to main memory. However, if one orthogonalizes the columns of $X$ in blocks of $m$ columns, the number of passes is reduced by a factor of $1/m$. Moreover, matrix-vector products are converted into matrix-matrix products, allowing level-3 BLAS cache performance. In this paper we derive such a block algorithm and give some experimental results that suggest it can be quite effective for large scale problems, even when the matrix $X$ is rank degenerate.},
}