% O'Leary and Stewart, Linear Algebra and its Applications 77 (1986), pp. 275--299.
% The numeric citation key is kept as-is so existing \cite{15717} references still resolve.
@article{15717,
  author   = {O'Leary, Dianne P. and Stewart, G. W.},
  title    = {Assignment and scheduling in parallel matrix factorization},
  journal  = {Linear Algebra and its Applications},
  volume   = {77},
  pages    = {275--299},
  year     = {1986},
  month    = may,
  issn     = {0024-3795},
  doi      = {10.1016/0024-3795(86)90172-2},
  url      = {http://www.sciencedirect.com/science/article/pii/0024379586901722},
  abstract = {We consider the problem of factoring a dense $n \times n$ matrix on a network consisting of $P$ MIMD processors, with no shared memory, when the network is smaller than the number of elements in the matrix ($P < n^2$). The specific example analyzed is a computational network that arises in computing the LU, QR, or Cholesky factorizations. We prove that if the nodes of the network are evenly distributed among processors and if computations are scheduled by a round-robin or a least-recently-executed scheduling algorithm, then optimal order of speedup is achieved. However, such speedup is not necessarily achieved for other scheduling algorithms or if the computation for the nodes is inappropriately split across processors, and we give examples of these phenomena. Lower bounds on execution time for the algorithm are established for two important node-assignment strategies.}
}