/* ========================================================================== */ /* === klu_tsolve =========================================================== */ /* ========================================================================== */ /* Solve A'x=b using the symbolic and numeric objects from klu_analyze * (or klu_analyze_given) and klu_factor. Note that no iterative refinement is * performed. Uses Numeric->Xwork as workspace (undefined on input and output), * of size 4n Entry's (note that columns 2 to 4 of Xwork overlap with * Numeric->Iwork). */ #include "klu_internal.h" int KLU_tsolve ( /* inputs, not modified */ klu_symbolic *Symbolic, klu_numeric *Numeric, int d, /* leading dimension of B */ int nrhs, /* number of right-hand-sides */ /* right-hand-side on input, overwritten with solution to Ax=b on output */ double B [ ], /* size n*nrhs, in column-oriented form, with * leading dimension d. */ #ifdef COMPLEX int conj_solve, /* TRUE for conjugate transpose solve, FALSE for * array transpose solve. Used for the complex * case only. */ #endif /* --------------- */ klu_common *Common ) { Entry x [4], offik, s ; double rs, *Rs ; Entry *Singleton, *Offx, *X, *Bz ; int *Q, *R, *Pnum, *Offp, *Offi ; int **Lbip, **Ubip, **Lblen, **Ublen ; Unit **LUbx, **Udiag ; int k1, k2, nk, k, block, pend, n, p, nblocks, chunk, nr, i ; if (Common == NULL) { return (FALSE) ; } Common->status = KLU_OK ; if (Numeric == NULL) { /* invalid Numeric object */ Common->status = KLU_INVALID ; return (FALSE) ; } Bz = (Entry *) B ; /* ---------------------------------------------------------------------- */ /* get the contents of the Symbolic object */ /* ---------------------------------------------------------------------- */ n = Symbolic->n ; nblocks = Symbolic->nblocks ; Q = Symbolic->Q ; R = Symbolic->R ; /* ---------------------------------------------------------------------- */ /* get the contents of the Numeric object */ /* ---------------------------------------------------------------------- */ ASSERT (nblocks == Numeric->nblocks) ; Pnum = Numeric->Pnum ; Offp = Numeric->Offp ; Offi = Numeric->Offi ; Offx = (Entry *) Numeric->Offx ; Singleton = (Entry *) Numeric->Singleton ; Lbip = Numeric->Lbip ; Lblen = Numeric->Lblen ; Ubip = Numeric->Ubip ; Ublen = Numeric->Ublen ; LUbx = (Unit **) Numeric->LUbx ; Udiag = (Unit **) Numeric->Udiag ; Rs = Numeric->Rs ; X = (Entry *) Numeric->Xwork ; ASSERT (KLU_valid (n, Offp, Offi, Offx)) ; /* ---------------------------------------------------------------------- */ /* solve in chunks of 4 columns at a time */ /* ---------------------------------------------------------------------- */ for (chunk = 0 ; chunk < nrhs ; chunk += 4) { /* ------------------------------------------------------------------ */ /* get the size of the current chunk */ /* ------------------------------------------------------------------ */ nr = MIN (nrhs - chunk, 4) ; /* ------------------------------------------------------------------ */ /* permute the right hand side, X = Q'*B */ /* ------------------------------------------------------------------ */ switch (nr) { case 1: for (k = 0 ; k < n ; k++) { X [k] = Bz [Q [k]] ; } break ; case 2: for (k = 0 ; k < n ; k++) { i = Q [k] ; X [2*k ] = Bz [i ] ; X [2*k + 1] = Bz [i + d ] ; } break ; case 3: for (k = 0 ; k < n ; k++) { i = Q [k] ; X [3*k ] = Bz [i ] ; X [3*k + 1] = Bz [i + d ] ; X [3*k + 2] = Bz [i + d*2] ; } break ; case 4: for (k = 0 ; k < n ; k++) { i = Q [k] ; X [4*k ] = Bz [i ] ; X [4*k + 1] = Bz [i + d ] ; X [4*k + 2] = Bz [i + d*2] ; X [4*k + 3] = Bz [i + d*3] ; } break ; } /* ------------------------------------------------------------------ */ /* solve X = (L*U + Off)'\X */ /* ------------------------------------------------------------------ */ for (block = 0 ; block < nblocks ; block++) { /* -------------------------------------------------------------- */ /* the block of size nk is from rows/columns k1 to k2-1 */ /* -------------------------------------------------------------- */ k1 = R [block] ; k2 = R [block+1] ; nk = k2 - k1 ; PRINTF (("tsolve %d, k1 %d k2-1 %d nk %d\n", block, k1,k2-1,nk)) ; /* -------------------------------------------------------------- */ /* block back-substitution for the off-diagonal-block entries */ /* -------------------------------------------------------------- */ if (block > 0) { switch (nr) { case 1: for (k = k1 ; k < k2 ; k++) { pend = Offp [k+1] ; for (p = Offp [k] ; p < pend ; p++) { #ifdef COMPLEX if (conj_solve) { MULT_SUB_CONJ (X [k], X [Offi [p]], Offx [p]) ; } else #endif { MULT_SUB (X [k], Offx [p], X [Offi [p]]) ; } } } break ; case 2: for (k = k1 ; k < k2 ; k++) { pend = Offp [k+1] ; x [0] = X [2*k ] ; x [1] = X [2*k + 1] ; for (p = Offp [k] ; p < pend ; p++) { i = Offi [p] ; #ifdef COMPLEX if (conj_solve) { CONJ (offik, Offx [p]) ; } else #endif { offik = Offx [p] ; } MULT_SUB (x [0], offik, X [2*i]) ; MULT_SUB (x [1], offik, X [2*i + 1]) ; } X [2*k ] = x [0] ; X [2*k + 1] = x [1] ; } break ; case 3: for (k = k1 ; k < k2 ; k++) { pend = Offp [k+1] ; x [0] = X [3*k ] ; x [1] = X [3*k + 1] ; x [2] = X [3*k + 2] ; for (p = Offp [k] ; p < pend ; p++) { i = Offi [p] ; #ifdef COMPLEX if (conj_solve) { CONJ (offik, Offx [p]) ; } else #endif { offik = Offx [p] ; } MULT_SUB (x [0], offik, X [3*i]) ; MULT_SUB (x [1], offik, X [3*i + 1]) ; MULT_SUB (x [2], offik, X [3*i + 2]) ; } X [3*k ] = x [0] ; X [3*k + 1] = x [1] ; X [3*k + 2] = x [2] ; } break ; case 4: for (k = k1 ; k < k2 ; k++) { pend = Offp [k+1] ; x [0] = X [4*k ] ; x [1] = X [4*k + 1] ; x [2] = X [4*k + 2] ; x [3] = X [4*k + 3] ; for (p = Offp [k] ; p < pend ; p++) { i = Offi [p] ; #ifdef COMPLEX if (conj_solve) { CONJ(offik, Offx [p]) ; } else #endif { offik = Offx [p] ; } MULT_SUB (x [0], offik, X [4*i]) ; MULT_SUB (x [1], offik, X [4*i + 1]) ; MULT_SUB (x [2], offik, X [4*i + 2]) ; MULT_SUB (x [3], offik, X [4*i + 3]) ; } X [4*k ] = x [0] ; X [4*k + 1] = x [1] ; X [4*k + 2] = x [2] ; X [4*k + 3] = x [3] ; } break ; } } /* -------------------------------------------------------------- */ /* solve the block system */ /* -------------------------------------------------------------- */ if (nk == 1) { #ifdef COMPLEX if (conj_solve) { CONJ (s, Singleton [block]) ; } else #endif { s = Singleton [block] ; } switch (nr) { case 1: DIV (X [k1], X [k1], s) ; break ; case 2: DIV (X [2*k1], X [2*k1], s) ; DIV (X [2*k1 + 1], X [2*k1 + 1], s) ; break ; case 3: DIV (X [3*k1], X [3*k1], s) ; DIV (X [3*k1 + 1], X [3*k1 + 1], s) ; DIV (X [3*k1 + 2], X [3*k1 + 2], s) ; break ; case 4: DIV (X [4*k1], X [4*k1], s) ; DIV (X [4*k1 + 1], X [4*k1 + 1], s) ; DIV (X [4*k1 + 2], X [4*k1 + 2], s) ; DIV (X [4*k1 + 3], X [4*k1 + 3], s) ; break ; } } else { KLU_utsolve (nk, Ubip [block], Ublen [block], LUbx [block], Udiag [block], nr, #ifdef COMPLEX conj_solve, #endif X + nr*k1) ; KLU_ltsolve (nk, Lbip [block], Lblen [block], LUbx [block], nr, #ifdef COMPLEX conj_solve, #endif X + nr*k1) ; } } /* ------------------------------------------------------------------ */ /* scale and permute the result, Bz = P'(R\X) */ /* ------------------------------------------------------------------ */ if (Rs == NULL) { /* no scaling */ switch (nr) { case 1: for (k = 0 ; k < n ; k++) { Bz [Pnum [k]] = X [k] ; } break ; case 2: for (k = 0 ; k < n ; k++) { i = Pnum [k] ; Bz [i ] = X [2*k ] ; Bz [i + d ] = X [2*k + 1] ; } break ; case 3: for (k = 0 ; k < n ; k++) { i = Pnum [k] ; Bz [i ] = X [3*k ] ; Bz [i + d ] = X [3*k + 1] ; Bz [i + d*2] = X [3*k + 2] ; } break ; case 4: for (k = 0 ; k < n ; k++) { i = Pnum [k] ; Bz [i ] = X [4*k ] ; Bz [i + d ] = X [4*k + 1] ; Bz [i + d*2] = X [4*k + 2] ; Bz [i + d*3] = X [4*k + 3] ; } break ; } } else { switch (nr) { case 1: for (k = 0 ; k < n ; k++) { SCALE_DIV_ASSIGN (Bz [Pnum [k]], X [k], Rs [k]) ; } break ; case 2: for (k = 0 ; k < n ; k++) { i = Pnum [k] ; rs = Rs [k] ; SCALE_DIV_ASSIGN (Bz [i], X [2*k], rs) ; SCALE_DIV_ASSIGN (Bz [i + d], X [2*k + 1], rs) ; } break ; case 3: for (k = 0 ; k < n ; k++) { i = Pnum [k] ; rs = Rs [k] ; SCALE_DIV_ASSIGN (Bz [i], X [3*k], rs) ; SCALE_DIV_ASSIGN (Bz [i + d], X [3*k + 1], rs) ; SCALE_DIV_ASSIGN (Bz [i + d*2], X [3*k + 2], rs) ; } break ; case 4: for (k = 0 ; k < n ; k++) { i = Pnum [k] ; rs = Rs [k] ; SCALE_DIV_ASSIGN (Bz [i], X [4*k], rs) ; SCALE_DIV_ASSIGN (Bz [i + d], X [4*k + 1], rs) ; SCALE_DIV_ASSIGN (Bz [i + d*2], X [4*k + 2], rs) ; SCALE_DIV_ASSIGN (Bz [i + d*3], X [4*k + 3], rs) ; } break ; } } /* ------------------------------------------------------------------ */ /* go to the next chunk of B */ /* ------------------------------------------------------------------ */ Bz += d*4 ; } return (TRUE) ; }