/*

    This file is part of SeDuMi 1.03BETA
    Copyright (C) 1999 Jos F. Sturm
    Dept. Quantitative Economics, Maastricht University, the Netherlands.
    Affiliations up to SeDuMi 1.02 (AUG1998):
      CRL, McMaster University, Canada.
      Supported by the Netherlands Organization for Scientific Research (NWO).
  
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.
  
    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.
  
    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

*/
#include <math.h>
#include <string.h>
#include "blksdp.h"
#include "blkchol.h"
#include "mex.h"

#define DO_OK

/* ************************************************************
   TIME-CRITICAL PROCEDURE -- isscalarmul(x,alpha,n)
   Computes x *= alpha using LEVEL 8 loop-unrolling.
   ************************************************************ */
void isscalarmul(double *x, const double alpha, const int n)
{
  int i;
  
  for(i=0; i< n-7; i++){          /* LEVEL 8 */
    x[i++] *= alpha;
    x[i++] *= alpha;
    x[i++] *= alpha;
    x[i++] *= alpha;
    x[i++] *= alpha;
    x[i++] *= alpha;
    x[i++] *= alpha;
    x[i] *= alpha;
  }
/* ------------------------------------------------------------
   Now, i in {n-7, n-6, ..., n}. Do the last n-i elements.
   ------------------------------------------------------------ */
  if(i < n-3){                           /* LEVEL 4 */
    x[i++] *= alpha;
    x[i++] *= alpha;
    x[i++] *= alpha;
    x[i++] *= alpha;
  }
  if(i < n-1){                           /* LEVEL 2 */
    x[i++] *= alpha;
    x[i++] *= alpha;
  }
  if(i < n)                              /* LEVEL 1 */
    x[i] *= alpha;
}

/* ************************************************************
   PROCEDURE cholonBlk - CHOLESKY on a dense diagonal block. The nonzeros
            in these columns that are below the diagonal block
            are also computed - except that they still should
            be divided by the scalar diagonals "lkk" afterwards.
   INPUT
     m      - number of rows (length of the first column).
     ncols  - number of columns in the supernode.(n <= m)
     tol - {ph1, maxu}: If lkk drops below tol.ph1, it is checked whether
        |l(:,k)|_{infty} < maxu. If not, then k is postponed to Phase 2.
            dep: column considered as all-0 if xkk <= tol.dep.
     ph2head, ph2link - ph2link(...(ph2link(ph2head))) is linked list,
        terminated by "-1", listing the columns in U12 that are affected
        by this supernode.
     colno - global column number of column 0. This is used only to insert
        the global column numbers into ph2nodes.
   OUTPUT
     ph2nodes - Length nph2 (<=ncols) list of subnodes (colno<= subnode <
       colno + ncols), which are postponed to Phase 2, for numerical stability.
   UPDATED
     x      - on input, contains the columns of the supernode to
              be factored. on output, contains the factored columns of
              the supernode.
     U12jc, U12pr - On input, contains the columns in U=L' for the Phase 2
         nodes. The columns linked by ph2link(ph2head) are assumed to have
         the same nonzero structure as this supernode. On output, U12jc
         points to the remainder of these columns, i.e. at the RETURNed sub-
         node. If we RETURN ncols, then just behind this 
         supernode. U12pr will be updated, i.e. forward solved.
     depnodes, pidep - dependent nodes are listed at depnodes[--*pidep],
         which is an array from right-to-left.
   WORKING ARRAY
     ph2cols - length ncols integer working array.
   RETURNS - number of postponed (rejected/ph2) pivots "nph2".
   ************************************************************ */

int cholonBlk(double *x, int m, const int ncols, const int colno,
              const tolstruct tol,
              int *ph2nodes, int *U12jc, double *U12pr,
              const int ph2head, const int *ph2link, int *ph2cols,
              int *depnodes, int *pidep)
{
  int inz,i,k,n,coltail,nph2;
  double xkk,xik, threshd;
  double *xi;
  char postpone;
/* ------------------------------------------------------------
   Initialize:
   ------------------------------------------------------------ */
  n = ncols;
  inz = 0;
  coltail = m - ncols;
  nph2 = 0;
  postpone = 0;
  for(k = 0; k < ncols; k++, --m, --n){
/* -------------------------------------------------------
   Let xkk = L(k,k).
   ------------------------------------------------------- */
    xkk = x[inz];
/* ------------------------------------------------------------
   If xkk <= tol.dep then this is a linearly dependent column:
   Tag by negative lkk, set to zero below-diagonal,
   list k in depnodes, and jump over to next column.
   ------------------------------------------------------------ */
    if(xkk <= tol.dep){
      depnodes[-- *pidep] = colno + k;
      x[inz++] = -1.0;              /* "L(k,k)=-1": means column skipped */
      for(i = 0; i < nph2; i++)
        x[ph2cols[i]++] = 0.0;      /* L(:,k) to all-0 */
      for(i = ph2head; i >= 0; i = ph2link[i])
        U12pr[U12jc[i]++] = 0.0;    /* L(:,k) to all-0 */
      for(i = 1; i < m; i++)
        x[inz++] = 0.0;             /* let L(k+1:m,k) = 0 */
    }
    else{
/* ------------------------------------------------------------
   If xkk < tol.ph1 and |x(k+1:m)|_{infty} >  tol.maxu * xkk, then
   this node is unstable here: postpone it.
   ------------------------------------------------------------ */
      if(xkk < tol.ph1){
        xi = x + inz;
        threshd = tol.maxu * xkk;
        for(i = 1; i < m; i++)
          if(threshd < fabs(xi[i])){
            postpone = 1;
            break;
          }
        if(!postpone){
          for(i = 0; i < nph2; i++)             /* search thru ph2nodes */
            if( threshd < fabs( x[ph2cols[i]] ) ){
              postpone = 1;
              break;
            }
          if(!postpone)         /* ph2link is terminated by "-1" */
            for(i = ph2head; i >= 0; i = ph2link[i])
              if( threshd < fabs( U12pr[U12jc[i]] ) ){
                postpone = 1;
                break;
              }
        }
      }
/* ------------------------------------------------------------
   If this subnode must be postponed:
   append it to ph2nodes (using global column no.), and let ph2cols[nph2]
   point to it.
   ------------------------------------------------------------ */
      if(postpone){
        postpone = 0;
        x[inz++] = -xkk;              /* Make diag elt negative (as ph2 tag) */
        ph2nodes[nph2] = colno + k;
        ph2cols[nph2] = inz;          /* point to 1st off-diagonal */
        for(i = 0; i < nph2; i++)
          ph2cols[i]++;               /* skip this node */
        for(i = ph2head; i >= 0; i = ph2link[i])
          U12jc[i]++;                 /* skip this node */
        ++nph2;
        inz += m-1;                   /* let inz point to next column */
      }
/* --------------------------------------------------------------
   Otherwise, i.e. if k is a stable pivot, then
   -------------------------------------------------------------- */
      else{
        xi = x + inz + m;                 /* point to next column */
        ++inz;
/* ------------------------------------------------------------
   For each postponed node "node":
   u(k,node) /= ukk
   u(k+1:m,node) -= u(k,node) * x(k+1:n).
   Let U12jc[node] point to k+1:m.
   ------------------------------------------------------------ */
        for(i = 0; i < nph2; i++){
          xik = (x[ph2cols[i]++] /= xkk);
          subscalarmul(x + ph2cols[i], xik, x + inz, m-1);
        }
        for(i = ph2head; i >= 0; i = ph2link[i]){
          xik = (U12pr[U12jc[i]++] /= xkk);
          subscalarmul(U12pr + U12jc[i], xik, x + inz, m-1);
        }
/* --------------------------------------------------------------
   REGULAR JOB:
   x(k+1:m,k+1:n) -= x(k+1:m,k) * x(k+1:n,k)' / xkk
   x(k+1:n,k) /= xkk,
   -------------------------------------------------------------- */
        for(i = 1; i < n; i++){
          xik = x[inz] / xkk;
          subscalarmul(xi, xik, x+inz, m-i);
          x[inz++] = xik;
          xi += m-i;
        }
        inz += coltail;                 /* Let inz point to next column */
      } /* !postpone */
    } /* xkk > toldep */
  } /* k=0:ncols-1 */
/* ------------------------------------------------------------
   Return nph2.
   ------------------------------------------------------------ */
  return nph2;
}

/* ************************************************************
   PROCEDURE cholonBlk0 - CHOLESKY on a dense diagonal block. The nonzeros
            in these columns that are below the diagonal block
            are also computed - except that they still should
            be divided by the scalar diagonals "lkk" afterwards.
            SIMPLE version (numlvl = 0), WITHOUT NUMERICAL REORDERING.
   INPUT
     m      - number of rows (length of the first column).
     ncols  - number of columns in the supernode.(n <= m)
     deptol - column considered as all-0 if xkk <= deptol.
     colno - global column number of column 0. This is used only to insert
        the global column numbers into depnodes.
   UPDATED
     x      - on input, contains the columns of the supernode to
              be factored. on output, contains the factored columns of
              the supernode.
     depnodes, pidep - dependent nodes are listed at depnodes[--*pidep],
         which is an array from right-to-left.
   ************************************************************ */

void cholonBlk0(double *x, int m, const int ncols, const int colno,
                const double deptol, int *depnodes, int *pidep)
{
  int inz,i,k,n,coltail;
  double xkk,xik;
  double *xi;
/* ------------------------------------------------------------
   Initialize:
   ------------------------------------------------------------ */
  n = ncols;
  inz = 0;
  coltail = m - ncols;
  for(k = 0; k < ncols; k++, --m, --n){
/* -------------------------------------------------------
   Let xkk = L(k,k).
   ------------------------------------------------------- */
    xkk = x[inz];
/* ------------------------------------------------------------
   If xkk <= deptol then this is a linearly dependent column:
   Tag by negative lkk, set to zero below diagonal,
   list k in depnodes, and jump over to next column.
   ------------------------------------------------------------ */
    if(xkk <= deptol){
      depnodes[-- *pidep] = colno + k;
      x[inz++] = -1.0;              /* tag "-1": means column skipped */
      for(i = 1; i < m; i++)
        x[inz++] = 0.0;             /* let L(k+1:m,k) = 0 */
    }
/* --------------------------------------------------------------
   Otherwise, i.e. if k is an acceptable pivot, then
   -------------------------------------------------------------- */
    else{
      xi = x + inz + m;                 /* point to next column */
      ++inz;
/* --------------------------------------------------------------
   REGULAR JOB:
   x(k+1:m,k+1:n) -= x(k+1:m,k) * x(k+1:n,k)' / xkk
   x(k+1:n,k) /= xkk,
   -------------------------------------------------------------- */
      for(i = 1; i < n; i++){
        xik = x[inz] / xkk;
        subscalarmul(xi, xik, x+inz, m-i);
        x[inz++] = xik;
        xi += m-i;
      }
      inz += coltail;                 /* Let inz point to next column */
    } /* xkk > toldep */
  } /* k=0:ncols-1 */
}

/* ************************************************************
  getfwIrInv  --  Inverse of the subscript function: given a subscript,
      irInv yields the position, counted FROM THE TOP of the sparse column.

  INPUT PARAMETERS -
     nnz    - LENGTH OF THE FIRST COLUMN OF THE SUPERNODE,
              INCLUDING THE DIAGONAL ENTRY.
     Lir    - Lir[0:nnz-1] ARE THE ROW INDICES OF THE NONZEROS
              OF THE FIRST COLUMN OF THE SUPERNODE.
  OUTPUT PARAMETERS - 
     irInv - On return, irInv[Lir[0:nnz-1]] = 0:nnz-1.
             The position of subscript "xij" is thus
			   xjc[j] + irInv[i].
   ************************************************************ */
void getfwIrInv(int *irInv, const int *Lir, const int nnz)
{
  int inz;

  for(inz = 0; inz < nnz; inz++)
    irInv[Lir[inz]] = inz;               /* bwinz = nnz:-1:1 */
}

/* ************************************************************
  getbwIrInv  --  Inverse of the subscript function: given a subscript,
      irInv yields the position, counted FROM THE BOTTOM of the sparse column.

  INPUT PARAMETERS -
     nnz    - LENGTH OF THE FIRST COLUMN OF THE SUPERNODE,
              INCLUDING THE DIAGONAL ENTRY.
     Lir    - Lir[0:nnz-1] ARE THE ROW INDICES OF THE NONZEROS
              OF THE FIRST COLUMN OF THE SUPERNODE.
  OUTPUT PARAMETERS - 
     irInv - On return, irInv[Lir[0:nnz-1]] = nnz:-1:1, so that
		           Lir[nnz-irInv[i]]  == i
             The position of subscript "xij" is thus
			   xjc[j+1] - irInv[i].
   ************************************************************ */
void getbwIrInv(int *irInv, const int *Lir, const int nnz)
{
  int inz,bwinz;

  bwinz = nnz;
  for(inz = 0; inz < nnz; inz++, bwinz--)
    irInv[Lir[inz]] = bwinz;               /* bwinz = nnz:-1:1 */
}

/* ************************************************************
  suboutprod  --  Computes update from a single previous column "xk" on
		a supernode "xj", using dense computations.
  INPUT
     mj, nj  -  supernode "xj" is mj x nj.  More precisely, the column
                lengths are {mj, mj-1, ..., mj-(nj-1)}.
     xkk     -  scalar, the 1st nj entries in xk are divided by this number.
     mk      -  length of xk.  WE ASSUME mk <= mj.  Only 1st mk rows in xj
                are updated.
  UPDATED
     xj  -  On return, xj -= xk*xk(0:nj-1)'/xkk
     xk  -  On return, xk(0:nj-1) /= xkk
   ************************************************************ */
void suboutprod(double *xj, int mj, const int nj, double *xk,
                const double xkk, int mk)
{
  int j;
  double xjk;

  for(j = 0; j < nj; j++){
    xjk = xk[0] / xkk;
    subscalarmul(xj, xjk, xk, mk);   /* xj -= xjk * xk */
    xk[0] = xjk;                     /* FINAL entry ljk */
    xj += mj;                    /* point to next column which is 1 shorter */
    --mj; --mk; ++xk;
  }
}

/* ************************************************************
  isminoutprod  --  Computes update from a column "xk" and stores it in "xj",
	       using dense computations. If "xkk<=0", then let xj = 0.
  INPUT
     mk, nj  -  output "xj" is mk x nj - nj*(nj-1)/2. Its column lengths are
	        {mk, mk-1, ..., mk-(nj-1)}.
     xkk     -  scalar, the 1st nj entries in xk are divided by this number.
  OUTPUT
     xj      -  On return, xj = -xk*xk(0:nj-1)'/xkk       (NOTE THE MINUS !)
                BUT: if xkk <= 0, then xj = zeros(nj*(2m-nj+1)/2,1).
  UPDATED
     xk      -  On return, xk(0:nj-1) /= xkk if xkk > 0, otherwise unchanged.
   ************************************************************ */
void isminoutprod(double *xj, const int nj, double *xk, const double xkk,
                  int mk)
{
  int j;
  double xjk;

  if(xkk > 0.0)   /* if not phase 2 node */
    for(j = 0; j < nj; j++){
      xjk = xk[0] / xkk;
      memcpy(xj,xk,mk * sizeof(double));
      isscalarmul(xj, -xjk, mk);          /* xj = -xjk * xk */
      xk[0] = xjk;                     /* FINAL entry ljk */
      xj += mk;                /* point to next column which is 1 shorter */
      --mk; ++xk;
    }
  else  /* initialize to all-0 if phase-2 node */
    fzeros(xj,(nj * (mk + mk-nj + 1))/2);
}

/* ************************************************************
   spsuboutprod  --  Computes update from a single previous column "xk" on
		     a supernode "xj", with a different sparsity structure.
                     The relevant nonzeros of xj are accessed by a single
                     indirection, via "relind[:]".
   INPUT
     mj, nj  -  supernode "xj" has mj rows in its 1st column. In total, we
	        will update nj columns, corresponding to the 1st nj nonzeros
                in xk.
     xjjc    - xjjc[0] is start of 1st column of xj (as index into xnz), etc.
     xkk     -  scalar, the 1st nj entries in xk are divided by this number.
     mk      -  length of xk.  WE ASSUME mk <= mj.
     relind  - (mj - relind[0:mk-1]) yields the locations in xj on which the
	       xk nonzeros will act.
  UPDATED
     xnz  -  On return, xj(relind,:) -= xk*xk(0:nj-1)'/xkk
     xk   -  On return, xk(0:nj-1) /= xkk
   ************************************************************ */
void spsuboutprod(const int *xjjc, double *xnz, const int mj, const int nj,
                  double *xk,const double xkk,const int mk, const int *relind)
{
  int i, j, jcol, bottomj;
  double xjk;

  ++xjjc;             /* now it points beyond bottom of columns */
  for(j = 0; j < nj; j++){
    jcol = mj - relind[j];       /* affected column */
    bottomj = xjjc[jcol];
    xjk = xk[j] / xkk;
    for(i = j; i < mk; i++)
      xnz[bottomj - relind[i]] -= xjk * xk[i];
    xk[j] = xjk;                     /* FINAL entry ljk */
  }
}

/* ************************************************************
  spadd  --  Let xj += xk, where the supernode "xj", has a sparsity
        structure. The relevant nonzeros of xj are accessed by a indirection,
	via "relind[:]".
  INPUT
     mj, nj  -  supernode "xj" has mj rows in its 1st column. In total, we
	        will update nj columns, corresponding to the 1st nj nonzero
                rows in xk.
     xjjc    - xjjc[0] is start of 1st column of xj (as index into xnz), etc.
     mk      -  length of xk.  WE ASSUME mk <= mj.
     relind  - (mj - relind[0:mk-1]) yields the locations in xj on which the
	       xk nonzeros will act.
     xk      -  mk * nk - nk*(nk-1)/2 matrix, with column lengths
	        mk, mk-1, mk-2,.. mk-(nj-1).  They'll be substracted from
                the entries in xj that are listed by relind.
  UPDATED
     xnz     -  On return, xj(relind,:) += xk
   ************************************************************ */
void spadd(const int *xjjc, double *xnz, const int mj, const int nj,
           const double *xk, const int mk, const int *relind)
{
  int i, j, jcol, bottomj,mkcol;

  ++xjjc;             /* now it points beyond bottom of columns */
  mkcol = mk;         /* mkcol = mk - j */
  for(j = 0; j < nj; j++){
    jcol = mj - relind[j];       /* affected column */
    bottomj = xjjc[jcol];
    for(i = j; i < mk; i++)
      xnz[bottomj - relind[i]] += xk[i];
    xk += (--mkcol);   /* xk(i:mk-1) is next column */
  }
}

/* ************************************************************
   PROCEDURE: reposition
   INPUT
     linkk - head of link, assumed positive. The tail of the list
       is signified by link[k] < 0.
     link - linked list of u12-column numbers, which have to
       be repositioned according to "relind".
     relind - relind[i] gives the new position of the i-the nonzero
       in those columns of u12 that are listed by "link".
     u12jc - start of columns in u12pr.
     length - number of nonzeros in u12 columns before repositioning.
     m    - activated length of the columns after repositioning (m >= length). 
   UPDATED
     u12pr - The columns of u12pr that are denoted by "link" will
       have their "length" nonzeros repositioned on a scale 0:m-1,
       viz. at positions relind[0:length-1]. The remaining m-length
       entries will be filled with zeros.
   RETURNS - last entry "k" in linked list, i.e. link[k] < 0.
   ************************************************************ */
int reposition(int linkk, const int *relind,const int *link,
               const int *u12jc, double *u12pr,
               const int m, const int length)
{
  char samestart;
  int k, inz, subnz;
  double *uj;

/* ------------------------------------------------------------
   samestart denotes whether the 1st nonzero(s) can stay at the old location.
   ------------------------------------------------------------ */
  samestart = (relind[0] == 0);
/* ------------------------------------------------------------
   Browse through all phase 2 columns uj in link[ ... link[linkk]]
   ------------------------------------------------------------ */
  while(linkk >= 0){
    k = linkk;
    linkk = link[k];
    uj = u12pr + u12jc[k];
/* ------------------------------------------------------------
   Let ujNEW = zeros(m,1), ujNEW(relind) = ujOLD.
   ------------------------------------------------------------ */
    inz = m;
    subnz = length;
    if(samestart)
      while(inz > subnz){
        for(--inz, --subnz; inz > relind[subnz]; inz--)
          uj[inz] = 0.0;
        uj[inz] = uj[subnz];
      }
    else{
      while(subnz > 0){
        for(--inz, --subnz; inz > relind[subnz]; inz--)
          uj[inz] = 0.0;
        uj[inz] = uj[subnz];
      }
      while(inz > 0)
        uj[--inz] = 0.0;
    }
  }
/* ------------------------------------------------------------
   RETURN last processed column in linked list. link[k] < 0.
   ------------------------------------------------------------ */
  return k;
}

/* ************************************************************      
   PROCEDURE precorrect  -  Apply corrections from previous columns
      on supernodal diagonal block in L-factor.
   INPUT
     ljc   - start of columns in lpr.
     irInv - For row-indices Jir of affected supernode, Jir[m-irInv[i]]  == i.
     nextj - Last subnode of affected supernode is nextj-1.
     firstk, nextk - subnodes of affecting supernode are firstk:nextk-1.
     Kir   - unfinished row indices of affecting supernode
     mk    - number of unfinished nonzeros in affecting supernode
     fwsiz - Allocated length of fwork.
   UPDATED
     lpr  - For each column k=firstk:nextk-1, and the affected columns j
       in node, DO  L(:,j) -= (ljk / lkk) * L(:,k),
       and store the definitive j-th row of L, viz. ljk /= lkk.
   WORKING ARRAYS
     relind - length mk integer array
     fwork  - length fwsiz vector, for storing -Xk * inv(LABK) * Xk'.
   RETURNS  ncolup, number of columns updated by snode k.
   ************************************************************ */
int precorrect(const int *ljc,double *lpr, const int *irInv,
               const int nextj, const int *Kir, const int mk,
               const int firstk, const int nextk,
               int *relind, const int fwsiz,double *fwork)
{
  int i,j,k,ncolup,mj,inz;
  double xkk;
  double *xj;
/* ------------------------------------------------------------
   j = first subscript in k (== 1st affected column)
   i = last subscript in k
   ncolup = number of nz-rows in k corresponding to columns in node.
   mj = number of nonzeros in l(:,j), the 1st affected column
   ------------------------------------------------------------ */
  j = Kir[0];
  i = Kir[mk-1];
  if(i < nextj)
    ncolup = mk;
  else
    for(ncolup = 1; Kir[ncolup] < nextj; ncolup++);
  mj = ljc[j+1] - ljc[j];
/* ------------------------------------------------------------
   If nz-structure of k is a single block in structure of node,
   (i.e. irInv[Kir[0]] - irInv[Kir[mk-1]] == mk-1). The subnodes
   of "node" must then be consecutive and at the start.
   Thus, we use dense computations :
   ------------------------------------------------------------ */
  if(irInv[j] - irInv[i] < mk){
    xj = lpr + ljc[j];
    inz = ljc[firstk];
    for(k = firstk; k < nextk;){
      xkk = lpr[inz];
      inz = ljc[++k];
      if(xkk > 0)    /* Skip Phase-2 nodes */
        suboutprod(xj, mj, ncolup, lpr + inz-mk, xkk, mk);
    }
  }
  else{
/* ------------------------------------------------------------
   Otherwise, the nz-indices of k are scattered within the structure of node.
   Let relind be the position of these nz's in node, COUNTED FROM THE BOTTOM.
   ------------------------------------------------------------*/
    for(i = 0; i < mk; i++)
      relind[i] = irInv[Kir[i]];
/* ------------------------------------------------------------
   If k is a single column, then do update directly in lpr:
   ------------------------------------------------------------ */
    if(nextk - firstk == 1){
      xkk = lpr[ljc[firstk]];
      if(xkk > 0)            /* Skip phase 2 nodes */
        spsuboutprod(ljc+j,lpr,mj, ncolup, lpr + ljc[nextk]-mk,
                     xkk,mk, relind);
    }
    else{
/* ------------------------------------------------------------
   Multiple columns in affecting snode:
   1. compute the complete modification, and store it in fwork:
   fwork = -Xk * inv(LABK) * Xk'
   ------------------------------------------------------------ */
      if(fwsiz < mk * ncolup - ncolup*(ncolup-1)/2)
        mexErrMsgTxt("Not enough allocated space to fwork.");
      xkk = lpr[ljc[firstk]];           /* 1st column */
      k = firstk + 1;
      inz = ljc[k];
      isminoutprod(fwork, ncolup, lpr + inz-mk, xkk, mk);
      while(k < nextk){      /* remaining cols */
        xkk = lpr[inz];
        inz = ljc[++k];
        if(xkk > 0)   /* skip phase 2 node */
          suboutprod(fwork, mk, ncolup, lpr + inz-mk, xkk, mk);
      }
/* ------------------------------------------------------------
   2. subtract fwork from the sparse columns of node, using relind.
   ------------------------------------------------------------ */
      spadd(ljc+j,lpr,mj, ncolup, fwork,mk, relind);
    }
  } /* end of scattered case */
/* ------------------------------------------------------------
   RETURN number of columns updated, i.e. #subnodes in k that we finished.
   ------------------------------------------------------------ */
  return ncolup;
}

/* ************************************************************
   PROCEDURE searchsnodes: find all supernodes from the linked list
     lhead->link(lhead)-> .. -> nsuper (==eol tag) in which row L(j,:)
     has nonzeros.
   INPUT
     link, lhead, nsuper- linked list, starts at lhead, terminates where
         link[node]==nsuper. These are the super-nodes to search in
     j   - row (sub-node number) to search for
     n   - xsuper[node+1]-xsuper[node] with node=snode(j).
     length - Gives remaining length for each supernode. The search-size
         can thus be shrinked to MIN(length[k],n) for each supernode k.
     xlindx, lindx - the row-indices for each supernode. We start searching
        at xlindx[k+1]-length[k].
   OUTPUT
     rowljir  - Length nsuper array. Lists supernodes from link that
          have nonzero at row j.
     rowljloc - Length nsuper array. Gives position FROM BOTTOM of nz-row
          j for each supernode.
   RETURNS length of rowljir, rowljloc: #supernodes.
   ************************************************************ */
int searchsnodes(int *rowljir,int *rowljloc, const int *link,const int lhead,
                 const int *xlindx, const int *lindx, const int *length,
                 const int nsuper,const int n,const int j)
{
  int supnz, k;
  const int *Kir;
  int *iptr;

/* ------------------------------------------------------------
   Let rowljir(:) list preceding supernodes in which L(j,:) has nonzeros,
   and rowljloc(:) the nz-location of L(j,i) from the bottom of L(:,i).
   ------------------------------------------------------------ */
  supnz = 0;
  for(k = lhead; k < nsuper; k = link[k]){
    Kir = lindx + xlindx[k+1];
    if( (iptr = ibsearch(&j,Kir-length[k],MIN(length[k],n))) != (int *) NULL){
      rowljir[supnz] = k;
      rowljloc[supnz++] = Kir - iptr;         /* location from bottom */
    }
  }
  return supnz;
}

/* ************************************************************
   PROCEDURE ph2cols2u12 - For subnodes "colk" that are postponed
     to phase 2, let u12(:,k) = L(colk,:)' + L(:,colk), skipping
     the diagonal entry. Afterwards, let L(colk,:)=0, L(:,colk)=0.
   INPUT
     ljc       - m+1-array. column pointers into lpr
     addnph2 - pph2->nodes(ph2.n:ph2.n+addnph2-1) lists the subnodes
               of supernode "node" that are postponed.
     endnzold  - j + m - 1, where j=xsuper[node], m = ljc[j+1]-ljc[j].
     nextj     - subnote where we'll want to proceed Cholesky. Thus, if
                supernode "node" is done, then nextj = xsuper[node+1].
     length    - nsuper-array. For previous supernodes, gives remaining
               length, starting with rows of current supernode (node).
     nsuper    - number of supernodes
     link      - node->link(node)->..->nsuper is linked list of affecting
               supernodes.
     xlindx,lindx - row indices for each supernode.
     iwsiz - length(iwork), should be >= 2*nsuper.
   UPDATED
     pph2->n   - addnph2 is added.
     u12jc     - nph2new+1-array. On input, has u12(0:nph2old) as column
               pointers. On output,
               has also u12(nph2old:nph2new). The pointers point to the still
               unfinished parts, i.e. starting at subnode "nextj".
     pph2->u12, pph2->maxnnz - The new u12-values.
     lpr       - On input, lpr(:,ph2nodes[i]) contains the columns
               having received all preceding updates. Since we don't pivot
               on them now, they become rows in L, i.e. column in u12.
               On output, these columns are zero below the diagonal.
     ph2link   - linked list of previous ph2nodes, affecting currently
           processed supernode. Starts at *pph2head, ends at -1. On output,
           it starts at nph2new-1, and browses first through the new ph2nodes,
           then links to the previous list.
     pph2head  - head of ph2link for currently processed supernode.
   REALLOCATED
     pph2->u12.pr, pph2->u12.ir will have pph2->maxnnz nonzeros.
   WORKING ARRAYS
     work =[rowljir, rowljloc] - two length nsuper integer working arrays.
   ************************************************************ */
void ph2cols2u12(int *u12jc,ph2struct *pph2,const int *ljc, double *lpr,
                 const int addnph2,
                 const int endnzold, const int nextj,
                 int *ph2link, int *pph2head,
                 const int *snode, const int *xsuper, const int nsuper,
                 const int *xlindx, const int *lindx,
                 const int *link, const int *length,
                 int *iwork, const int iwsiz)
{
  int i,j,k, inz, colk, iloc, nk,mk, isup, node, n, ph2len, supnnz, nextinz;
  int nph2old, nph2new;
  double *lk;
  jcir u12;
  int *rowljir, *rowljloc;

/* ------------------------------------------------------------
   Partition iwork = [rowljir(nsuper), rowljloc(nsuper)]
   ------------------------------------------------------------ */
  if(iwsiz < 2 * nsuper)
    mexErrMsgTxt("Insufficient integer workspace");
  rowljir = iwork;
  rowljloc = iwork + nsuper;
/* ------------------------------------------------------------
   Currently, pph2->n is before adding "addnph2". Update pph2->n now.
   ------------------------------------------------------------ */
  nph2old = pph2->n;
  nph2new = nph2old + addnph2;
  pph2->n = nph2new;
/* ------------------------------------------------------------
   Initialize: node is current supernode, with length n, let u12 be a local
   copy of pph2->u12, ph2len = pph2->len[node], inz = u12jc[nph2old].
   ------------------------------------------------------------ */
  node = snode[pph2->nodes[nph2old]];
  n = xsuper[node+1]-xsuper[node];
  ph2len = pph2->len[node];
  u12 = pph2->u12;
  inz = u12jc[nph2old];               /* pointer into u12.ir */
/* ------------------------------------------------------------
   For each nph2old <= k < nph2new, colk = ph2nodes(k), find
   supernodes in which row L(colk,:) has nonzeros, and let
   nk = nnz(L(colk,0:xsuper[node]-1))
   ------------------------------------------------------------ */
  for(k = nph2old; k < nph2new; k++){
    colk = pph2->nodes[k];
    supnnz = searchsnodes(rowljir,rowljloc, link,link[node], xlindx,lindx,
                          length, nsuper,n,colk);
    for(i = 0, nk = 0; i < supnnz; i++){
      isup = rowljir[i];
      nk += xsuper[isup+1] - xsuper[isup];
    }
/* ------------------------------------------------------------
   Column u2k has nk nonzeros from previous supernodes, and ph2len
   from current + later ones. Realloc u12.pr, u12.ir if necessary.
   ------------------------------------------------------------ */
    nextinz = inz + nk + ph2len;
    if(nextinz > pph2->maxnnz){
      pph2->maxnnz += nextinz;        /* required + old amount */
      if((u12.pr = (double *) mxRealloc(u12.pr, pph2->maxnnz*sizeof(double)))
         == NULL)
        mexErrMsgTxt("Out of memory");
      if((u12.ir = (int *) mxRealloc(u12.ir, pph2->maxnnz*sizeof(int)))
         == NULL)
        mexErrMsgTxt("Out of memory");
    }
/* ------------------------------------------------------------
   Move nonzeros (with subscripts) from previous supernodes:
   reset originals to 0 after using.
   ------------------------------------------------------------ */
    for(i = 0; i < supnnz; i++){
      isup = rowljir[i];
      iloc = rowljloc[i];
      for(j = xsuper[isup]; j < xsuper[isup+1]; j++){
        u12.ir[inz] = j;
        u12.pr[inz++] = lpr[ljc[j+1]-iloc];
        lpr[ljc[j+1]-iloc] = 0.0;
      }
    }
/* ------------------------------------------------------------
   Idem with current supernode: move L(colk, xsuper[node]:colk-1).
   ------------------------------------------------------------ */
    iloc = ljc[colk+1] - ljc[colk];
    for(i = xsuper[node]; i < colk; i++){
      u12.ir[inz] = i;
      u12.pr[inz++] = lpr[ljc[i+1]-iloc];
      lpr[ljc[i+1]-iloc] = 0.0;
    }
/* ------------------------------------------------------------
   Move below-diag-part L(:,colk) to uk. Skip diagonal entry.
   ------------------------------------------------------------ */
    mk = endnzold - colk;
    lk = lpr + ljc[colk] + 1;
    memcpy(u12.pr + inz, lk,mk * sizeof(double));
    fzeros(lk, mk);
/* ------------------------------------------------------------
   Write below-diag row-indices for u12 within current supernode:
   ------------------------------------------------------------ */
    i = colk;
    for(i = colk + 1; i < xsuper[node + 1]; i++)
      u12.ir[inz++] = i;
/* ------------------------------------------------------------
   For each affected supernode isup, insert ALL MEMBERS of isup.
   ------------------------------------------------------------ */
    isup = node;
    iloc = i - xsuper[isup];        /* iloc = xsuper[node+1]-xsuper[node] */
    while(xlindx[isup] + iloc < xlindx[isup + 1]){
      isup = snode[lindx[xlindx[isup] + iloc]];
      mxAssert(inz < pph2->maxnnz,"");
      for(j = xsuper[isup]; j < xsuper[isup+1]; j++)
        u12.ir[inz++] = j;              /* insert complete supernode */
      iloc = j - xsuper[isup];         /* iloc = xsuper[jsup+1]-xsuper[jsup] */
    }
    mxAssert(inz == nextinz,"");
    u12jc[k+1] = inz;
  }
/* ------------------------------------------------------------
   Let u12.jc(nph2old+1:nph2new) = u12jc(nph2old+1:nph2new).
   Then set u12jc to unfinished part.
   ------------------------------------------------------------ */
  i = nph2old + 1;
  memcpy(u12.jc + i, u12jc + i, addnph2 * sizeof(int));
  iloc = ph2len - (nextj - xsuper[node] - 1);  /* subtract finished part */
  for(; i <= nph2new; i++)
    u12jc[i-1] = u12jc[i] - iloc;
/* ------------------------------------------------------------
   Insert new ph2nodes in ph2link[...ph2link[ph2head[node]]].
   ------------------------------------------------------------ */
  i = nph2old;                   /* first available position */
  ph2link[i] = *pph2head;        /* link to existing list, or -1. */
  for(++i; i < nph2new; i++)
    ph2link[i] = i - 1;
  *pph2head = nph2new - 1;       /* start list in last entry */
/* ------------------------------------------------------------
   Return possibly reallocated pointers in u12:
   ------------------------------------------------------------ */
  pph2->u12 = u12;
}

/* ============================================================
   MAIN ROUTINE: blkLDL
   ============================================================ */
/* ************************************************************
   BLKLDL  --  Block-sparse L*D*L' Cholesky factorization.

   INPUT:
      neqns   - Order "m": L is neqns * neqns
      nsuper  - Number of supernodes (blocks).
      xsuper  - Length nsuper+1: first simple-node of each supernode
      snode   - Length neqns: snode(node) is the supernode containing "node".
      xlindx  - Length nsuper+1: Start of sparsity structure in lindx,
              for each supernode (all simple nodes in a supernode have the
              same nonzero-structure).
      lindx   - row indices, for each supernode.
      ljc     - Length neqns+1: start of the columns of L.
      perm    - Length neqns: reordering of pne->At columns in Cholesky.
      tol     - Tolerance parameters for numerical control.
      iwsiz, fwsiz - size of integer and floating-point working storage.
               See "WORKING ARRAYS" for required amount.
   UPDATED:
      Lpr     - On input, contains tril(X), on output,
              diag(Lpr) == "D" and tril(Lpr,-1) = "L" - eye(neqns)
	      such that   X = L*D*L'.
      pph2    - *pph2 is a structure on which nodes are postponed to Phase-2;
         such nodes would be numerically unstable here (see tol-structure).
         On INPUT: pph2->len(nsuper) gives remaining nnz of L(xsuper(node),:)
         if the pivot xsuper(node) is postponed to phase 2.
         pph2->nodes(neqns), pph2->u12jc(neqns+1), pph2->u12ir(pph2->maxnnz),
         and pph2->u12.pr(pph2->maxnnz) are allocated.
         On OUTPUT: pph2->n is number of postponed nodes,
         pph2->maxnnz is nnz in pph2->u12.{ir,pr}, which is re-allocated
         accordingly; pph2->u12jc is re-allocated to pph2->n+1.
   WORKING ARRAYS:
      iwork  - Length iwsiz working array, used for
           link(nsuper), length(nsuper), ph2pattern(nsuper), ph2head(nsuper),
           ph2link(neqns), irInv(neqns), relind(neqns), u12jc(neqns+1),
           iwork(2 * nsuper).
           iwsiz = 4*m + 6 * nsuper + 1
      fwork  - Length fwsiz working vector, used for
             fwork(L.tmpsiz) in precorrect. fwsiz = L.tmpsiz.
   ACKNOWLEDGMENT:
       Parts are inspired by F77-block Cholesky of Ng and Peyton (ORNL).
   RETURNS - number of dependent columns. The col.numbers are listed from the
       bottom of pph2->nodes.
   ************************************************************ */

int blkLDL(const int neqns, const int nsuper, const int *xsuper,
           const int *snode,  const int *xlindx, const int *lindx,
           const int *ljc, double *lpr, const int *perm,
           const tolstruct tol, ph2struct *pph2,
           int iwsiz, int *iwork, int fwsiz, double *fwork)
{
  const int *Jir, *Kir;
  int *link, *length, *irInv, *relind, *ph2pattern, *ph2head, *ph2link,
    *u12jc, *ncolupLst;
  int *rowljir, *rowljloc;
  int node,nextj,i,j,nnzj,n,inz,  k,colk,mk,linkk, ncolup,snodei,
    addnph2, endnzold, idep;
  ph2struct ph2;
/* ------------------------------------------------------------
   Partition the working arrays:
   iwork: link(nsuper), length(nsuper), ph2pattern(nsuper), ph2head(nsuper),
     ph2link(neqns), irInv(neqns), relind(neqns), u12jc(neqns+1), iworkNEW.
   ------------------------------------------------------------ */
  iwsiz -= 4 * neqns + 4 * nsuper + 1;          /* IWORK: */
  if(iwsiz < 0)
    mexErrMsgTxt("iwsiz too small in blkLDL.");
  link       = iwork;                    /* 4 times length nsuper: */
  length     = (iwork += nsuper);
  ph2pattern = (iwork += nsuper);
  ph2head    = (iwork += nsuper);
  ph2link   = (iwork += nsuper);         /* 3 * length neqns: */
  irInv     = (iwork += neqns);
  relind    = (iwork += neqns);     
  u12jc      = (iwork += neqns);         /* length neqns+1 */
  iwork += neqns + 1;
/* ------------------------------------------------------------
   ncolupLst(neqns) shares the same working array as irInv(neqns).
   Namely, at stage j=xsuper[node], irInv uses only entries >= j,
   whereas ncolupLst only applies to the "old" columns < j.
   ------------------------------------------------------------ */
  ncolupLst = irInv;
/* ------------------------------------------------------------
   Initialize link and ph2pattern to END-OF-LIST  (== nsuper)
   ------------------------------------------------------------ */
  for(node = 0; node < nsuper; node++)
    link[node] = nsuper;
  for(node = 0; node < nsuper; node++)
    ph2pattern[node] = nsuper;
/* ------------------------------------------------------------
   Initialize PHASE2-structure: ph2 = *pph2, ph2.n = 0, u12jc[0] = 0,
   ph2.u12.jc[0]=0.
   ------------------------------------------------------------ */
  ph2   = *pph2;
  ph2.n = 0;
  u12jc[0] = 0;
  ph2.u12.jc[0] = 0;
/* ------------------------------------------------------------
   Initialize idep = neqns. We'll list dependent nodes at ph2.nodes[--idep],
   i.e. from the bottom of ph2.nodes.
   ------------------------------------------------------------ */
  idep = neqns;
/* ------------------------------------------------------------
   For each supernode "node", start at subnode j = xsuper[node],
   having sparsity pattern Jir.
   ------------------------------------------------------------ */
  nextj = xsuper[0];
  for(node = 0; node < nsuper; node++){
    j = nextj;		                /* 1st col in node */
    nextj = xsuper[node+1];
    n = nextj - j;			/* length of node */
    Jir = lindx + xlindx[node];         /* row-indices for column j */
    nnzj = ljc[j+1] - ljc[j];           /* nnz( column j ) */
    endnzold = j + nnzj - 1;
/* ------------------------------------------------------------
   Position the nonzeros of the relevant ph2nodes in the nonzero-
   pattern of the current supernode "node".  The current patterns
   coincide with those of the supernodes k=ph2pattern[...ph2pattern[node]].
   The corresponding ph2-columns are ph2link[...ph2link[ph2head[k]]], for
   each pattern k. We glue them here into a single linked list, because
   they all get the "node"-pattern.
   ------------------------------------------------------------ */
    k = ph2pattern[node];
    if(k < nsuper){
      getfwIrInv(irInv,Jir,nnzj);
      colk = ph2head[k];
      ph2head[node] = colk;              /* start of pattern "node" */
      while(1){
        mk = length[k];                  /* unfinished nonzeros in k */
        Kir = lindx + xlindx[k+1]-mk;    /* row-indices for those nonzeros */
        for(i = 0; i < mk; i++)
          relind[i] = irInv[Kir[i]];
        i = reposition(colk, relind,ph2link,u12jc,ph2.u12.pr, nnzj,mk);
        k = ph2pattern[k];                /* Link to next pattern */
        if(k < nsuper){
          colk = ph2head[k];
          ph2link[i] = colk;              /* glue to pattern "node" */
        }
        else break;
      }
    }
    else
      ph2head[node] = -1;             /* no ph2nodes affected, -1 = tail */
/* ------------------------------------------------------------
   Compute inverse of Jir, yielding position from the bottom:
   Jir[nnzj-irInv[i]]  == i
   This will be handy when adding a column with a sub-sparsity structure
   to column j.
   ------------------------------------------------------------ */
    getbwIrInv(irInv, Jir, nnzj);
/* ------------------------------------------------------------
   Apply corrections from relevant previous super-nodes;
   these snodes are
   node -> link[node] -> link[link[node]] -> ...
   ------------------------------------------------------------ */
    for(k = link[node]; k < nsuper; k = link[k]){
      ncolupLst[k] = precorrect(ljc,lpr,irInv, nextj,
                                lindx + xlindx[k+1]-length[k],
                                length[k],xsuper[k],xsuper[k+1],
                                relind,fwsiz,fwork);
    }	
/* ------------------------------------------------------------
   DO DENSE CHOLESKY on the current supernode
   u12jc[ph2.n+1:ph2.n+1+n] is used as integer work array.
   ------------------------------------------------------------ */
    addnph2 = cholonBlk(lpr + ljc[j],nnzj, n, j, tol,
                        ph2.nodes + ph2.n, u12jc, ph2.u12.pr,
                        ph2head[node], ph2link, u12jc + ph2.n + 1,
                        ph2.nodes, &idep);
    if(addnph2){
      ph2cols2u12(u12jc, &ph2, ljc, lpr, addnph2, endnzold, nextj,
                  ph2link, ph2head + node, snode,xsuper, nsuper,
                  xlindx,lindx, link, length, iwork, iwsiz);
      mxAssert(ph2.n < neqns,"");
    }
/* ------------------------------------------------------------
   insert each current affecting snode k into linked list of
   next supernode it will affect.
   ------------------------------------------------------------ */
    for(k = link[node]; k < nsuper; k = linkk){
      linkk = link[k];
      mk = (length[k] -= ncolupLst[k]);    /* unfinished nonzeros in k */
      if(mk){                              /* if not yet terminated: */
        i = lindx[xlindx[k+1]-mk];
        snodei = snode[i];
        link[k] = link[snodei];            /* prev. also affecting i */
        link[snodei] = k;                  /* next snode it'll affect */
      }
    }
/* ------------------------------------------------------------
   The same for current snode "node" itself:
   ------------------------------------------------------------ */
    if((length[node] = nnzj - n) > 0){
      i = Jir[n];                    /* 1st row outside snode */
      snodei = snode[i];
      link[node] = link[snodei];     /* prev. also affecting i */
      link[snodei] = node;
/* ------------------------------------------------------------
   Link this set of ph2nodes to first to-be-affected supernode "snodei".
   ------------------------------------------------------------ */
      if(ph2head[node] >= 0){
        ph2pattern[node] = ph2pattern[snodei];   /* link thru, or nsuper */
        ph2pattern[snodei] = node;     /* put us on top of link */
      }
    }
    else
      length[node] = 0;              /* Supernode terminated */
  } /* node = 0:nsuper-1 */
/* ------------------------------------------------------------
   FINISHING: REALLOC ph2.u12.pr to the actual maxnnz:= u12jc[ph2.n],
   return updated phase2-structure in *pph2, and return the number
   of dependent columns, ndep = neqns-idep.
   ------------------------------------------------------------ */
  if(u12jc[ph2.n] < ph2.maxnnz){
    ph2.maxnnz = MAX(u12jc[ph2.n],1);           /* avoid realloc to NULL */
    if((ph2.u12.pr = (double *) mxRealloc(ph2.u12.pr, ph2.maxnnz *
                                         sizeof(double))) == NULL)
      mexErrMsgTxt("Memory allocation error");
    if((ph2.u12.ir = (int *) mxRealloc(ph2.u12.ir, ph2.maxnnz * sizeof(int)))
       == NULL)
      mexErrMsgTxt("Memory allocation error");
  }
  if((ph2.u12.jc = (int *) mxRealloc(ph2.u12.jc, (ph2.n + 1) * sizeof(int)))
     == NULL)
    mexErrMsgTxt("Memory allocation error");
  *pph2 = ph2;
  return neqns - idep;
}

/* ************************************************************
   BLKLDL0  --  Block-sparse L*D*L' Cholesky factorization.
         SIMPLE VERSION (numlvl=0), i.e. no numerical reordering.

   INPUT:
      neqns   - Order "m": L is neqns * neqns
      nsuper  - Number of supernodes (blocks).
      xsuper  - Length nsuper+1: first simple-node of each supernode
      snode   - Length neqns: snode(node) is the supernode containing "node".
      xlindx  - Length nsuper+1: Start of sparsity structure in lindx,
              for each supernode (all simple nodes in a supernode have the
              same nonzero-structure).
      lindx   - row indices, for each supernode.
      ljc     - Length neqns+1: start of the columns of L.
      perm    - Length neqns: reordering of pne->At columns in Cholesky.
      deptol  - node j is discarded if d(j) drops below deptol (see depnodes).
      iwsiz, fwsiz - size of integer and floating-point working storage.
               See "WORKING ARRAYS" for required amount.
   UPDATED:
      Lpr     - On input, contains tril(X), on output,
              diag(Lpr) == "D" and tril(Lpr,-1) = "L" - eye(neqns)
	      such that   X = L*D*L'.
   OUTPUT
      depnodes - length neqns array. See "RETURNS".
   WORKING ARRAYS:
      iwork  - Length iwsiz working array, used for
           link(nsuper), length(nsuper),
           irInv(neqns), relind(neqns),
           iwsiz = 2*m + 2 * nsuper
      fwork  - Length fwsiz working vector, used for
             fwork(L.tmpsiz) in precorrect. fwsiz = L.tmpsiz.
   ACKNOWLEDGMENT:
       Parts are inspired by F77-block Cholesky of Ng and Peyton (ORNL).
   RETURNS - number of dependent columns. The col.numbers are listed from the
       bottom of depnodes.
   ************************************************************ */

int blkLDL0(const int neqns, const int nsuper, const int *xsuper,
            const int *snode,  const int *xlindx, const int *lindx,
            const int *ljc, double *lpr, const int *perm,
            const double deptol, int *depnodes,
            int iwsiz, int *iwork, int fwsiz, double *fwork)
{
  const int *Jir;
  int *link, *length, *irInv, *relind, *ncolupLst;
  int *rowljir, *rowljloc;
  int node,nextj,i,j,nnzj,n,inz,  k,colk,mk,linkk, ncolup,snodei,
    endnzold, idep;
/* ------------------------------------------------------------
   Partition the working arrays:
   iwork: link(nsuper), length(nsuper),
     irInv(neqns), relind(neqns), iworkNEW.
   ------------------------------------------------------------ */
  iwsiz -= 2 * neqns + 2 * nsuper;          /* IWORK: */
  if(iwsiz < 0)
    mexErrMsgTxt("iwsiz too small in blkLDL.");
  link       = iwork;                    /* 2 times length nsuper: */
  length     = (iwork += nsuper);
  irInv     = (iwork += nsuper);          /* 2 * length neqns: */
  relind    = (iwork += neqns);
#ifndef NDEBUG
  iwork += neqns;                          /* will not be used */
#endif
/* ------------------------------------------------------------
   ncolupLst(neqns) shares the same working array as irInv(neqns).
   Namely, at stage j=xsuper[node], irInv uses only entries >= j,
   whereas ncolupLst only applies to the "old" columns < j.
   ------------------------------------------------------------ */
  ncolupLst = irInv;
/* ------------------------------------------------------------
   Initialize link to END-OF-LIST  (== nsuper)
   ------------------------------------------------------------ */
  for(node = 0; node < nsuper; node++)
    link[node] = nsuper;
/* ------------------------------------------------------------
   Initialize idep = neqns. We'll list dependent nodes at depnodes[--idep],
   i.e. from the bottom of depnodes.
   ------------------------------------------------------------ */
  idep = neqns;
/* ------------------------------------------------------------
   For each supernode "node", start at subnode j = xsuper[node],
   having sparsity pattern Jir.
   ------------------------------------------------------------ */
  nextj = xsuper[0];
  for(node = 0; node < nsuper; node++){
    j = nextj;		                /* 1st col in node */
    nextj = xsuper[node+1];
    n = nextj - j;			/* length of node */
    Jir = lindx + xlindx[node];         /* row-indices for column j */
    nnzj = ljc[j+1] - ljc[j];           /* nnz( column j ) */
    endnzold = j + nnzj - 1;
/* ------------------------------------------------------------
   Compute inverse of Jir, yielding position from the bottom:
   Jir[nnzj-irInv[i]]  == i
   This will be handy when adding a column with a sub-sparsity structure
   to column j.
   ------------------------------------------------------------ */
    getbwIrInv(irInv, Jir, nnzj);
/* ------------------------------------------------------------
   Apply corrections from relevant previous super-nodes;
   these snodes are
   node -> link[node] -> link[link[node]] -> ...
   ------------------------------------------------------------ */
    for(k = link[node]; k < nsuper; k = link[k]){
      ncolupLst[k] = precorrect(ljc,lpr,irInv, nextj,
                                lindx + xlindx[k+1]-length[k],
                                length[k],xsuper[k],xsuper[k+1],
                                relind,fwsiz,fwork);
    }	
/* ------------------------------------------------------------
   DO DENSE CHOLESKY on the current supernode
   ------------------------------------------------------------ */
    cholonBlk0(lpr + ljc[j],nnzj, n, j, deptol, depnodes, &idep);
/* ------------------------------------------------------------
   insert each current affecting snode k into linked list of
   next supernode it will affect.
   ------------------------------------------------------------ */
    for(k = link[node]; k < nsuper; k = linkk){
      linkk = link[k];
      mk = (length[k] -= ncolupLst[k]);    /* unfinished nonzeros in k */
      if(mk){                              /* if not yet terminated: */
        i = lindx[xlindx[k+1]-mk];
        snodei = snode[i];
        link[k] = link[snodei];            /* prev. also affecting i */
        link[snodei] = k;                  /* next snode it'll affect */
      }
    }
/* ------------------------------------------------------------
   The same for current snode "node" itself:
   ------------------------------------------------------------ */
    if((length[node] = nnzj - n) > 0){
      i = Jir[n];                    /* 1st row outside snode */
      snodei = snode[i];
      link[node] = link[snodei];     /* prev. also affecting i */
      link[snodei] = node;
    }
    else
      length[node] = 0;              /* Supernode terminated */
  } /* node = 0:nsuper-1 */
/* ------------------------------------------------------------
   FINISHING: return the number of dependent columns, ndep = neqns-idep.
   ------------------------------------------------------------ */
  return neqns - idep;
}
