/*
============================================================
 [LL,d,U12,L22,ph2s,dep] = blkchol(L,ADA,pars)

mex blkchol.c blkchol2.c blkaux.c

============================================================
% [L,d,U12,L22,ph2nodes,dep] = blkchol(L,P,pars)
%   Computes sparse lower-triangular Cholesky factor L,
%            L*L' = P(perm,perm)
%   Parameters LSYMB, PERM, XSUPER and TMPSIZ
%   are typically generated by ORDMMDMEX.
%
%   There are important differences with standard CHOL(P(perm,perm))':
%
%   -  BLKCHOL uses the supernodal partition XSUPER, possibly splitted
%    by SPLIT, to use dense linear algebra on dense subblocks.
%    Much faster than CHOL.
%
%   -  BLKCHOL may adjust the pivot order of PERM, if numerical stability
%    is in danger. Namely, if a pivot drops below PH1TOL, then that node
%    is postponed until the end of the pivot-ordering.
%
%   -  BLKCHOL never fails. It only sees the lower triangular part of P;
%    if during the elimination, a diagonal entry drops below RELTOL*P(i,i),
%    that node i is flagged with the value -1, and discarded in
%    further computations.  This allows factorization of psd symmetric
%    systems with dependent rows.
   
   
    This file is part of SeDuMi 1.03BETA
    Copyright (C) 1999 Jos F. Sturm
    Dept. Quantitative Economics, Maastricht University, the Netherlands.
    Affiliations up to SeDuMi 1.02 (AUG1998):
      CRL, McMaster University, Canada.
      Supported by the Netherlands Organization for Scientific Research (NWO).
  
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.
  
    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.
  
    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

*/
#include <string.h>
#include "mex.h"
#include "blksdp.h"
#include "blkchol.h"

#define L_OUT    myplhs[0]
#define D_OUT    myplhs[1]
#define U12_OUT  myplhs[2]
#define L22_OUT  myplhs[3]
#define PH2NODES_OUT myplhs[4]
#define DEP_OUT  myplhs[5]
#define NPAROUT 6

#define L_IN      prhs[0]
#define P_IN      prhs[1]
#define PARS_IN   prhs[2]
#define NPARIN 3

/* ============================================================
   SUBROUTINES:
   ============================================================*/
/* ************************************************************
   PROCEDURE makeu12ir
   INPUT
     ph2nodes - length nph2 list of postponed node numbers.
     xsuper   - supernode partition.
     snode    - maps each column to the supernode containing it.
     (xlindx,lindx) - the compact subscript array,
                which has the row indices for each supernode
		(including the diagonal elements).
     nph2     - length of ph2nodes
     u12nnz   - allocated length of u12ir.
   OUTPUT
     u12ir    - Length u12nnz subscript array, corresponding to postponing
       the nodes "ph2nodes" in the cholesky decomposition.
     u12jc    - length nph2+1 list of column-offsets of u12.
   ************************************************************ */
void makeu12ir(int *u12ir, int *u12jc, const ph2struct ph2,
               const int *snode, const int *xsuper,
               const int *xlindx, const int *lindx)
{
  int inz, k, j, jsup;

  inz = 0;
/* ------------------------------------------------------------
   Write nonzero structure of each column ph2.nodes[k], k = 0:ph2.n-1.
   ------------------------------------------------------------ */
  for(k = 0; k < ph2.n; k++){
    u12jc[k] = inz;
    j = ph2.nodes[k];
    jsup = snode[j];
/* ------------------------------------------------------------
   Node j=ph2.nodes[k] is part of supernode jsup. Insert all nodes
   within jsup that ARE AFTER (>) j.
   ------------------------------------------------------------ */
    for(++j; j < xsuper[jsup+1]; j++)
      u12ir[inz++] = j;              /* 1st supernode only below diag of j */
    j -= xsuper[jsup];               /* j = xsuper[jsup+1]-xsuper[jsup] */
/* ------------------------------------------------------------
   For each affected supernode jsup, insert ALL MEMBERS of jsup.
   j is #members jsup; if j < #nzs in column, then link to next
   affected supernode.
   ------------------------------------------------------------ */
    while(xlindx[jsup] + j < xlindx[jsup + 1]){
      jsup = snode[lindx[xlindx[jsup] + j]];         /* next affected snode */
      mxAssert(inz < ph2.maxnnz,"");
      for(j = xsuper[jsup]; j < xsuper[jsup+1]; j++)
        u12ir[inz++] = j;              /* insert complete supernode */
      j -= xsuper[jsup];               /* j = xsuper[jsup+1]-xsuper[jsup] */
    }
  }
  u12jc[ph2.n] = inz;
}

/* ************************************************************
   PROCEDURE makex22
   INPUT
     d        - length m vector: diagonal in LDL factorization. The
                subvector d(ph2nodes) has its sign reversed.
     ph2nodes - length nph2 list of Phase 2 nodes.
     nph2     - number of columns in u12.
     m        - total number of nodes (= possible # row indices)
     snode    - length m array, mapping each subnode to its supernode.
     ph2len   - length nsuper array, giving the below-diagonal length in
               u12 if column xsuper[node] is postponed to phase 2.
     xsuper   - length nsuper+1 array, listing 1st subnode of each supernode.
   UPDATED
     u12      - Sparse m x nph2. On return, u12(ph2nodes,:) = 0,
                and u12(:,k)./d for all k=1:nph2.
   OUTPUT
     x22      -  nph2 x nph2 full matrix. On output, x22 = diag(d(ph2nodes))
                 +  u12IN(ph2nodes,:) - u12OUT'*diag(d(ph2nodes))*u12OUT.
   WORKING ARRAY:
     iwork - length nph2 working array, to store below-diag column-starts u12.
     fwork - length m working array, used to store d.*u12(:,k) in full.
   ************************************************************ */
void makex22(double *x22, jcir u12, const int *ph2nodes, double *d,
             const int *snode, const int *ph2len, const int *xsuper,
             const int nph2, const int m, int *iwork, double *fwork)
{
  int firsti, lasti, k,j,inz,i, node;
  double *x22k;
  double xjk;

/* ------------------------------------------------------------
   INITIALIZE: let fwork(ph2nodes[0],m-ph2nodes[0]) = 0.
   ------------------------------------------------------------ */
  if(nph2 < 1)
    return;
  fzeros(fwork + ph2nodes[0], m-ph2nodes[0]);
/* ------------------------------------------------------------
   Compute tril(x22).  diag(x22) = d(ph2nodes),
   x22(j,k) = u12(ph2nodes(j),k) for j > k.
   Set u12(ph2nodes(j),k) = 0.0 and d(ph2nodes) = 1 after using.
   ------------------------------------------------------------ */
  lasti = u12.jc[0];
  for(k = 0, x22k = x22; k < nph2; x22k += nph2){
    x22k[k] = -d[ph2nodes[k]];                   /* diag(x22) = -d(ph2nodes) */
    firsti = lasti;
    lasti = u12.jc[++k];
    for(j = k; j < nph2; j++){
      if( intbsearch(&firsti, u12.ir, lasti, ph2nodes[j]) ){
        x22k[j] = u12.pr[firsti];                       /* move to x22(j,k) */
        u12.pr[firsti++] = 0.0;
      }
    }
  }
/* ------------------------------------------------------------
   Let tril(x22) -= tril(u12END' * diag(d) * u12END).
   Here, u12END means only the rows below the former diagonal entries:
   the changes above have already been incorporated into x22.
   ------------------------------------------------------------ */
/* ------------------------------------------------------------
   Let iwork(0:nph2-1) be the below-diagonal column starts within
   u12. Thus, iwork[k] = u12.jc[k+1] - ph2len[node] + (j-firstj),
   where j=ph2nodes[k], node=snode[j], firstj=xsuper[node].
   ------------------------------------------------------------ */
  for(k = 0; k < nph2; k++){
    j = ph2nodes[k];
    node = snode[j];
    iwork[k] = u12.jc[k+1] - ph2len[node] + j - xsuper[node];
  }
/* ------------------------------------------------------------
   For each column k, let fwork = full(u12(:,k).*d).
   NOTE: the actual column is k-1. We first treat only cols 0:nph2-2.
   ------------------------------------------------------------ */
  for(k = 1, x22k = x22; k < nph2; k++){
    lasti = u12.jc[k];
    for(inz = iwork[k-1]; inz < lasti; inz++){
      i = u12.ir[inz];
      fwork[i] = u12.pr[inz] * d[i];              /* fwork = u12(:,k) .* d */
    }
/* ------------------------------------------------------------
   Below diagonal, let x22(:,k) -= u12(:,k:nph2)' * fwork
   ------------------------------------------------------------ */
    for(j = k-1; j < nph2; j++){
      for(xjk = 0.0, inz = iwork[j]; inz < u12.jc[j+1]; inz++)
        xjk += u12.pr[inz] * fwork[u12.ir[inz]];
      x22k[j] -= xjk;                             /* x(j,k) -= u(:,j)'*fwork */
    }
    for(inz = iwork[k-1]; inz < lasti; inz++)               /* fwork = 0 */
      fwork[u12.ir[inz]] = 0.0;
    x22k += nph2;                          /* to next column */
  }
/* ------------------------------------------------------------
   It remains to update x22(nph2,nph2) -= u12(:,nph2)*(u12(:,nph2)./=d).
   ------------------------------------------------------------ */
  for(xjk = 0.0, inz = iwork[nph2-1]; inz < u12.jc[nph2] ; inz++){
    xjk += SQR(u12.pr[inz]) * d[u12.ir[inz]];
  }
  x22k[nph2-1] -= xjk;
/* ------------------------------------------------------------
   Copy lower to upper triangle: Let triu(x22,1) = tril(x22,-1)'.
   ------------------------------------------------------------ */
  tril2sym(x22,nph2);
}
   

/* ------------------------------------------------------------
   PERMUTEP - Let L = tril(P(perm,perm))
   INPUT
     Ljc, Lir - sparsity structure of output matrix L = tril(P(perm,perm)).
     Pjc, Pir, Ppr - Input matrix, before ordering.
     perm     - length m pivot ordering.
     m        - order: P is m x m.
   WORKING ARRAY
      Pj  - Length m float work array.
   IMPORTANT: L, P and PERM in C style.
   ------------------------------------------------------------ */
void permuteP(const int *Ljc,const int *Lir,double *Lpr,
              const int *Pjc,const int *Pir,const double *Ppr,
              const int *perm, double *Pj, const int m)
{
  int j,inz,jcol;
/* ------------------------------------------------------------
   Let Pj = all-0
   ------------------------------------------------------------ */
  fzeros(Pj,m);
/* ------------------------------------------------------------
   For each column j, let
    Pj(:) = P(:,PERM(j))   and    L(:,j) = Pj(PERM(:))  (L sparse)
   ------------------------------------------------------------ */
  for(j = 0; j < m; j++){
    jcol = perm[j];
    for(inz = Pjc[jcol]; inz < Pjc[jcol+1]; inz++)
      Pj[Pir[inz]] = Ppr[inz];
    for(inz = Ljc[j]; inz < Ljc[j+1]; inz++)
      Lpr[inz] = Pj[perm[Lir[inz]]];
/* ------------------------------------------------------------
   Let Pj = all-0
   ------------------------------------------------------------ */
    for(inz = Pjc[jcol]; inz < Pjc[jcol+1]; inz++)
      Pj[Pir[inz]] = 0.0;
  }
}

/* ************************************************************
   SPCHOL - calls the block cholesky blkLDL.
   INPUT:
      m       - Order of L: L is m x m, ne.At is N x m.
      nsuper  - Number of supernodes (blocks).
      xsuper  - Length nsuper+1: first simple-node of each supernode
      snode   - Length neqns: snode(node) is the supernode containing "node".
      ljc     - Length neqns+1: start of the columns of L.
      tol     - Tolerance parameters for numerical control.
      iwsiz, fwsiz - size of integer and floating-point working storage.
               See "WORKING ARRAYS" for required amount.
   UPDATED:
      lindx   - row indices. On INPUT: for each column (by ljc),
          on OUTPUT: for each supernode (by xlindx).
      Lpr     - On input, contains tril(X), on output,
          diag(Lpr) == "D" and tril(Lpr,-1) = "L" - eye(m)
	  such that   X = L*D*L'.
      pph2    - *pph2 is a structure on which nodes are postponed to Phase-2;
         such nodes would be numerically unstable here (see tol-structure).
         On INPUT:
         pph2->nodes(neqns), pph2->u12jc(neqns+1), pph2->u12ir(pph2->maxnnz),
         and pph2->u12.pr(pph2->maxnnz) are allocated.
         On OUTPUT:  pph2->len(nsuper) gives remaining nnz of L(xsuper(node),:)
         if the pivot xsuper(node) is postponed to phase 2.
         pph2->n is number of postponed nodes,
         pph2->maxnnz is nnz in pph2->u12.{ir,pr}, which is re-allocated
         accordingly; pph2->u12jc is re-allocated to pph2->n+1.
      ph2len - Length nsuper array, which will be assigned to pph2->len.
   OUTPUT
      xlindx  - Length nsuper+1: Start of sparsity structure in lindx,
              for each supernode (all simple nodes in a supernode have the
              same nonzero-structure).
      snode  - Length m: snode(node) is the supernode containing "node".
   WORKING ARRAYS:
      iwork  - Length iwsiz working array, used for
           link(nsuper), length(nsuper), ph2pattern(nsuper), ph2head(nsuper),
           ph2link(m), irInv(m), relind(m), u12jc(m+1),
           iworkNEW(2*nsuper).
           iwsiz = 4*m + 6*nsuper + 1
      fwork  - Length fwsiz=L.tmpsiz, used for precorrection of snode-cols.
   RETURNS - ndep, number of dependent columns. The col.numbers are listed
       from the bottom of pph2->nodes.
   *********************************************************************** */
int spchol(const int m, const int nsuper, const int *xsuper,
           int *snode,	int *xlindx, int *lindx,
           const int *ljc, double *lpr, const int *perm,
           const int numlvl, const tolstruct tol, ph2struct *pph2,
           int *ph2len,
           const int iwsiz, int *iwork, const int fwsiz, double *fwork)
{
  int jsup,j,ix,jcol,collen;
  
/* ------------------------------------------------------------
   SNODE: map each column to the supernode containing it
   ------------------------------------------------------------ */
  j = xsuper[0];
  for(jsup = 0; jsup < nsuper; jsup++){
    while(j < xsuper[jsup + 1])
      snode[j++] = jsup;
  }
/* ------------------------------------------------------------
   COMPRESS SUBSCRIPTS:
    Let (xlindx,lindx) = ljc(xsuper(:)), i.e store only once
    for each snode, instead of once per column.
   ------------------------------------------------------------ */
  for(ix = 0, jsup = 0; jsup < nsuper; jsup++){
    xlindx[jsup] = ix;
    jcol = xsuper[jsup];
    collen = ljc[jcol+1] - ljc[jcol];
    memmove(lindx + ix, lindx + ljc[jcol], collen * sizeof(int));
    ix += collen;
  }
  xlindx[nsuper] = ix;
  if(numlvl > 0){
/* ------------------------------------------------------------
   Compute pph2->len(1:nsuper): below-diag nnzs of u12(:,k) for first
   column in supernode, if this column were to be postponed to phase 2.
   ------------------------------------------------------------ */
    for(jsup = nsuper-1; jsup >= 0; jsup--){
      collen = xsuper[jsup + 1] - xsuper[jsup];  /* length of supernode */
      ix = xlindx[jsup] + collen;
      if(ix < xlindx[jsup + 1])
        collen += ph2len[ snode[lindx[ix]] ];  /* + PH2 length 1st off-diag */
      ph2len[jsup] = collen;
    }
    for(jsup = 0; jsup < nsuper; jsup++)        /* subtract 1 for diagonal. */
      --ph2len[jsup];
    pph2->len = ph2len;               /* from now on, it's constant */
/* ------------------------------------------------------------
   Do the block sparse Cholesky L*D*L'
   ------------------------------------------------------------ */
    return blkLDL(m, nsuper, xsuper, snode, xlindx, lindx, ljc, lpr, perm,
                  tol, pph2, iwsiz, iwork, fwsiz, fwork);
  }
  else{        /* numlevel = 0: no numerical reordering */
    return blkLDL0(m, nsuper, xsuper, snode, xlindx, lindx, ljc, lpr, perm,
                   tol.dep,pph2->nodes, iwsiz, iwork, fwsiz, fwork);
  }
}

/* ============================================================
   MAIN: MEXFUNCTION
   ============================================================ */
/* ************************************************************
   PROCEDURE mexFunction - Entry for Matlab
   ************************************************************ */
void mexFunction(const int nlhs, mxArray *plhs[],
  const int nrhs, const mxArray *prhs[])
{
  const mxArray *L_FIELD;
  mxArray *myplhs[NPAROUT];
  int    m, i, j, iwsiz, nsuper, tmpsiz, fwsiz, ndep, m1, numlvl;
  double *fwork, *d, *l22, *ph2nodesPr, *depPr;
  const double *permPr,*xsuperPr,*Ppr;
  int    *perm, *snode, *xsuper, *iwork, *xlindx, *ph2len;
  const int *LINjc, *LINir, *Pjc, *Pir;
  double maxdiag;
  jcir   L;
  tolstruct tol;
  ph2struct ph2;
/* ------------------------------------------------------------
   Check for proper number of arguments
   ------------------------------------------------------------ */
  if(nrhs < NPARIN)
    mexErrMsgTxt("blkchol requires more input arguments");
  if(nlhs > NPAROUT)
    mexErrMsgTxt("blkchol produces less output arguments");
/* ------------------------------------------------------------
   Get input matrix P to be factored.
   ------------------------------------------------------------ */
  if( (m = mxGetM(P_IN)) != mxGetN(P_IN))
    mexErrMsgTxt("P must be square");
  if(!mxIsSparse(P_IN))
    mexErrMsgTxt("P must be sparse");
  Pjc    = mxGetJc(P_IN);
  Pir    = mxGetIr(P_IN);
  Ppr    = mxGetPr(P_IN);
/* ------------------------------------------------------------
   Disassemble block Cholesky structure L
   ------------------------------------------------------------ */
  if(!mxIsStruct(L_IN))
    mexErrMsgTxt("Parameter `L' should be a structure.");
  if( (L_FIELD = mxGetField(L_IN,0,"perm")) == NULL)        /* L.perm */
    mexErrMsgTxt("Missing field L.perm.");
  if(m != mxGetM(L_FIELD) * mxGetN(L_FIELD))
    mexErrMsgTxt("perm size mismatch");
  permPr = mxGetPr(L_FIELD);
  if( (L_FIELD = mxGetField(L_IN,0,"L")) == NULL)           /* L.L */
    mexErrMsgTxt("Missing field L.L.");
  if( m != mxGetM(L_FIELD) || m != mxGetN(L_FIELD) )
    mexErrMsgTxt("Size L.L mismatch.");
  if(!mxIsSparse(L_FIELD))
    mexErrMsgTxt("L.L should be sparse.");
  LINjc = mxGetJc(L_FIELD);
  LINir = mxGetIr(L_FIELD);
  if( (L_FIELD = mxGetField(L_IN,0,"xsuper")) == NULL)      /* L.xsuper */
    mexErrMsgTxt("Missing field L.xsuper.");
  nsuper = mxGetM(L_FIELD) * mxGetN(L_FIELD) - 1;
  if( nsuper > m )
    mexErrMsgTxt("Size L.xsuper mismatch.");
  xsuperPr = mxGetPr(L_FIELD);
  if( (L_FIELD = mxGetField(L_IN,0,"tmpsiz")) == NULL)      /* L.tmpsiz */
    mexErrMsgTxt("Missing field L.tmpsiz.");
  tmpsiz   = mxGetScalar(L_FIELD);
/* ------------------------------------------------------------
   Disassemble pars structure: deptol, numlvl, maxu, ph1tol
   ------------------------------------------------------------ */
  if(!mxIsStruct(PARS_IN))
    mexErrMsgTxt("Parameter `pars' should be a structure.");
  if( (L_FIELD = mxGetField(PARS_IN,0,"deptol")) == NULL)  /* pars.deptol */
    mexErrMsgTxt("Missing field pars.deptol.");
  tol.dep  = mxGetScalar(L_FIELD);
  if( (L_FIELD = mxGetField(PARS_IN,0,"numlvl")) == NULL)  /* pars.numlvl */
    mexErrMsgTxt("Missing field pars.deptol.");
  numlvl = mxGetScalar(L_FIELD);
  if(numlvl > 0){
    if( (L_FIELD = mxGetField(PARS_IN,0,"ph1tol")) == NULL)  /* pars.ph1tol */
      mexErrMsgTxt("Missing field pars.ph1tol.");
    tol.ph1  = mxGetScalar(L_FIELD);
    if( (L_FIELD = mxGetField(PARS_IN,0,"maxu")) == NULL)  /* pars.maxu */
      mexErrMsgTxt("Missing field pars.maxu.");
    tol.maxu = mxGetScalar(L_FIELD);
  }
/* ------------------------------------------------------------
   Create sparse output matrix L(m x m).
   ------------------------------------------------------------ */
  L_OUT = mxCreateSparse(m,m, LINjc[m],mxREAL);
  L.jc  = mxGetJc(L_OUT);
  L.ir  = mxGetIr(L_OUT);
  L.pr  = mxGetPr(L_OUT);
  memcpy(L.jc, LINjc, (m+1) * sizeof(int));
  memcpy(L.ir, LINir, LINjc[m] * sizeof(int));
/* ------------------------------------------------------------
   Compute required sizes of working arrays:
   iwsiz = 4*m + 6*nsuper + 1.
   fwsiz = MAX(m, tmpsiz)
   ------------------------------------------------------------ */
  if(numlvl > 0)
    iwsiz = 4 * m + 6 * nsuper + 1;
  else
    iwsiz = MAX(2*(m+nsuper), 1);
  m1 = MAX(m,1);                  /* avoid alloc to 0 */
  fwsiz = MAX(m1, tmpsiz);
/* ------------------------------------------------------------
   Allocate working arrays:
   integer: perm(m), snode(m), xsuper(nsuper+1),
      iwork(iwsiz), xlindx(m+1), ph2len(nsuper), ph2.nodes(m),
   double: fwork(fwsiz).
   ------------------------------------------------------------ */
  perm      = (int *) mxCalloc(m1,sizeof(int)); 
  snode     = (int *) mxCalloc(m1,sizeof(int)); 
  xsuper    = (int *) mxCalloc(nsuper+1,sizeof(int));
  iwork     = (int *) mxCalloc(iwsiz,sizeof(int));
  xlindx    = (int *) mxCalloc(m+1,sizeof(int));
  ph2.nodes = (int *) mxCalloc(m1, sizeof(int));
  ph2len    = (int *) mxCalloc(nsuper,sizeof(int));
  fwork   = (double *) mxCalloc(fwsiz,sizeof(double)); 
/* ------------------------------------------------------------
   Convert PERM, XSUPER to integer and C-Style
   ------------------------------------------------------------ */
  for(i = 0; i < m; i++){
    j = permPr[i];
    perm[i] = --j;
  }
  for(i = 0; i <= nsuper; i++){
    j =  xsuperPr[i];
    xsuper[i] = --j;
  }
/* ------------------------------------------------------------
   Let L = tril(P(PERM,PERM)), uses fwork(m) working storage.
   ------------------------------------------------------------ */
  permuteP(L.jc,L.ir,L.pr, Pjc,Pir,Ppr, perm, fwork, m);
/* ------------------------------------------------------------
   For numlvl > 0 ONLY:
   Let tol.ph1 *= max([1;diag(L)]).
   ------------------------------------------------------------ */
  if(numlvl > 0){
    for(i = 0, maxdiag = 1.0; i < m; i++)
      if(L.pr[L.jc[i]] > maxdiag)
        maxdiag = L.pr[L.jc[i]];
    tol.ph1 *= maxdiag;
/* ------------------------------------------------------------
   For numlvl > 0 ONLY:
   Allocate u12 initially as sparse
   u12(m,m,maxnnz), where we heuristically choose maxnnz = 10*m.
   ------------------------------------------------------------ */
    ph2.maxnnz = 1+10 * m;
    ph2.u12.jc = (int *) mxCalloc(m + 1, sizeof(int));
    ph2.u12.pr = (double *) mxCalloc(ph2.maxnnz, sizeof(double));
    ph2.u12.ir = (int *) mxCalloc(ph2.maxnnz, sizeof(int));
  }
  else{                   /* numlvl = 0: dummy u12 */
    ph2.n = 0;
    ph2.maxnnz = 0;
    ph2.u12.jc = (int *) mxCalloc(2, sizeof(int));
    ph2.u12.pr = (double *) NULL;
    ph2.u12.ir = (int *) NULL;
  }
/* ------------------------------------------------------------
   Create "snode" and "xlindx"; change L.ir to the compact subscript
   array (with xlindx), and do BLOCK SPARSE CHOLESKY (phase I).
   ------------------------------------------------------------ */
  ndep = spchol(m, nsuper, xsuper, snode, xlindx, L.ir, L.jc, L.pr, perm,
                numlvl, tol, &ph2, ph2len, iwsiz, iwork, fwsiz, fwork);
/* ------------------------------------------------------------
   Create output matrix u12 = sparse(m,ph2.n,ph2.maxnnz), and set
   float-nonzeros to ph2.u12pr.
   ------------------------------------------------------------ */
  U12_OUT = mxCreateSparse(m,ph2.n, 1,mxREAL);
  mxFree(mxGetJc(U12_OUT));                    /* jc */
  mxSetJc(U12_OUT, ph2.u12.jc);
  mxFree(mxGetPr(U12_OUT));                    /* pr */
  mxSetPr(U12_OUT, ph2.u12.pr);
  mxFree(mxGetIr(U12_OUT));                    /* ir */
  mxSetIr(U12_OUT, ph2.u12.ir);
  mxSetNzmax(U12_OUT, ph2.maxnnz);
/* ------------------------------------------------------------
   Create output vector d(m) and output matrix L22(ph2.n,ph2.n).
   ------------------------------------------------------------ */
  D_OUT = mxCreateDoubleMatrix(m,1,mxREAL);
  d     = mxGetPr(D_OUT);
  L22_OUT = mxCreateDoubleMatrix(ph2.n,ph2.n,mxREAL);
  l22     = mxGetPr(L22_OUT);
/* ------------------------------------------------------------
   Let d = diag(L)
   ------------------------------------------------------------ */
  for(j = 0; j < m; j++)
    d[j] = L.pr[L.jc[j]];
#ifndef NDEBUG
  for(j = 0; j < m; j++)
    L.pr[L.jc[j]] = 1.0;
#endif
/* ------------------------------------------------------------
   Compute L22 = X(ph2nodes,ph2nodes) - U12'*diag(d(ph2nodes))*U12.
   Uses iwork(ph2.n) and fwork(m) work storage.
   ------------------------------------------------------------ */
  if(numlvl > 0)
    makex22(l22, ph2.u12, ph2.nodes, d, snode, ph2len, xsuper, ph2.n, m,
            iwork, fwork);
/* ------------------------------------------------------------
   Create output vectors
   ph2nodesPr(:) = ph2nodes
   ------------------------------------------------------------ */
  PH2NODES_OUT = mxCreateDoubleMatrix(ph2.n,1,mxREAL);       /* ph2nodes */
  ph2nodesPr = mxGetPr(PH2NODES_OUT);
  for(j = 0; j < ph2.n; j++)
    ph2nodesPr[j] = ph2.nodes[j] + 1.0;
/* ------------------------------------------------------------
   Create output vector dep(ndep), dep = ph2nodes(m-ndep:m-1).
   ------------------------------------------------------------ */
  DEP_OUT = mxCreateDoubleMatrix(ndep,1,mxREAL);
  depPr   = mxGetPr(DEP_OUT);
  for(j = 0; j < ndep; j++)
    depPr[j] = ph2.nodes[m-1-j] + 1.0;
/* ------------------------------------------------------------
   Copy original row-indices from LINir to L.ir.
   ------------------------------------------------------------ */
  memcpy(L.ir, LINir, LINjc[m] * sizeof(int));
/* ------------------------------------------------------------
   Release working arrays.
   ------------------------------------------------------------ */
  mxFree(fwork);
  mxFree(ph2len);
  mxFree(ph2.nodes);
  mxFree(xlindx);
  mxFree(iwork);
  mxFree(xsuper);
  mxFree(snode);
  mxFree(perm);
/* ------------------------------------------------------------
   Copy requested output parameters (at least 1), release others.
   ------------------------------------------------------------ */
  i = MAX(nlhs, 1);
  memcpy(plhs,myplhs, i * sizeof(mxArray *));
  for(; i < NPAROUT; i++)
    mxDestroyArray(myplhs[i]);
}
