/*
mex cholp.c blkaux.c


 [U,d,ph2perm,dep] = cholp(X, deptol)

    This file is part of SeDuMi 1.03BETA
    Copyright (C) 1999 Jos F. Sturm
    Dept. Quantitative Economics, Maastricht University, the Netherlands.
    Affiliations up to SeDuMi 1.02 (AUG1998):
      CRL, McMaster University, Canada.
      Supported by the Netherlands Organization for Scientific Research (NWO).
  
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.
  
    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.
  
    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

*/

#include <string.h>
#include "mex.h"
#include "blksdp.h"
#include "blkchol.h"


#define U_OUT myplhs[0]
#define D_OUT myplhs[1]
#define PERM_OUT myplhs[2]
#define DEP_OUT myplhs[3]
#define NPAROUT 4

#define P_IN      prhs[0]
#define DEPTOL_IN prhs[1]
#define NPARIN 2

/* ************************************************************
   TIME-CRITICAL PROCEDURE -- isrealHadadiv
   Computes  x ./= y  using loop-unrolling.
   ************************************************************ */
void isrealHadadiv(double *x, const double *y, const int n)
{
 int i;

 for(i = 0; i < n-7; i++){              /* LEVEL 8 */
   x[i] /= y[i]; i++;
   x[i] /= y[i]; i++;
   x[i] /= y[i]; i++;
   x[i] /= y[i]; i++;
   x[i] /= y[i]; i++;
   x[i] /= y[i]; i++;
   x[i] /= y[i]; i++;
   x[i] /= y[i];
 }
/* ------------------------------------------------------------
   Now, i in {n-7, n-6, n-5, .. ,n}. Do the last n-i elements.
   ------------------------------------------------------------ */
 if(i < n-3){                        /* LEVEL 4 */
   x[i] /= y[i]; i++;
   x[i] /= y[i]; i++;
   x[i] /= y[i]; i++;
   x[i] /= y[i]; i++;
 }
 if(i < n-1){                        /* LEVEL 2 */
   x[i] /= y[i]; i++;
   x[i] /= y[i]; i++;
 }
 if(i < n)                           /* LEVEL 1 */
   x[i] /= y[i];
}

/* ************************************************************
   PROCEDURE cholpivot: Cholesky with diagonal pivoting
   INPUT
     x       - n x n full matrix to be factored.
     n       - order of matrix x.
     deptol  - If d(k) drops below deptol, then discard this column
       as being dependen (d(k) = 0, u(:,k) = k-th column identity matrix)
   OUTPUT
     d       - n-vector: diagonal elements (in stable permuted order),
               thus d(1:n) corresponds to columns u(:,perm).
     perm    - length n array, stable permutation.
     dep     - length ndep <= n array. Lists dependent (skipped) pivots.
              Thus, ph2nodes[perm[dep]] gives the actual dep-node-numbers.
     u       - n x n full matrix. On OUTPUT, u is the Cholesky factor:
              X = U'*DIAG(D)*U.
              u(:,perm) is unit-diag upper triangular.
              (We store columns in original order, rows in
              stable order).
   RETURNS
     ndep, number of dependent (phase 2) nodes, i.e. nnz(dep).
   ************************************************************ */
int cholpivot(double *u,double *d,int *perm, int *dep,
              const double *x, const int n, const double deptol)
{
  int i,j,k,imax,icol,ndep;
  double dk,uki;
  double *uk, *rowuk;
  const double *xk;

/* ------------------------------------------------------------
   Initialize: d = diag(x),   perm = 0:n-1, ndep = 0.
   ------------------------------------------------------------ */
  for(xk = x, k = 0; k < n; xk += n, k++)
    d[k] = xk[k];
  for(i = 0; i < n; i++)
    perm[i] = i;
  ndep = 0;
/* ------------------------------------------------------------
   Pivot in step k=0:n-1 on imax:
   u(1:k-1,imax) ./= d(1:k-1),
   u(k,j) = x(imax,j) - u(:,imax)'*u(1:k-1,j)   for j = perm(k+1:n-1).
   ------------------------------------------------------------ */
  for(k = 0; k < n; k++){
/* ------------------------------------------------------------
   Let [imax,dk] = max(d(k:m))
   ------------------------------------------------------------ */
    dk = d[k]; imax = k;
    for(i = k + 1; i < n; i++)
      if(d[i] > dk){
        imax = i;
        dk = d[i];
      }
/* ------------------------------------------------------------
   k-th pivot is j=perm[imax]: row "rowuk"=u(k,:), column "uk"=u(:,j).
   ------------------------------------------------------------ */
    d[imax] = d[k];
    j = perm[imax];                     /* original node number */
    uk = u + j * n;
    rowuk = u + k;
    perm[imax] = perm[k];
    perm[k] = j;
/* ------------------------------------------------------------
   Let uk = uk(0:k-1)./d.
   ------------------------------------------------------------ */
    isrealHadadiv(uk, d, k);        /* uk(0:k-1) ./= d(0:k-1) */
#ifndef NDEBUG
    uk[k] = 1.0;
#endif
/* ------------------------------------------------------------
   If dk > deptol:
   Set    d(k) = dk.
   Let    rowuk = x(k,:)-uk'*u(0:k-1,:),
   update d(k+1:n-1) -= rowuk.^2 / dk.
   ------------------------------------------------------------ */
    if(dk > deptol){
      d[k] = dk;
      for(i = k + 1; i < n; i++){
        icol = perm[i] * n;
        uki = x[icol + j] - realdot(uk, u+icol, k);
        rowuk[icol] = uki;
        d[i] -= SQR(uki) / dk;       /* d(:) -= u(k,:).^2 / dk */
      }
    }
/* ------------------------------------------------------------
   But if dk dropped below deptol, then set rowuk = all-0.
   ------------------------------------------------------------ */
    else{
      dep[ndep++] = k;              /* k-th pivot is lin. dependent */
      d[k] = 1.0;                   /* avoid division by zero (temporarily) */
      for(i = k + 1; i < n; i++)
        rowuk[perm[i] * n] = 0.0;    /* discard row k */
    }
  }
/* ------------------------------------------------------------
   Factorization is finished.
   Set d(dep) = 0.
   ------------------------------------------------------------ */
  for(k = 0; k < ndep; k++)
    d[dep[k]] = 0.0;
/* ------------------------------------------------------------
   Return number of dependent phase 2 columns.
   ------------------------------------------------------------ */
  return ndep;
}

/* ============================================================
   MAIN: MEXFUNCTION
   ============================================================ */
/* ************************************************************
   PROCEDURE mexFunction - Entry for Matlab
   ************************************************************ */
void mexFunction(const int nlhs, mxArray *plhs[],
  const int nrhs, const mxArray *prhs[])
{
  mxArray *myplhs[NPAROUT];
  int i,j,n,ndep;
  double *u, *d, *permPr, *depPr;
  const double *p;
  int *newperm, *dep;
  double deptol;
/* ------------------------------------------------------------
   Check for proper number of arguments
   ------------------------------------------------------------ */
  if(nrhs < NPARIN)
    mexErrMsgTxt("cholp requires more input arguments");
  if(nlhs > NPAROUT)
    mexErrMsgTxt("cholp produces less output arguments");
/* ------------------------------------------------------------
   Get full input matrix P to be factored.
   ------------------------------------------------------------ */
  if( (n = mxGetM(P_IN)) != mxGetN(P_IN))
    mexErrMsgTxt("P must be square");
  if(mxIsSparse(P_IN))
    mexErrMsgTxt("P must be full");
  p = mxGetPr(P_IN);
/* ------------------------------------------------------------
   Get scalar parameter deptol
   ------------------------------------------------------------ */
  deptol = mxGetScalar(DEPTOL_IN);
/* ------------------------------------------------------------
   Allocate working arrays: newperm(n), dep(n)
   ------------------------------------------------------------ */
  newperm   = (int *) mxCalloc(MAX(n,1), sizeof(int));
  dep       = (int *) mxCalloc(MAX(n,1), sizeof(int));
/* ------------------------------------------------------------
   Create outputs U,d, perm.
   ------------------------------------------------------------ */
  U_OUT = mxCreateDoubleMatrix(n,n,mxREAL);
  u = mxGetPr(U_OUT);
  D_OUT = mxCreateDoubleMatrix(n,1,mxREAL);
  d = mxGetPr(D_OUT);
  PERM_OUT = mxCreateDoubleMatrix(n,1,mxREAL);
  permPr = mxGetPr(PERM_OUT);
/* ------------------------------------------------------------
   COMPUTE the pivoted Cholesky:
   ------------------------------------------------------------ */
  ndep = cholpivot(u,d,newperm, dep, p, n, deptol);
/* ------------------------------------------------------------
   Create output dep(ndep).
   ------------------------------------------------------------ */
  DEP_OUT = mxCreateDoubleMatrix(ndep,1,mxREAL);
  depPr = mxGetPr(DEP_OUT);
/* ------------------------------------------------------------
   Let perm = newperm(1:n) + 1.0, dep = dep(1:ndep) + 1.0.
   ------------------------------------------------------------ */
  for(i = 0; i < n; i++)
    permPr[i] = newperm[i] + 1.0;
  for(i = 0; i < ndep; i++)
    depPr[i] = dep[i] + 1.0;
/* ------------------------------------------------------------
   Release working arrays:
   ------------------------------------------------------------ */
  mxFree(dep);
  mxFree(newperm);
/* ------------------------------------------------------------
   Copy requested output parameters (at least 1), release others.
   ------------------------------------------------------------ */
  i = MAX(nlhs, 1);
  memcpy(plhs,myplhs, i * sizeof(mxArray *));
  for(; i < NPAROUT; i++)
    mxDestroyArray(myplhs[i]);
}
