/*
    DFT++ is a density functional package developed by the research group
    of Professor Tomas Arias

    Copyright 1996-2003 Sohrab Ismail-Beigi

    This file is part of DFT++.

    DFT++ is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    DFT++ is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with DFT++; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

    Please see the file CREDITS for a list of authors.

    For academic users, we request that publications using results obtained with
    this software reference

    "New algebraic formulation of density functional calculation," by Sohrab Ismail-Beigi
    and T.A. Arias, Computer Physics Communications 128:1-2, 1-45 (June 2000).

    and, if using the wavelet basis, further reference

    "Multiresolution analysis of electronic structure: semicardinal and wavelet bases,"
    T.A. Arias, Reviews of Modern Physics 71:1, 267-311 (January 1999).

    and 

    "Robust ab initio calculation of condensed matter: transparent convergence through
    semicardinal multiresolution analysis,'' I.P. Daykov, T.A. Arias, and
    Torkel D. Engeness, Physical Review Letters, 90:21, 216402 (May 2003).

    For your convenience, preprints of the above articles may be obtained from
    http://arXiv.org/abs/cond-mat/9909130, 9805262, and 0204411, respectively.
*/

//
// This is just a simple routine that diagonalizes a complex Hermetian
// matrix using the Jacobi method (e.g. see Numerical Recipes).  It
// is slow but works, and is definitely portable!  To be used when
// you are having trouble interfacing with LAPACK or ESSL.
//
#include "header.h"

// Only include code if we need it
#ifdef DFT_USE_JACOBI

//
// For now this is only good for complex numbers...
//
#ifdef SCALAR_IS_COMPLEX
void
Jacobi(ComplexMatrix &Ain,real *evals, ComplexMatrix &evecs)
{
  if (Ain.nr != Ain.nc)
    die("A is not square in Jacobi()\n");
  if (!Ain.hermetian)
    die("A is not hermetian in Jabobi()\n");


  int n = Ain.nr;
  int i,j;
  int sweep;
  real S,thresh;
  int p,q;
  real fabsApq,fabsApp,fabsAqq,maxoffdiag;
  real a,c,blen,sitheta,temp;
  complex b;
  real t,zeta;
  real alpha,stau;
  complex beta,betastar;
  complex zp,zq;

#define MAXSWEEP 50

  /* local copy to work on */
  ComplexMatrix A(Ain);

  /* Copy Ain to A */
  for (i=0; i < n*n; i++)
    A.c[i] = Ain.c[i];

  /* Set evecs to identity */
  for (i=0; i < n; i++)
    for (j=0; j < n; j++)
      {
	if (i==j) evecs(i,j) = 1.0;
	else      evecs(i,j) = 0.0;
      }

  for (sweep=0; sweep < MAXSWEEP; sweep++)
    {
      /* S = sum of abs of real and imag of off diag terms */
      /* maxoffdiag is maximum size of off-diagonal element, and (p,q)
       * will contain its coordinates in the matrix A */
      maxoffdiag = 0.0;
      S = 0.0;
      for (i=0; i < n; i++)
	for (j=i+1; j < n; j++)
	  {
	    temp = fabs(A(i,j).x)+fabs(A(i,j).y);
	    S += temp;
	    if (temp >= maxoffdiag)
	      {
		maxoffdiag = temp;
		p = i;
		q = j;
	      }
	  }

      /* If we're done to machine precision, go home! */
      fabsApp = fabs(A(p,p).x)+fabs(A(p,p).y);
      fabsAqq = fabs(A(q,q).x)+fabs(A(q,q).y);
      if ( (maxoffdiag+fabsApp==fabsApp) && (maxoffdiag+fabsAqq==fabsAqq) )
	break;

      /* set threshold */
      if (sweep < 5)
	thresh = 0.4*S/(n*n);
      else
	thresh = 0.0;

      /* Loop over off diagonal terms of A in upper triangle: p < q */
      for (p=0; p < n; p++)
	for (q=p+1; q < n; q++)
	  {
	    /* If A(p,q) is too small compared to A(p,p) and A(q,q),
	     * skip the rotation */
	    fabsApq = fabs(A(p,q).x)+fabs(A(p,q).y);
	    fabsApp = fabs(A(p,p).x)+fabs(A(p,p).y);
	    fabsAqq = fabs(A(q,q).x)+fabs(A(q,q).y);
	    if ( (fabsApq+fabsApp==fabsApp) && (fabsApq+fabsAqq==fabsAqq) )
	      continue;
	    
	    /* If A(p,q) is smaller than the threshold, then also skip
	     * the rotation */
	    if (fabsApq <= thresh)
	      continue;

	    /* the 2x2 matrix we diagonalize is [ [a b] ; [conj(b) a] ] */
	    a = REAL(A(p,p));
	    c = REAL(A(q,q));
	    b = A(p,q);
	    blen = sqrt(b.x*b.x+b.y*b.y);
	    zeta = (c-a)/(2.0*blen);

	    /* t = sgn(zeta)/(|zeta|+sqrt(zeta^2+1)), but if zeta is too
	     * huge, then we set t = 1/(2*zeta) */
	    if ( fabs(zeta)>1.0e200 )
	      t = 1/(2.0*zeta);
	    else
	      {
		t = 1.0/(fabs(zeta)+sqrt(zeta*zeta+1.0));
		if (zeta<0.0)
		  t = -t;
	      }

	    /* The matrix we use to diagonalize the 2x2 block above is
	     * [ [alpha beta] ; [-conj(beta) alpha] ] 
	     * where alpha is real and positive and alpha = cos(theta)
	     * and beta = sin(theta)*b/|b|.
	     * The angle theta is chosen to diagonalize the 2x2 block.
	     * The relevant formula are sin(theta)=cos(theta)*t and
	     * cos(theta)=1/sqrt(1+t^2).
	     * stau = (1-alpha) cleverly written. */
	    alpha = 1.0/sqrt(t*t+1.0);
	    sitheta = t*alpha;
	    stau = sitheta*sitheta/(1.0+alpha);
	    temp = sitheta/blen;
	    beta.x = temp*b.x;
	    beta.y = temp*b.y;
	    betastar.x =  beta.x;
	    betastar.y = -beta.y;

	    /* Now we update the elements of A: */
	    /* This involves chaning the p'th and q'th column of A */
	    for (i=0; i < n; i++)
	      {
		if (i==p)
		  {
		    A(i,p).x -= blen*t;
		    A(i,q).x = A(i,q).y = 0.0;
		  }
		else if (i==q)
		  {
		    A(i,p).x = A(i,p).y = 0.0;
		    A(i,q).x += blen*t;
		  }
		else
		  {
		    zp = A(i,p);
		    zq = A(i,q);
/* 		    A(i,p) -= stau*zp + betastar*zq; */
/* 		    A(i,q) += beta*zp - stau*zq; */
		    A(i,p).x -= stau*zp.x + betastar.x*zq.x - betastar.y*zq.y;
		    A(i,p).y -= stau*zp.y + betastar.x*zq.y + betastar.y*zq.x;
		    A(i,q).x += beta.x*zp.x - beta.y*zp.y - stau*zq.x;
		    A(i,q).y += beta.x*zp.y + beta.y*zp.x - stau*zq.y;
		    A(p,i).x = A(i,p).x; A(p,i).y = -A(i,p).y;
		    A(q,i).x = A(i,q).x; A(q,i).y = -A(i,q).y;
		  }
	      }

	    /* Now we must update the eigenvector matrix with this
	     * rotation:  evecs <- evecs*P_pq.
	     * Update p'th and q'th column of evecs */
	    for (i=0; i < n; i++)
	      {
		zp = evecs(i,p);
		zq = evecs(i,q);
/* 		evecs(i,p) = alpha*zp - betastar*zq; */
/* 		evecs(i,q) = beta*zp  + alpha*zq; */
		evecs(i,p).x = alpha*zp.x - betastar.x*zq.x + betastar.y*zq.y;
		evecs(i,p).y = alpha*zp.y - betastar.x*zq.y - betastar.y*zq.x;
		evecs(i,q).x = beta.x*zp.x - beta.y*zp.y + alpha*zq.x;
		evecs(i,q).y = beta.x*zp.y + beta.y*zp.x + alpha*zq.y;
	      }
	  } /* (p,q) rotation loop */
    } /* end of sweep loop */

  if (sweep == MAXSWEEP)
    {
      fprintf(stderr,"\nWarning:  Jacobi() needs more than %d sweeps\n\n",
	      MAXSWEEP);
      fflush(stderr);
    }

  for (i=0; i < n; i++)
    evals[i] = REAL(A(i,i));

  /* sort eigs and evecs by ascending eigenvalue */
  int *list = (int *)mymalloc(sizeof(int)*n,"list","Jacobi()");
  for (i=0; i < n; i++)
    list[i] = i;
  for (i=1; i < n; i++)
    {
      temp = evals[i];
      j=i-1;
      while (j>=0 && evals[j]>temp)
	{
	  evals[j+1] = evals[j];
	  list[j+1] = list[j];
	  j--;
	}
      evals[j+1] = temp;
      list[j+1] = i;
    }
  A = evecs;
  for (i=0; i < n; i++)
    for (j=0; j < n; j++)
      evecs(i,j) = A(i,list[j]);
  free(list);

}

#else // SCALAR_IS_COMPLEX
#error Jacobi routine can not currently handle real scalars...
#endif

#endif // DFT_USE_JACOBI