/*
    DFT++ is a density functional package developed by the research group
    of Professor Tomas Arias

    Copyright 1996-2003 Sohrab Ismail-Beigi

    This file is part of DFT++.

    DFT++ is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    DFT++ is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with DFT++; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

    Please see the file CREDITS for a list of authors.

    For academic users, we request that publications using results obtained with
    this software reference

    "New algebraic formulation of density functional calculation," by Sohrab Ismail-Beigi
    and T.A. Arias, Computer Physics Communications 128:1-2, 1-45 (June 2000).

    and, if using the wavelet basis, further reference

    "Multiresolution analysis of electronic structure: semicardinal and wavelet bases,"
    T.A. Arias, Reviews of Modern Physics 71:1, 267-311 (January 1999).

    and 

    "Robust ab initio calculation of condensed matter: transparent convergence through
    semicardinal multiresolution analysis,'' I.P. Daykov, T.A. Arias, and
    Torkel D. Engeness, Physical Review Letters, 90:21, 216402 (May 2003).

    For your convenience, preprints of the above articles may be obtained from
    http://arXiv.org/abs/cond-mat/9909130, 9805262, and 0204411, respectively.
*/

/*
 *     Sohrab Ismail-Beigi           Mar. 30, 1997
 *
 * Calculates the gradient of the energy versus the electronic variables
 * Y (and also all the various routines needed to do this).
 *
 */

/* $Id: calcelecgrad.cpp,v 1.15.2.25 2003/05/29 18:54:14 ivan Exp $ */

#include "header.h"
#include "parallel.h"

/*
 * Calculates the local part of the self-consistent potential.
 */
void calc_Vscloc(Elecvars &evars, Elecinfo &einfo, Symmetries &symm)
{
#ifdef DFT_PROFILING
  timerOn(25);   // Turn on calc_Vscloc timer
#endif // DFT_PROFILING
  int dir,dag; 

  RealSpaceScalarFieldColumn &Vscloc = evars.Vscloc;
  CoeffSpaceScalarFieldColumn &Vlocpot = evars.Vlocpot;
  CoeffSpaceScalarFieldColumn &d = evars.d;
  RealSpaceScalarFieldColumn &n = evars.n;

  switch (einfo.exc_opt) {
  case DFT_EXCORR_GGA_PBE:
    {
        RealSpaceScalarFieldColumn rtemp(n);
        CoeffSpaceScalarFieldColumn ctemp(d);

	// TAA: Test of gradient "D" functions (remove at cleanup)
//  	{
//  	  Basis *basis = n.basis;
//  	  Lattice *lattice = basis->basis_spec->lattice;

//  	  int Nx,Ny,Nz,Nx2,Ny2,Nz2,NyNz,NxNyNz;
//  	  Nx = basis->basis_spec->Nx; Nx2 = Nx/2;
//  	  Ny = basis->basis_spec->Ny; Ny2 = Ny/2;
//  	  Nz = basis->basis_spec->Nz; Nz2 = Nz/2;
//  	  NyNz = Ny*Nz;
//  	  NxNyNz=basis->basis_spec->NxNyNz;
	  
//  	  RealSpaceScalarFieldColumn Dx(n);
	  
//  	  for (int i=-Nx2; i < Nx2; i++)
//  	    for (int j=-Ny2; j < Ny2; j++)
//  	      for (int k=-Nz2; k < Nz2; k++)
//  		{
//  		  int index = 0;
//  		  if (k < 0) index += k+Nz;        else index += k;
//  		  if (j < 0) index += Nz*(j+Ny);   else index += Nz*j;
//  		  if (i < 0) index += NyNz*(i+Nx); else index += NyNz*i;
//  		  n.data.d[index] = cos(11*2*M_PI*j/((double) Nx))*lattice->R.m[0][0]/(2*M_PI);
//  		}
//  	  int dir,dag;
//  	  apply_D(dir=1,dag=0,n,Dx);
//  	  double tmp=0.;
//  	  FILE *fid=fopen("out","w");
//  	  for (int i=0; i<NxNyNz; i++)
//  	    {
//  	      tmp+=Dx.data.d[i].x*Dx.data.d[i].x+Dx.data.d[i].y*Dx.data.d[i].y;
//  	      fprintf(fid,"%20.10lf %20.10lf %20.10lf\n",n.data.d[i].x,Dx.data.d[i].x,Dx.data.d[i].y);
//  	    }
//  	  fclose(fid);
//  	  exit(1);
//  	}
	// End Test gradient function

        //Vscloc = Jdag(Vlocpot+O(d+J(exc_LDA(n,Dx))));
	RealSpaceScalarFieldColumn Dx(n),Dy(n),Dz(n);
  	apply_D(dir=0,dag=0,n,Dx);
  	apply_D(dir=1,dag=0,n,Dy);
  	apply_D(dir=2,dag=0,n,Dz);

        exc_GGA(n,Dx,Dy,Dz,rtemp);
        apply_J(rtemp,ctemp); /* TAA: Add in Hartree pot... */ ctemp+=d;
        apply_O(ctemp,ctemp); /* TAA: Add in loc pot... */ ctemp+=Vlocpot;
        apply_Jdag(ctemp, Vscloc);

        //Vscloc += pointwise_mult(Dnexc_GGA(n,Dx),Jdag(O(J(n))))
	//    + sum_i Ddag_i( pointwise_mult(DD_iexc_GGA(n,Dx),Jdag(O(J(n)))) )
        apply_J(n,ctemp); apply_O(ctemp,ctemp); 
        apply_Jdag(ctemp,rtemp);

        Vscloc+=pointwise_mult(Dnexc_GGA(n,Dx,Dy,Dz),rtemp);
	Vscloc+=Ddag(dir=0,pointwise_mult(DDexc_GGA(dir=0,n,Dx,Dy,Dz),rtemp));
	Vscloc+=Ddag(dir=1,pointwise_mult(DDexc_GGA(dir=1,n,Dx,Dy,Dz),rtemp));
	Vscloc+=Ddag(dir=2,pointwise_mult(DDexc_GGA(dir=2,n,Dx,Dy,Dz),rtemp));

        // and here is the place to call the symmetrizer Sdag to the potential
        symmetrize_n_dag(Vscloc, &symm);
        break;
    }
    case DFT_EXCORR_LDA:
      /*
       * Vscloc = Jdag(Vlocps) + Jdag(O(d)) + Jdag(O(J(exc(n)))) + 
       *            pointwise_mult(excprime(n),Jdag(O(J(n))));
       */
    {//ipd: why ptwise_mult- can't we overload *
        RealSpaceScalarFieldColumn rtemp(n);
        CoeffSpaceScalarFieldColumn ctemp(d);
        exc_LDA(n,rtemp);
        apply_J(rtemp,ctemp); ctemp += d;
        apply_O(ctemp,ctemp); ctemp += Vlocpot;	
        apply_Jdag(ctemp, Vscloc);
        //Vscloc = Jdag(Vlocpot+O(d+J(exc_LDA(n)))); 
        apply_J(n,ctemp); apply_O(ctemp,ctemp); 
        apply_Jdag(ctemp,rtemp);
        //Vscloc += pointwise_mult(excprime_LDA(n),Jdag(O(J(n))));
        Vscloc += pointwise_mult(excprime_LDA(n),rtemp);
        // and here is the place to call the symmetrizer Sdag to the potential
        symmetrize_n_dag(Vscloc, &symm);
        break; 
      }
      // Right now, we're only supporting PW LDA xc.  --mhe
      // And Teter's LSD.  --mhe
      
//    case DFT_EXCORR_GGA_PW91:
//      if (einfo.spintype == NOSPIN) {
//        Vscloc = Jdag(Vlocps+O(d+J(exc_LDA(n)+exprime_GC(n))));
//        Vscloc += pointwise_mult(excprime_LDA(n),Jdag(O(J(n))));
//      } else {
//        die("GGA_PW91 not implemented with spin\n");
//      }
//      break;

  case DFT_EXCORR_LSD_TETER:
    {
      RealSpaceScalarFieldColumn &Vscloc_up = evars.Vscloc_up;
      RealSpaceScalarFieldColumn &Vscloc_dn = evars.Vscloc_dn;

      RealSpaceScalarFieldColumn exc(Vscloc);
      RealSpaceScalarFieldColumn excprime_up(Vscloc);
      RealSpaceScalarFieldColumn excprime_dn(Vscloc);

      exc = exc_LSD_TETER(evars.n_up, evars.n_dn);
      excprime_LSD_TETER(evars.n_up, evars.n_dn, excprime_up, excprime_dn);

      Vscloc_up  = Jdag(Vlocpot);
      Vscloc_up += Jdag(O(d));
      Vscloc_up += Jdag(O(J(exc)));
      Vscloc_up += pointwise_mult(excprime_up,Jdag(O(J(n))));
      symmetrize_n_dag(Vscloc_up, &symm);
      
      Vscloc_dn  = Jdag(Vlocpot);
      Vscloc_dn += Jdag(O(d));
      Vscloc_dn += Jdag(O(J(exc)));
      Vscloc_dn += pointwise_mult(excprime_dn,Jdag(O(J(n))));
      symmetrize_n_dag(Vscloc_dn, &symm);
      break;
    }
    default:
      die("That exchange correlation is not supported.");
  }


#ifdef DFT_PROFILING
  timerOff(25);   // Turn off calc_Vscloc timer
#endif // DFT_PROFILING 
}

/*
 * Calls calc_Vscloc() [above] and then
 * calculates and diagonalizes the subspace Hamiltonian.
 */
void
calc_Hsub(Everything &everything)
{
  Elecvars &evars = everything.elecvars;
  Elecinfo &einfo = everything.elecinfo;
  Symmetries &symm = everything.symm;

  int q;
  /* Local workspaces */
  /*
   * to avoid allocate/deallocate many times, create for largest
   * col_length. 
   */
  //ipd: this won't work w/ the current structures
  // update it when we have scratch space manager
/*    int max_col_length = 0; */
/*    for (q=0; q < einfo.nstates; q++) */
/*      if (max_col_length < bs[q].C.col_length) */
/*        max_col_length = bs[q].C.col_length; */


  // if not doing band structure with reading the potential from file
  // we must compute it
  if( (everything.cntrl.band_structure_flag==FALSE)||(einfo.read_vscloc_flag==FALSE) )
    calc_Vscloc(evars,einfo,symm);
  
  
  for (q=0; q < einfo.nstates; q++) {
    BlochState &s = evars.states[q];
  
    ColumnBundle HspC(s.C);
    apply_Hsp(s.C, HspC, *s.Vscloc, everything);
    s.Hsub = (s.C)^HspC;
    s.Hsub.hermetian = 1;
    diagonalize_herm(s.Hsub_eigs,s.Hsub_evecs,s.Hsub,
                     s.Hsub.nr);
  }
}

/*
 * Call calc_Vscloc() [above] and then
 * calculates the energy gradient versus Y and B
 * and calculates and diagonalizes the subspace Hamiltonian.
 */
void
calc_elecgrad_and_Hsub(ColumnBundle **Ygrad, ComplexMatrix **Bgrad,
		       Everything &everything)
{

#ifdef DFT_PROFILING
  timerOn(10); // Turn on calc_elecgrad_... timer
#endif // DFT_PROFILING

  Elecinfo &einfo = everything.elecinfo;
  Elecvars &evars = everything.elecvars;
  Symmetries &symm = everything.symm;
  int q;

  /* Local workspaces */
  /*
   * to avoid allocate/deallocate many times, create for largest
   * col_length. 
   */
/*  int max_col_length = 0;
  for (q=0; q < einfo.nstates; q++)
    if (max_col_length < bs[q].C.col_length)
      max_col_length = bs[q].C.col_length;
*/
//  ColumnBundle tmp(bs[0].C.tot_ncols,max_col_length);
  calc_Vscloc(evars,einfo,symm);
  for (q=0; q < einfo.nstates; q++) {
    BlochState &s = evars.states[q];
    ColumnBundle tmp(s.C);
    /* Apply Hsp, calculate Hsub, and diagonalize Hsub */
    apply_Hsp(s.C,*Ygrad[q],*s.Vscloc, everything);
    s.Hsub = (s.C)^(*Ygrad[q]);
    s.Hsub.hermetian = 1;

    diagonalize_herm(s.Hsub_eigs,s.Hsub_evecs,s.Hsub,s.Hsub.nr);

    /* Calc Y gradient */
/* The code/mess below does: */
/*       HspC = Hsp(C[q],ioninfo) */
/*       Ygrad[q]  = Pbar(C[q],HspC*(F[q]*V[q]*Umhalf[q])); */
/*       Ygrad[q] += O(C[q]*V[q]*Q(Vdag[q]*(Hsub[q]*F[q]-F[q]*Hsub[q])*V[q],W[q],mu[q])); */
/*       Ygrad[q] *= w[q]; */
    if (einfo.subspace_rotation) {
        do_ColumnBundle_matrix_mult(*Ygrad[q],(s.F)*(s.V)*
                                    (s.Umhalf),tmp,0);
        apply_Pbar(s.C,tmp,*Ygrad[q]);
        do_ColumnBundle_matrix_mult(
            s.C,
            (s.V)*Q(herm_adjoint(s.V)*((s.Hsub)*(s.F)-
                                       (s.F)*(s.Hsub))*
                    (s.V),s.W,s.mu),
            tmp,0);
    } else {
      do_ColumnBundle_matrix_mult(*Ygrad[q],(s.F)*(s.Umhalf),tmp,0);
      apply_Pbar(s.C,tmp,*Ygrad[q]);
      do_ColumnBundle_matrix_mult(
        s.C,
        Q((s.Hsub)*(s.F)-(s.F)*(s.Hsub),s.W,
          s.mu),
        tmp,0);
    }
    // O(in,out) should work for this, right?
    //apply_O_inplace(tmp);
    O(tmp,tmp);
    *Ygrad[q] += tmp;
    *Ygrad[q] *= s.w;

    if (einfo.subspace_rotation) {
      /* Calc B gradient and symmetrize it */
      /* Bgrad[q] = R([Hsub[q],F[q]]) */
      *Bgrad[q] = R(((s.Hsub)*(s.F)-(s.F)*(s.Hsub)),s.Z,
                    s.beta);
      *Bgrad[q] += herm_adjoint(*Bgrad[q]);
      *Bgrad[q] *= (scalar)(0.5*(s.w));
    }
  }

#ifdef DFT_PROFILING
  timerOff(10); // Turn off calc_elecgrad_... timer
#endif // DFT_PROFILING
}

/*
 * Does what the above routine does plus preconditioning the gradient.
 * The preconditioning is diagonal inverse kinetic for the plane-wave basis.
 * Namely, the average kinetic energy per band is calculated 
 * (via ener->KE/einfo.nelectrons) and this (times 2.0) is used
 * as the roll-over value of the kinetic energy for the preconditioning.
 */
void
calc_elecgrad_pgrad_and_Hsub(ColumnBundle **Ygrad, ColumnBundle **pYgrad,
			     ComplexMatrix **Bgrad, ComplexMatrix **pBgrad,
			     Everything &everything)
{
#ifdef DFT_PROFILING
  timerOn(10); // Turn on calc_elecgrad_... timer
#endif // DFT_PROFILING

  Elecinfo &einfo = everything.elecinfo;
  Elecvars &evars = everything.elecvars;
  Symmetries &symm = everything.symm;

  int q;

  /* Local workspaces */
  /*
   * to avoid allocate/deallocate many times, create for largest
   * col_length. 
   */
  // can't do this anymore: problem? --csg
  // 
  //  int max_col_length = 0;
  //for (q=0; q < einfo.nstates; q++)
  //  if (max_col_length < s.C.col_length)
  //    max_col_length = s.C.col_length;
  //
  //  ColumnBundle tmp(bs[0].C.tot_ncols,max_col_length);

  /* Roll-over value for kinetic preconditioning:  2.0*average kinetic
   * energy per band. */
  // This is now computed inside K.
  //real KErollover;
  //KErollover = 2.0*ener.KE/einfo.nelectrons;

  calc_Vscloc(evars,einfo,symm);
  for (q=0; q < einfo.nstates; q++) {
    BlochState &s = evars.states[q];
    ColumnBundle tmp(s.C);

    /*
     * In this wonderful mess below, we calculate the subspace Hamiltonian,
     * the gradient, and the preconditioned gradient!  The formulae:
     *  C[q] = Y[q]*Umhalf[q]*Vdag[q]
     *  Hsub[q] = C[q]*Hsp(C[q])
     *  Ygrad[q] = w[q]*(g1 + g2)
     *  g1 = Pbar(Hsp(C[q]*F[q]*V[q]*Umhalf[q]))
     *     = Pbar(Hsp(Y[q]*Umhalf[q]*Vdag[q]*F[q]*V[q]*Umhalf[q]))
     *  g2 = O(C[q]*V[q]*Q(Vdag[q]*[Hsub[q],F[q]]*V[q]))
     *  pYgrad[q] = w[q]*(Pbar(precond(Pbar(Hsp(Y[q])))) + g2)
     */

    /* First we calculate Hsp(Y[q]), Hsp(Y[q])*Umhalf[q]*Vdag[q]=Hsp(C[q]), and Hsub[q] */

    // here pYgrad is the OUTPUT
    apply_Hsp(s.Y,*pYgrad[q],*s.Vscloc, everything);

    if (einfo.subspace_rotation)
      do_ColumnBundle_matrix_mult(*pYgrad[q],
                                  (s.Umhalf)*herm_adjoint(s.V), tmp,0);
    else
      do_ColumnBundle_matrix_mult(*pYgrad[q],(s.Umhalf),tmp,0);
      

    s.Hsub = (s.C)^tmp;

    /* Digonalize Hsub[q] */
    s.Hsub.hermetian = 1;
    diagonalize_herm(s.Hsub_eigs,s.Hsub_evecs,s.Hsub,
                     s.Hsub.nr);
    
    /* Now calculate g1 and the first term of pYgrad[q] */
    apply_Pbar(s.C,*pYgrad[q],tmp);
    if (einfo.subspace_rotation)
      do_ColumnBundle_matrix_mult(
        tmp,
        (s.Umhalf)*herm_adjoint(s.V)*(s.F)*(s.V)
        *(s.Umhalf),
        *Ygrad[q],0);
    else
      do_ColumnBundle_matrix_mult(tmp,(s.Umhalf)*(s.F)*
                                  (s.Umhalf),*Ygrad[q],0);
    //precond_inv_kinetic(tmp,KErollover);
    // ipd 09/14/02: Uncommenting the next 2 lines to the original state
    // looks like matt forgot about them :(
    K(tmp,everything);
    apply_Pbar(s.C,tmp,*pYgrad[q]);
    // Ygrad contains Pbar*Hsp*Y*Umhalf*F*Umhalf
    // tmp contains Pbar*Hsp*Y

    // Precondition only the first term in the gradient.
    // ipd 09/14/02: Commenting out the next 2 lines to the original state
    //*pYgrad[q] = *Ygrad[q];
    //K(*pYgrad[q],everything);

    /* Add in g2 = O*C*Q() term to Ygrad[q] and pYgrad[q] */
    if (einfo.subspace_rotation)
      do_ColumnBundle_matrix_mult(
        s.C,
        (s.V)*Q(herm_adjoint(s.V)*
                ((s.Hsub)*(s.F)-(s.F)*(s.Hsub))*
                (s.V),s.W,s.mu),
        tmp,0);
    else
      do_ColumnBundle_matrix_mult(s.C,
                                  Q((s.Hsub)*(s.F)-(s.F)*
                                    (s.Hsub),s.W,s.mu),
                                  tmp,0);
    //apply_O_inplace(tmp);
    O(tmp,tmp);
    *Ygrad[q] += tmp;
    // Use if we're only preconditioning the first term.
    *pYgrad[q] += tmp;
    // Precondition here.
    //*pYgrad[q] = *Ygrad[q]; 
    //K(*pYgrad[q],everything);

    /* scale by weights */
    *Ygrad[q] *= s.w;
    *pYgrad[q] *= s.w;
    
    /* Calc B gradient and symmetrize it */
    /* Bgrad[q] = R([Hsub[q],F[q]]) */
    if (einfo.subspace_rotation) {
      *Bgrad[q] = R(((s.Hsub)*(s.F)-(s.F)*(s.Hsub)),s.Z,
                    s.beta);
      *Bgrad[q] += herm_adjoint(*Bgrad[q]);
      *Bgrad[q] *= (0.5 * (s.w));
      *pBgrad[q] = ((scalar) einfo.pcond_for_Bgrad) * (*Bgrad[q]);
    }
  }
  
#ifdef DFT_PROFILING
  timerOff(10); // Turn off calc_elecgrad_... timer
#endif // DFT_PROFILING
  
}

/*
 * Calculates the energy gradient versus Y of the "energy" (objective)
 * function used in band structure calculations:
 *
 *         E_band = sum_q tr( C[q]^H*C[q] )
 */
void
calc_elecgrad_band_struct(ColumnBundle **Ygrad,
                          Everything &everything)
{
  
#ifdef DFT_PROFILING
  timerOn(10); // Turn on calc_elecgrad_... timer
#endif // DFT_PROFILING
  
  Elecinfo &einfo = everything.elecinfo;
  Elecvars &evars = everything.elecvars;
  
  int q;
  /* Local copies to make the formulae easier to read */
  //column_bundle *C = evars.C;
  //matrix *Umhalf = evars.Umhalf;
  
  /*
   * Local workspace:
   * to avoid allocate/deallocate many times, create for largest col_length. 
   */
  //int max_col_length = 0;
  //for (q=0; q < einfo.nstates; q++)
  //  if (max_col_length < s.C.col_length)
  //    max_col_length = s.C.col_length;
  //ColumnBundle tmp(bs[0].C.tot_ncols,max_col_length);
  
  for (q=0; q < einfo.nstates; q++){
    BlochState &s = evars.states[q];
  
    // manually adjust ColumnBundle length;
    //tmp.col_length = s.C.col_length;
    //copy_innards_ColumnBundle(&(s.C),&tmp);
    ColumnBundle tmp(s.C);

    /*
     * Calc Y gradient, with formula give by:
     *   Ygrad[q] = Pbar(H(C[q]))*U[q]^{-1/2}
     */
    apply_Hsp(s.C,*Ygrad[q],*s.Vscloc, everything);
    do_ColumnBundle_matrix_mult(*Ygrad[q],s.Umhalf,tmp,0);
    apply_Pbar(s.C,tmp,*Ygrad[q]);
    
  }
#ifdef DFT_PROFILING
  timerOff(10); // Turn off calc_elecgrad_... timer
#endif // DFT_PROFILING
}

/*
 * This routine calculates the gradient and preconditioned gradient
 * for band structure minimizations.  Preconditioning is done just
 * like above, except that since we don't have the kinetic energy
 * of the electrons explicitely, we *GUESS*:  the average kinetic
 * energy per electron for choosing the preconditioner roll-over point
 * is chosen to be 1 Hartree for now.
 */
void
calc_elecgrad_pgrad_band_struct(ColumnBundle **Ygrad,
                                ColumnBundle **pYgrad,
                                Everything &everything)
{
#ifdef DFT_PROFILING
  timerOn(10); // Turn on calc_elecgrad_... timer
#endif // DFT_PROFILING
  
  Elecinfo &einfo = everything.elecinfo;
  Elecvars &evars = everything.elecvars;

  int q;
  /* Local copies to make the formulae easier to read */
//    column_bundle *C = evars.C;
//    column_bundle *Y = evars.Y;
//    matrix *Umhalf = evars.Umhalf;
  
  /* Local workspaces:
   * to avoid allocate/deallocate many times create for largest col_length. */
  //int max_col_length = 0;
  //for (q=0; q < einfo.nstates; q++)
  //  if (max_col_length < bs[q].C.col_length)
  //   max_col_length = bs[q].C.col_length;
  //ColumnBundle tmp(bs[0].C.tot_ncols,max_col_length);

  /* Roll-over value for kinetic preconditioning:  2.0*average kinetic
   * energy per band. */
  // This is now done in K.
  //real kinetic_energy_per_electron = 1.0; // ener.KE/einfo.nelectrons
  //real KErollover;
  //KErollover = 2.0 * kinetic_energy_per_electron;
  for (q=0; q < einfo.nstates; q++) {
    BlochState &s = evars.states[q];
    // manually adjust ColumnBundle length;
    //tmp.col_length = bs[q].C.col_length;
    //copy_innards_ColumnBundle(&(bs[q].C),&tmp);
    ColumnBundle tmp(s.C);

    /* 
     * Compute gradient and preconditioned gradient.  Formulae:
     * Ygrad[q]  = Pbar(H(Y[q]))*U^-1
     * pYgrad[q] = Pbar(precondition(Pbar(H(Y[q]))))
     */
    apply_Hsp(s.Y,*pYgrad[q],*s.Vscloc, everything);
    apply_Pbar(s.C,*pYgrad[q],tmp);
    do_ColumnBundle_matrix_mult(tmp,(s.Umhalf)*(s.Umhalf),
                                *Ygrad[q],0);
    //precond_inv_kinetic(tmp,KErollover);
    K(tmp,everything);
    apply_Pbar(s.C,tmp,*pYgrad[q]);
  }
  
#ifdef DFT_PROFILING
  timerOff(10); // Turn off calc_elecgrad_... timer
#endif // DFT_PROFILING
  
}

/*
 * Prints the eigenenergies of the subspace Hamiltonian for each
 * state.
 */
void print_Hsub_eigs(Everything &everything,Output *out)
{
  Elecinfo &einfo = everything.elecinfo;
  Elecvars &evars = everything.elecvars;
  
  int q,i;
  
  out->printf("Band energies:\n");
  for (q=0; q < einfo.nstates; q++){
    out->printf("\nstate = %d   q_k = [ %lg %lg %lg ]   w = %lg",
                q,evars.states[q].qnum.kvec.v[0],
                evars.states[q].qnum.kvec.v[1],
                evars.states[q].qnum.kvec.v[2],
                evars.states[q].w);
    if (einfo.spintype == FREESPIN)
      out->printf("   spin = free\n");
    else
      out->printf("   spin = %d\n",evars.states[q].qnum.spin);
    
    out->printf("%4s  %13s  %13s  %13s\n",
		"band","filling   ","diag(Hsub) ","epsilon   ");
    out->printf("-------------------------------------------------\n");
    for (i=0; i < evars.states[q].Hsub.nr; i++)
      out->printf("%4d  %13.6le  %13.6le  %13.6le\n",
                  i,REAL(evars.states[q].F.c[i]),
                  REAL(evars.states[q].Hsub(i,i)),
                  evars.states[q].Hsub_eigs[i]);
  }
  out->flush();
}


syntax highlighted by Code2HTML, v. 0.9.1