/*
    DFT++ is a density functional package developed by the research group
    of Professor Tomas Arias

    Copyright 1996-2003 Sohrab Ismail-Beigi

    This file is part of DFT++.

    DFT++ is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    DFT++ is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with DFT++; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

    Please see the file CREDITS for a list of authors.

    For academic users, we request that publications using results obtained with
    this software reference

    "New algebraic formulation of density functional calculation," by Sohrab Ismail-Beigi
    and T.A. Arias, Computer Physics Communications 128:1-2, 1-45 (June 2000).

    and, if using the wavelet basis, further reference

    "Multiresolution analysis of electronic structure: semicardinal and wavelet bases,"
    T.A. Arias, Reviews of Modern Physics 71:1, 267-311 (January 1999).

    and 

    "Robust ab initio calculation of condensed matter: transparent convergence through
    semicardinal multiresolution analysis,'' I.P. Daykov, T.A. Arias, and
    Torkel D. Engeness, Physical Review Letters, 90:21, 216402 (May 2003).

    For your convenience, preprints of the above articles may be obtained from
    http://arXiv.org/abs/cond-mat/9909130, 9805262, and 0204411, respectively.
*/

#include "header.h"


/* Returns (I-P)Y...the complement projector of P=O*C*Cdag where
 * Cdag is the Hermetian adjoint of C and C = Y*U(-1/2).
 * Thus the routine returns Pbar*Yin = (I-O*C*Cdag)*Yin. */
ColumnBundle
Pbar(const ColumnBundle &C,const ColumnBundle &Y)
{
  ColumnBundle result(Y);

  apply_Pbar(C, Y, result);

  return result;
}

/* Less memory intensive version of Pbar() above */
void
apply_Pbar(const ColumnBundle &C,
	   const ColumnBundle &Y,
	   ColumnBundle &PbarY)
{
#ifdef DFT_PROFILING

  timerOn(30); // turn on Pbar timer

#endif // DFT_PROFILING


  do_ColumnBundle_matrix_mult(C,-1.*(C^Y),PbarY,0);
  O(PbarY,PbarY);
  //PbarY.negate();

  PbarY += Y;

#ifdef DFT_PROFILING

  timerOff(30); // turn off Pbar timer

#endif // DFT_PROFILING

}


//

// Applies the local potential Vscloc to Y and accumulates

// the result into HspY.  It does this on the columns start_col

// to start_col+n_cols_todo-1 of Y.

//

// the last two arguments are necessary for reuse in case of threads

//

void
apply_Vsc(const ColumnBundle &Y, ColumnBundle &HspY,
          const RealSpaceScalarFieldColumn &V, 
          int start_col,int n_cols_todo)
{
  // Local part of self-consistent potential: Idag*Diag(Vsc)*I*Y

  //

  // HspY(:,i) += Idag*Diag(Vscloc)*I*Y(:,i) (i'th column of both sides)

  //

  int i;
  // Holds I of Ycol

  RealSpaceScalarFieldColumn IYcol(V);
  //ipd: since the columns of Y are slim you can't use the copy constructor

  //     in its current shape

  //CoeffSpaceWavefunctionColumn IdagVIYcol(*Y.col[start_col]);

  CoeffSpaceWavefunctionColumn IdagVIYcol(*Y.col[start_col],"fat");

  
  for (i = start_col; i < start_col+n_cols_todo; i++) {
    // Apply I to Ycol

    apply_I(*Y.col[i],IYcol);
    
    IYcol *= V;
    
    //ipd: this will generate 2 extra copies of a column

    apply_Idag(IYcol,IdagVIYcol);
    //ipd: avoid one of them by

    // apply_Idag(IYcol, IdagVIYcol);

    //ipd: save a column copy inside Idag by upgrading Idag and adding

    // Idag(IYcol, IYcol);

    // IYcol.map(IdagVIYcol);

      
    // Accumulate into the i'th column of HspY

    *(HspY.col[i]) += IdagVIYcol;
  }

}

#ifdef DFT_THREAD

//

// Threads run this routine.  It just applies Vsc to the columns

// this thread is working on by calling the above routine.

//

void *
apply_Vsc_thread(void *arg)
{
  // Decode the data passed to the thread

  dft_thread_data *data = (dft_thread_data *)arg;

  ColumnBundle *Y = (ColumnBundle *)data->p1;
  ColumnBundle *HspY = (ColumnBundle *)data->p2;
  RealSpaceScalarFieldColumn *V = (RealSpaceScalarFieldColumn *)data->p3;
  int start_col = data->start;
  int n_cols_todo = data->n;

  // Do the work

  apply_Vsc(*Y,*HspY,*V,start_col,n_cols_todo);

  // Free data-passing object and exit

  myfree(arg);
  return NULL;
}
#endif // DFT_THREAD



/*
 * Multiply ColumnBundle by single-particle Hamiltonian:
 *
 * Hsp = -0.5*L + Idag*Diag(Vscloc)*I
 *              + sum(ions,l,m,...) { Vnl*M*Vnl^ }
 *
 * i.e. Kinetic operator, local self-consistent potential, and non-local
 * potential respectively.
 *
 * Takes Vscloc and Everything is a hack for Vnl
 * It actaully calls the routine below it...
 */
ColumnBundle
Hsp(const ColumnBundle &Y, const RealSpaceScalarFieldColumn &Vscloc,
    Everything &e)
{
  /* Holds final result */
  ColumnBundle HspY(Y);

  /* Do the work!! */
  apply_Hsp(Y,HspY,Vscloc,e);

  return HspY;
}

//

// Actually does the work of applying Hsp onto Y.

//

void
apply_Hsp(const ColumnBundle &Y, ColumnBundle &HspY,
          const  RealSpaceScalarFieldColumn &Vscloc, Everything &e)
{
#ifdef DFT_PROFILING

  timerOn(26);   // Turn on apply_Hsp timer

#endif // DFT_PROFILING

  // Kinetic part: HspY = -0.5*L(Y);

  L(Y,HspY);

  HspY *= -0.5;
  
#ifdef DFT_PROFILING

  timerOn(42);  // Local Hsp application timer

#endif

  
  // Local part of self-consistent potential: Idag*Diag(Vsc)*I*Y 



#ifdef DFT_THREAD

  // Threads!  Distribute the work

  dft_call_threads(Y.my_ncols,
                   (void *)&Y,(void *)&HspY,(void *)&Vscloc,
                   NULL,NULL,
                   0,0,0,0,0,0,
                   apply_Vsc_thread);
#else

  // Serial mode:  do the work in one big call

  apply_Vsc(Y,HspY,Vscloc,0,Y.my_ncols);

//  (Y^Y).print(); exit(1);

#endif


#ifdef DFT_PROFILING

  timerOff(42);  // Local Hsp application timer

  timerOn(43); // Nonlocal timer

#endif


  // This is a slightly generalized version of a non-local pseudopotential

  // operator.  It applies Vnl to Y, and adds the result to HspY.

  //ipd: we have to decide about this guy - what do we need to pass

  // but whatever it is the bloch state could know about it

  accumulate_Vnl(Y,HspY,e);
  //(HspY^HspY).print(); exit(1);


#ifdef DFT_PROFILING

  timerOff(43);  // nonlocal timer

#endif


#ifdef DFT_PROFILING

  timerOff(26);   // Turn off apply_Hsp timer

#endif // DFT_PROFILING

}


syntax highlighted by Code2HTML, v. 0.9.1