/*
    DFT++ is a density functional package developed by the research group
    of Professor Tomas Arias

    Copyright 1996-2003 Sohrab Ismail-Beigi

    This file is part of DFT++.

    DFT++ is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    DFT++ is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with DFT++; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

    Please see the file CREDITS for a list of authors.

    For academic users, we request that publications using results obtained with
    this software reference

    "New algebraic formulation of density functional calculation," by Sohrab Ismail-Beigi
    and T.A. Arias, Computer Physics Communications 128:1-2, 1-45 (June 2000).

    and, if using the wavelet basis, further reference

    "Multiresolution analysis of electronic structure: semicardinal and wavelet bases,"
    T.A. Arias, Reviews of Modern Physics 71:1, 267-311 (January 1999).

    and 

    "Robust ab initio calculation of condensed matter: transparent convergence through
    semicardinal multiresolution analysis,'' I.P. Daykov, T.A. Arias, and
    Torkel D. Engeness, Physical Review Letters, 90:21, 216402 (May 2003).

    For your convenience, preprints of the above articles may be obtained from
    http://arXiv.org/abs/cond-mat/9909130, 9805262, and 0204411, respectively.
*/

#include <stdio.h>
#include <math.h>
#include <stdlib.h>
#include <sys/time.h>
#include "WL_header.h"

#define HFL 3
#define TIMELOOP 50

#define PX 3
#define PY 3
#define PZ 3

int Px,Py,Pz;
int DIR;

/* Computes a multinomial function at a point in space (OUTPUT)
   USES multinomial exponents from GLOBAL variables Px, Py, Pz!!!
   r: real-space point
   return value: value of function
*/
double multi(struct Dvec r) {
  return( pow(r.x,Px)*pow(r.y,Py)*pow(r.z,Pz) );
}

double multiD(struct Dvec r) {
  switch (DIR) {
  case 0:
    if (Px==0) 
      return 0.;
    else
      return( Px*pow(r.x,Px-1)*pow(r.y,Py)*pow(r.z,Pz) );
  case 1:
    if (Py==0)
      return 0.;
    else
      return( Py*pow(r.x,Px)*pow(r.y,Py-1)*pow(r.z,Pz) );
  case 2:
    if (Pz==0)
      return 0.;
    else
      return( Pz*pow(r.x,Px)*pow(r.y,Py)*pow(r.z,Pz-1) );
  default:
    printf("DIR=%d in multiD()!?!\n");
    exit(1);
  }
}

main()
{
  /* Space for grid specification structure */
  struct Gridspec spec[NgridMx];

  /* Pointer to special grids (will be allocated below) */
  struct Grid *ones,*onesc,*nononesc,*zeros,*ghosts,*nonghosts;

  /* Size needed for workspace */
  struct Ivec szmx={0,0,0};

  /* Pointer to "grid"s */
  struct Grid *grid,*grid1,*grid2,*grid3,*workgrid;

  /* Scale and origin structure for top level */
  struct Dvec org={-4., -4., -4.};

  /* Work space variables */
  double *work,*work_extra;
  int lwork;

  /* Timer variables */
  struct timeval time1,time2;
  double rtime,mflop;

  /* Misc variables */
  FILE *fp;
  double tmp;
  int i,j,k;

  printf("\n-----\n");
  /* Read grid structure from file */
  if ( (fp=fopen("gridspec","r")) == NULL )
    {
      fprintf(stderr,"Can't open file \"gridspec\" for input.\n");
      fprintf(stderr,"Exiting.\n");
      exit(1);
    }
  readgridspec(fp,spec);
  close(fp);
  /* Set top level grid spacings to something non-trivial */
  spec[0].scale.x=2.; spec[0].scale.y=3.; spec[0].scale.z=4.;
  printf("Seting (dx,dy,dz)=(%lf,%lf,%lf)...\n\n",
	 spec[0].scale.x,spec[0].scale.y,spec[0].scale.z);

  /* Make and allocate special grids */
  zeros=mkgridnew(spec,NULL);
  ones=mkgridnew(spec,NULL);
  onesc=mkgridnew(spec,NULL);
  nononesc=mkgridnew(spec,NULL);
  ghosts=mkgridnew(spec,NULL);
  nonghosts=mkgridnew(spec,NULL);

  /* Get needed work array size and memory */
  getgridsizemx(ones,&szmx);
  lwork=(szmx.x+3*HFL)*(szmx.y+3*HFL)*(szmx.z+3*HFL);
  if ( (work=(double *) malloc(lwork*sizeof(double))) == NULL )
    {
      fprintf(stderr,"malloc failed for %d doubles in main (1)\n",lwork);
      fprintf(stderr,"Exiting.\n");
      exit(1);
    }
  if ( (work_extra=(double *) malloc(lwork*sizeof(double))) == NULL )
    {
      fprintf(stderr,"malloc failed for %d doubles in main (1)\n",lwork);
      fprintf(stderr,"Exiting.\n");
      exit(1);
    }
  
  for(i=0;i<szmx.x;i++)
    for(j=0;j<szmx.y;j++)
      for(k=0;k<szmx.z;k++)
    {
      work[i*szmx.y*szmx.z+j*szmx.z+k]=rand(); 
      work_extra[i*szmx.y*szmx.z+j*szmx.z+k]=rand(); 
    }

  /* Fill in special grids */
  evalgrid(zeros,org,zero);
  evalgrid(ones,org,one);
  evalgrid(onesc,org,one);
  J(onesc,work,work_extra,lwork);
  gridsub(nononesc,ones,onesc);
  gridghost(ghosts);
  gridsub(nonghosts,ones,ghosts);

  /* Make and allocate "grid"s */
  grid=mkgridnew(spec,NULL);
  grid1=mkgridnew(spec,NULL);
  grid2=mkgridnew(spec,NULL);
  grid3=mkgridnew(spec,NULL);

  workgrid=mkgridnew(spec,NULL);

  printf("Timing %d iterations... \n",TIMELOOP);
  /* Time D */
  evalgrid(grid1,org,rnd); J(grid1,work,work_extra,lwork); /* Coeff space */
  for (i=0; i<3; i++) {
    gettimeofday(&time1,NULL);
    for (j=0; j<TIMELOOP; j++)
      D(i,grid3,grid1,workgrid,work,work_extra,lwork);
    gettimeofday(&time2,NULL);
    rtime=1.*(time2.tv_sec-time1.tv_sec)+(time2.tv_usec-time1.tv_usec)/1.e6;

    /* APPROXIMATE flop count */
    /* Flops for I & Dabove */
    mflop=0.;
    for (j=0; spec[j].level!=-1; j++) {
      if (spec[j].level!=0) {
	mflop+= /* I */
	  (spec[j].dim.x+2*HFL)*(spec[j].dim.y+2*HFL)*(spec[j].dim.z+2*HFL)*
	  (1.+0.5+0.25)*(3+7)*(TIMELOOP)/1e6;
	mflop+=1e-6*TIMELOOP* /* Dabove */
	  (
	   (spec[j].dim.x/2+5)*(spec[j].dim.y/2+5)*spec[j].dim.z+
	   (spec[j].dim.x/2+5)*spec[j].dim.y*spec[j].dim.z+
	   spec[j].dim.x*spec[j].dim.y*spec[j].dim.z
	   )*((2/3.)*7.5+(1./3.)*6.5);
      }
    }
  /* Flops for Dsame */
  for (j=0; spec[j].level!=-1; j++) {
    mflop+=1e-6*TIMELOOP*
      spec[j].dim.x*spec[j].dim.y*spec[j].dim.z*5;
  }
  printf("D%d:  %.0lf MFLOP in %.3f sec ==> %.0lf MFLOPS.\n",i,
	 mflop,rtime,mflop/rtime);
  }


  /* Time Ddag */
  evalgrid(grid1,org,rnd); /* Real space */
  for (i=0; i<3; i++) {
    gettimeofday(&time1,NULL);
    for (j=0; j<TIMELOOP; j++)
      Ddag(i,grid3,grid1,workgrid,work,work_extra,lwork);
    gettimeofday(&time2,NULL);
    rtime=1.*(time2.tv_sec-time1.tv_sec)+(time2.tv_usec-time1.tv_usec)/1.e6;

    /* APPROXIMATE flop count */
    /* Flops for I & Dbelow */
    mflop=0.;
    for (j=0; spec[j].level!=-1; j++) {
      if (spec[j].level!=0) {
	mflop+= /* Idag */
	  (spec[j].dim.x+2*HFL)*(spec[j].dim.y+2*HFL)*(spec[j].dim.z+2*HFL)*
	  (1.+0.5+0.25)*(3.5+7)*(TIMELOOP)/1e6;
	mflop+=1e-6*TIMELOOP* /* Dbelow */
	  (
	   (spec[j].dim.x/2+5)*(spec[j].dim.y/2+5)*spec[j].dim.z+
	   (spec[j].dim.x/2+5)*spec[j].dim.y*spec[j].dim.z+
	   spec[j].dim.x*spec[j].dim.y*spec[j].dim.z
	   )*((2/3.)*7.5+(1./3.)*6.5);
      }
    }
    /* Flops for Dsame */
    for (j=0; spec[j].level!=-1; j++) {
      mflop+=1e-6*TIMELOOP*
	spec[j].dim.x*spec[j].dim.y*spec[j].dim.z*5;
    }
  printf("Ddag%d:  %.0lf MFLOP in %.3f sec ==> %.0lf MFLOPS.\n",i,
	 mflop,rtime,mflop/rtime);
  }


  /* Test linearity of Ddag */
  for (i=0; i<3; i++) {
    evalgrid(grid1,org,rnd);
    evalgrid(grid2,org,rnd);
    gridadd(grid3,grid1,grid2);

    Ddag(i,grid,grid3,workgrid,work,work_extra,lwork);

    Ddag(i,grid3,grid1,workgrid,work,work_extra,lwork);
    gridsub(grid,grid,grid3);

    Ddag(i,grid3,grid2,workgrid,work,work_extra,lwork);
    gridsub(grid,grid,grid3);

    printf("Ddag%d is linear: %le\n",i,
	   sqrt(griddot(grid,grid))/sqrt(griddot(grid3,grid3)) );
  }


  /* Test Ddag is dagger of D */

  for (i=0; i<3; i++) {
    evalgrid(grid1,org,rnd); /* real space */
    evalgrid(grid2,org,rnd); J(grid2,work,work_extra,lwork); /* coeff space */

    Ddag(i,grid,grid1,workgrid,work,work_extra,lwork);
    D(i,grid3,grid2,workgrid,work,work_extra,lwork);
  
    printf("Ddag%d is dagger of D%d: %le\n",i,i,
	   (griddot(grid,grid2)-griddot(grid1,grid3))/griddot(grid,grid2));
  }

  /* Test linearity of D */
  for (i=0; i<3; i++) {
    evalgrid(grid1,org,rnd); J(grid1,work,work_extra,lwork);
    evalgrid(grid2,org,rnd); J(grid2,work,work_extra,lwork);
    gridadd(grid3,grid1,grid2);

    D(i,grid,grid3,workgrid,work,work_extra,lwork);

    D(i,grid3,grid1,workgrid,work,work_extra,lwork);
    gridsub(grid,grid,grid3);

    D(i,grid3,grid2,workgrid,work,work_extra,lwork);
    gridsub(grid,grid,grid3);

    printf("D%d is linear: %le\n",i,
	   sqrt(griddot(grid,grid))/sqrt(griddot(grid3,grid3)) );
  }

  /* Test proper above/below accounting */
  for (i=0; i<3; i++) {
    /* grid2 = random coeff space */
    evalgrid(grid2,org,rnd); J(grid2,work,work_extra,lwork);
    /* Do grid=Di(grid2) (derivative in i-th direction */
    D(i,grid,grid2,workgrid,work,work_extra,lwork);
    printf("Above/below accounting in D%d: %le\n",
	   i,sqrt(gridckghosts(grid,1)));
  }

  /* Multinomial tests */
  for (DIR=0; DIR<3; DIR++) {

    tmp=0;
    for (Px=0; Px<=PX; Px++)
      for (Py=0; Py<=PY; Py++)
	for (Pz=0; Pz<=PZ; Pz++) {
	  /* Evaluate multinomial in coeff space, then apply D */
	  evalgrid(grid1,org,multi); J(grid1,work,work_extra,lwork);
	  D(DIR,grid,grid1,workgrid,work,work_extra,lwork);

	  /* Evaluate analytic derivative of multinomial in real space */
	  evalgrid(grid2,org,multiD);

	  /* Subtract and compare: note that boundaries on top level are
	     unreliable, so we check just on lower levels, which have ghost
	     images of top level by the above/below accounting test. */
	  gridsub(grid,grid,grid2);
	  gridptmult(grid,grid,nononesc);
	  gridptmult(grid,grid,grid);
	  tmp+=sqrt(gridsum(grid,0)/griddot(grid1,grid1));
	}
    printf("Multinomial tests on D%d: summed through (%d,%d,%d): %le\n",
	   DIR,PX,PY,PZ,tmp);

    Px=4; Py=0; Pz=0;
    /* Evaluate multinomial in coeff space, then apply D */
    evalgrid(grid1,org,multi); J(grid1,work,work_extra,lwork);
    D(DIR,grid,grid1,workgrid,work,work_extra,lwork);

    /* Evaluate analytic derivative of multinomial in real space */
    evalgrid(grid2,org,multiD);

    /* Subtract and compare: note that boundaries on top level are
       unreliable, so we check just on lower levels, which have ghost
       images of top level by the above/below accounting test. */
    gridsub(grid,grid,grid2);
    gridptmult(grid,grid,nononesc);
    gridptmult(grid,grid,grid);

    printf("(%d,%d,%d)-multinomial test on D%d: %le\n",Px,Py,Pz,DIR,
	   sqrt(gridsum(grid,0)/griddot(grid1,grid1)));	 
  }

  printf("-----\n\n");
}


syntax highlighted by Code2HTML, v. 0.9.1