/*
    DFT++ is a density functional package developed by the research group
    of Professor Tomas Arias

    Copyright 1996-2003 Sohrab Ismail-Beigi

    This file is part of DFT++.

    DFT++ is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    DFT++ is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with DFT++; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

    Please see the file CREDITS for a list of authors.

    For academic users, we request that publications using results obtained with
    this software reference

    "New algebraic formulation of density functional calculation," by Sohrab Ismail-Beigi
    and T.A. Arias, Computer Physics Communications 128:1-2, 1-45 (June 2000).

    and, if using the wavelet basis, further reference

    "Multiresolution analysis of electronic structure: semicardinal and wavelet bases,"
    T.A. Arias, Reviews of Modern Physics 71:1, 267-311 (January 1999).

    and 

    "Robust ab initio calculation of condensed matter: transparent convergence through
    semicardinal multiresolution analysis,'' I.P. Daykov, T.A. Arias, and
    Torkel D. Engeness, Physical Review Letters, 90:21, 216402 (May 2003).

    For your convenience, preprints of the above articles may be obtained from
    http://arXiv.org/abs/cond-mat/9909130, 9805262, and 0204411, respectively.
*/

/*
 * ewald.c:   Sohrab Ismail-Beigi     Jan 31, 1997,  May 12 1997
 *
 * Calculates the Ewald energy for a set of ions, etc.
 *
 */

/* $Id: ewald.cpp,v 1.11.2.9 2003/05/29 18:54:22 ivan Exp $ */

#include "header.h"

#ifdef _WIN32
// the following function has been copied from glibc 2.3.2 sources
/* @(#)s_erf.c 5.1 93/09/24 */
/*
 * ====================================================
 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
 *
 * Developed at SunPro, a Sun Microsystems, Inc. business.
 * Permission to use, copy, modify, and distribute this
 * software is freely granted, provided that this notice
 * is preserved.
 * ====================================================
 */
/* 32-bit integer aliases used by the bit-level helpers below. */
typedef int          int32_t;
typedef unsigned int u_int32_t;

/*
 * Overlay of a double with two 32-bit words so that the raw bit pattern
 * can be inspected or patched.  The low word is declared first, i.e. the
 * layout assumes the least significant half is stored at the lower address.
 */
typedef union
{
  double value;
  struct
  {
    u_int32_t lsw;   /* least significant 32 bits */
    u_int32_t msw;   /* most significant 32 bits (sign, exponent, top of mantissa) */
  } parts;
} ieee_double_shape_type;

/* Store the most significant 32-bit word of the double d into i. */
#define GET_HIGH_WORD(i,d)                                      \
do {                                                            \
  ieee_double_shape_type hi_view;                               \
  hi_view.value = (d);                                          \
  (i) = hi_view.parts.msw;                                      \
} while (0)

/* Overwrite the least significant 32-bit word of the double d with v;
 * the most significant word is left as it was. */
#define SET_LOW_WORD(d,v)                                       \
do {                                                            \
  ieee_double_shape_type lo_view;                               \
  lo_view.value = (d);                                          \
  lo_view.parts.lsw = (v);                                      \
  (d) = lo_view.value;                                          \
} while (0)

/*
 * Constants and polynomial coefficients used by the erfc() defined below.
 *
 * The pair of hex words in each trailing comment is the bit pattern of the
 * constant (most significant word first).
 *
 * The denominator tables qq[], qa[], sa[] and sb[] carry a 0.0 placeholder
 * in slot 0: erfc() uses `one' as the constant term and starts indexing
 * these tables at 1.
 *
 * efx and efx8 are not referenced by the erfc() in this file.
 */
static const double
tiny	    = 1e-300,
half=  5.00000000000000000000e-01, /* 0x3FE00000, 0x00000000 */
one =  1.00000000000000000000e+00, /* 0x3FF00000, 0x00000000 */
two =  2.00000000000000000000e+00, /* 0x40000000, 0x00000000 */
	/* c = (float)0.84506291151 */
erx =  8.45062911510467529297e-01, /* 0x3FEB0AC1, 0x60000000 */
/*
 * Coefficients for approximation to  erf on [0,0.84375]
 */
efx =  1.28379167095512586316e-01, /* 0x3FC06EBA, 0x8214DB69 */
efx8=  1.02703333676410069053e+00, /* 0x3FF06EBA, 0x8214DB69 */
pp[]  =  {1.28379167095512558561e-01, /* 0x3FC06EBA, 0x8214DB68 */
 -3.25042107247001499370e-01, /* 0xBFD4CD7D, 0x691CB913 */
 -2.84817495755985104766e-02, /* 0xBF9D2A51, 0xDBD7194F */
 -5.77027029648944159157e-03, /* 0xBF77A291, 0x236668E4 */
 -2.37630166566501626084e-05}, /* 0xBEF8EAD6, 0x120016AC */
qq[]  =  {0.0, 3.97917223959155352819e-01, /* 0x3FD97779, 0xCDDADC09 */
  6.50222499887672944485e-02, /* 0x3FB0A54C, 0x5536CEBA */
  5.08130628187576562776e-03, /* 0x3F74D022, 0xC4D36B0F */
  1.32494738004321644526e-04, /* 0x3F215DC9, 0x221C1A10 */
 -3.96022827877536812320e-06}, /* 0xBED09C43, 0x42A26120 */
/*
 * Coefficients for approximation to  erf  in [0.84375,1.25]
 */
pa[]  = {-2.36211856075265944077e-03, /* 0xBF6359B8, 0xBEF77538 */
  4.14856118683748331666e-01, /* 0x3FDA8D00, 0xAD92B34D */
 -3.72207876035701323847e-01, /* 0xBFD7D240, 0xFBB8C3F1 */
  3.18346619901161753674e-01, /* 0x3FD45FCA, 0x805120E4 */
 -1.10894694282396677476e-01, /* 0xBFBC6398, 0x3D3E28EC */
  3.54783043256182359371e-02, /* 0x3FA22A36, 0x599795EB */
 -2.16637559486879084300e-03}, /* 0xBF61BF38, 0x0A96073F */
qa[]  =  {0.0, 1.06420880400844228286e-01, /* 0x3FBB3E66, 0x18EEE323 */
  5.40397917702171048937e-01, /* 0x3FE14AF0, 0x92EB6F33 */
  7.18286544141962662868e-02, /* 0x3FB2635C, 0xD99FE9A7 */
  1.26171219808761642112e-01, /* 0x3FC02660, 0xE763351F */
  1.36370839120290507362e-02, /* 0x3F8BEDC2, 0x6B51DD1C */
  1.19844998467991074170e-02}, /* 0x3F888B54, 0x5735151D */
/*
 * Coefficients for approximation to  erfc in [1.25,1/0.35]
 */
ra[]  = {-9.86494403484714822705e-03, /* 0xBF843412, 0x600D6435 */
 -6.93858572707181764372e-01, /* 0xBFE63416, 0xE4BA7360 */
 -1.05586262253232909814e+01, /* 0xC0251E04, 0x41B0E726 */
 -6.23753324503260060396e+01, /* 0xC04F300A, 0xE4CBA38D */
 -1.62396669462573470355e+02, /* 0xC0644CB1, 0x84282266 */
 -1.84605092906711035994e+02, /* 0xC067135C, 0xEBCCABB2 */
 -8.12874355063065934246e+01, /* 0xC0545265, 0x57E4D2F2 */
 -9.81432934416914548592e+00}, /* 0xC023A0EF, 0xC69AC25C */
sa[]  =  {0.0,1.96512716674392571292e+01, /* 0x4033A6B9, 0xBD707687 */
  1.37657754143519042600e+02, /* 0x4061350C, 0x526AE721 */
  4.34565877475229228821e+02, /* 0x407B290D, 0xD58A1A71 */
  6.45387271733267880336e+02, /* 0x40842B19, 0x21EC2868 */
  4.29008140027567833386e+02, /* 0x407AD021, 0x57700314 */
  1.08635005541779435134e+02, /* 0x405B28A3, 0xEE48AE2C */
  6.57024977031928170135e+00, /* 0x401A47EF, 0x8E484A93 */
 -6.04244152148580987438e-02}, /* 0xBFAEEFF2, 0xEE749A62 */
/*
 * Coefficients for approximation to  erfc in [1/.35,28]
 */
rb[]  = {-9.86494292470009928597e-03, /* 0xBF843412, 0x39E86F4A */
 -7.99283237680523006574e-01, /* 0xBFE993BA, 0x70C285DE */
 -1.77579549177547519889e+01, /* 0xC031C209, 0x555F995A */
 -1.60636384855821916062e+02, /* 0xC064145D, 0x43C5ED98 */
 -6.37566443368389627722e+02, /* 0xC083EC88, 0x1375F228 */
 -1.02509513161107724954e+03, /* 0xC0900461, 0x6A2E5992 */
 -4.83519191608651397019e+02}, /* 0xC07E384E, 0x9BDC383F */
sb[]  =  {0.0,3.03380607434824582924e+01, /* 0x403E568B, 0x261D5190 */
  3.25792512996573918826e+02, /* 0x40745CAE, 0x221B9F0A */
  1.53672958608443695994e+03, /* 0x409802EB, 0x189D5118 */
  3.19985821950859553908e+03, /* 0x40A8FFB7, 0x688C246A */
  2.55305040643316442583e+03, /* 0x40A3F219, 0xCEDF3BE6 */
  4.74528541206955367215e+02, /* 0x407DA874, 0xE79FE763 */
 -2.24409524465858183362e+01}; /* 0xC03670E2, 0x42712D62 */

/*
 * Complementary error function, compiled only when _WIN32 is defined
 * (copied from glibc, see the notice above).
 *
 * The argument range is selected by comparing the high 32-bit word of x
 * against fixed bit patterns; each range uses its own rational
 * approximation built from the coefficient tables above:
 *   NaN / +-inf            -> NaN / 0 or 2
 *   |x| < 0.84375          -> 1 - (x + x*y),  y = r/s in z = x*x
 *   0.84375 <= |x| < 1.25  -> built from erx and P/Q in s = |x|-1
 *   1.25 <= |x| < 28       -> exp(...)/|x| form with R/S in s = 1/x^2
 *   |x| >= 28              -> tiny*tiny for x>0, two-tiny otherwise
 */
double erfc(double x)
{
	int32_t hx,ix;
	double R,S,P,Q,s,y,z,r;
	/* hx: high word of x including the sign bit (signed, so negative
	 * x gives hx < 0); ix: the same word with the sign bit cleared. */
	GET_HIGH_WORD(hx,x);
	ix = hx&0x7fffffff;
	if(ix>=0x7ff00000) {			/* erfc(nan)=nan */
						/* erfc(+-inf)=0,2 */
	    /* (sign bit)<<1 is 0 or 2; one/x adds +-0 for infinities
	     * and propagates a NaN argument. */
	    return (double)(((u_int32_t)hx>>31)<<1)+one/x;
	}

	if(ix < 0x3feb0000) {		/* |x|<0.84375 */
	    double r1,r2,s1,s2,s3,z2,z4;
	    if(ix < 0x3c700000)  	/* |x|<2**-56 */
		return one-x;
	    z = x*x;
	    /* numerator r and denominator s are polynomials in z, evaluated
	     * as partial sums grouped by powers z2 = z^2 and z4 = z^4 */
	    r1 = pp[0]+z*pp[1]; z2=z*z;
	    r2 = pp[2]+z*pp[3]; z4=z2*z2;
	    s1 = one+z*qq[1];
	    s2 = qq[2]+z*qq[3];
	    s3 = qq[4]+z*qq[5];
            r = r1 + z2*r2 + z4*pp[4];
	    s  = s1 + z2*s2 + z4*s3;
	    y = r/s;
	    /* the comparison uses the signed hx, so every negative x in
	     * this range also takes the first branch */
	    if(hx < 0x3fd00000) {  	/* x<1/4 */
		return one-(x+x*y);
	    } else {
		r = x*y;
		r += (x-half);
	        return half - r ;
	    }
	}
	if(ix < 0x3ff40000) {		/* 0.84375 <= |x| < 1.25 */
	    double s2,s4,s6,P1,P2,P3,P4,Q1,Q2,Q3,Q4;
	    s = fabs(x)-one;
	    /* P and Q are polynomials in s, grouped by s2, s4, s6 */
	    P1 = pa[0]+s*pa[1]; s2=s*s;
	    Q1 = one+s*qa[1];   s4=s2*s2;
	    P2 = pa[2]+s*pa[3]; s6=s4*s2;
	    Q2 = qa[2]+s*qa[3];
	    P3 = pa[4]+s*pa[5];
	    Q3 = qa[4]+s*qa[5];
	    P4 = pa[6];
	    Q4 = qa[6];
	    P = P1 + s2*P2 + s4*P3 + s6*P4;
	    Q = Q1 + s2*Q2 + s4*Q3 + s6*Q4;
	    if(hx>=0) {
	        z  = one-erx; return z - P/Q;
	    } else {
		z = erx+P/Q; return one+z;
	    }
	}
	if (ix < 0x403c0000) {		/* |x|<28 */
	    /* from here on x holds |x|; the original sign is still in hx */
	    x = fabs(x);
 	    s = one/(x*x);
	    if(ix< 0x4006DB6D) {	/* |x| < 1/.35 ~ 2.857143*/
		double R1,R2,R3,R4,S1,S2,S3,S4,s2,s4,s6,s8;
	    R1 = ra[0]+s*ra[1];s2 = s*s;
	    S1 = one+s*sa[1];  s4 = s2*s2;
	    R2 = ra[2]+s*ra[3];s6 = s4*s2;
	    S2 = sa[2]+s*sa[3];s8 = s4*s4;
	    R3 = ra[4]+s*ra[5];
	    S3 = sa[4]+s*sa[5];
	    R4 = ra[6]+s*ra[7];
	    S4 = sa[6]+s*sa[7];
	    R = R1 + s2*R2 + s4*R3 + s6*R4;
	    S = S1 + s2*S2 + s4*S3 + s6*S4 + s8*sa[8];
	    } else {			/* |x| >= 1/.35 ~ 2.857143 */
		double R1,R2,R3,S1,S2,S3,S4,s2,s4,s6;
		if(hx<0&&ix>=0x40180000) return two-tiny;/* x < -6 */
		R1 = rb[0]+s*rb[1];s2 = s*s;
		S1 = one+s*sb[1];  s4 = s2*s2;
		R2 = rb[2]+s*rb[3];s6 = s4*s2;
		S2 = sb[2]+s*sb[3];
		R3 = rb[4]+s*rb[5];
		S3 = sb[4]+s*sb[5];
		S4 = sb[6]+s*sb[7];
		R = R1 + s2*R2 + s4*R3 + s6*rb[6];
		S = S1 + s2*S2 + s4*S3 + s6*S4;
	    }
	    /* z is |x| with its low 32 bits cleared.  The two exp()
	     * arguments add up algebraically to -x*x - 0.5625 + R/S:
	     * (-z*z - 0.5625) + ((z-x)*(z+x) + R/S). */
	    z  = x;
	    SET_LOW_WORD(z,0);
	    r  =  exp(-z*z-0.5625)*exp((z-x)*(z+x)+R/S);
	    if(hx>0) return r/x; else return two-r/x;
	} else {
	    /* |x| >= 28 */
	    if(hx>0) return tiny*tiny; else return two-tiny;
	}
}

#endif


/*
 * Global variables that control how far
 * out in R- and G-space the lattice sums go for the ewald energy.
 * Sums are done from _start to _end in x,y, and z directions.
 */
/* Becomes 1 once setup_Ewald() has stored the four limits below. */
static int Ewald_setup_was_done = 0;

/* Limits for the real-space lattice sum. */
static int Nlat_start_real = 0;
static int Nlat_end_real = 0;

/* Limits for the reciprocal-space lattice sum. */
static int Nlat_start_recip = 0;
static int Nlat_end_recip = 0;

/*
 *      Nlat_start_real   Nlat_end_real
 *      Nlat_start_recip  Nlat_end_recip
 *
 * The real and recip. space sums go from -Nlat to Nlat in each of x,y, and z
 * directions, with Nlat going from Nlat_start to Nlat_end.
 *
 */
void setup_Ewald(int nlat_s_real, int nlat_e_real,
                 int nlat_s_recip, int nlat_e_recip)
{
  /* Real-space sum limits */
  Nlat_start_real = nlat_s_real;
  Nlat_end_real = nlat_e_real;

  /* Reciprocal-space sum limits */
  Nlat_start_recip = nlat_s_recip;
  Nlat_end_recip = nlat_e_recip;

  /* Record that the limits are now valid; Ewald(), dEwald_datom_pos()
   * and get_ewald() all check this flag before using them. */
  Ewald_setup_was_done = 1;
}


/*
 * Retrieve the ewald parameters.
 */
int get_ewald(int &nlat_s_real, int &nlat_e_real,
              int &nlat_s_recip, int &nlat_e_recip)
{
  /* Copy the stored limits out only if setup_Ewald() has been called;
   * returns 1 in that case. */
  if (Ewald_setup_was_done)
    {
      nlat_s_real  = Nlat_start_real;
      nlat_e_real  = Nlat_end_real;
      nlat_s_recip = Nlat_start_recip;
      nlat_e_recip = Nlat_end_recip;
      return 1;
    }

  /* Not set up yet: the output arguments are left untouched. */
  return 0;
}


/*
 * Calculates the Ewald energy per unit cell 
 * for natoms atoms of charges Z[0..natoms-1]
 * immersed in a uniform compensating charge density sum_i(Z[i])/Vol
 * where Vol = det(R) = unit cell volume.
 * atpos[0..natoms-1][3] is in lattice coordinates.
 * R[][] contains the lattice vectors in the columns.
 *
 * With tau denoting an atom in the basis,
 * the energy is Ewald = 0.5*sum_{tau} { Z[tau]*phi[tau] }
 * where phi[tau] is the electrostatic potential caused by ALL OTHER ions
 * everywhere and the uniform compensating background.
 *
 * The energy comes in two parts:  real-space sum of screened point-charges
 * (screened by gaussians), and a G-space sum of the potential of
 * Gaussians in a uniform background; there are also some constants from
 * "renormalization" effects (cutoffs going to infinity in a controlled way).
 *
 * We calculate the above sums in R- and G-space by summing the values
 * in a box of size [-Nlat,Nlat]^3...Nlat is run through a set of values
 * to check for convergence.
 *
 */

real
Ewald(Ioninfo &ioninfo, Lattice &lattice)
{
  /*
   * Computes and returns Ereal + Erecip for all ions in ioninfo on the
   * given lattice, logging the running sums after every shell.
   * die()s if setup_Ewald() has not been called.
   *
   * Both lattice sums are accumulated shell by shell: for each Nlat only
   * the cells on the surface of the cube [-Nlat,Nlat]^3 are added, so the
   * running total after shell Nlat is the sum over the whole cube.
   */

  /* Constants */
  const real pi = M_PI;
  const real twopi = 2.0*pi;
  const real fourpi = 4.0*pi;

  /* Local vars */
  real *Z;             /* charge of every ion, flattened over species */
  vector3 *atpos;      /* position of every ion, same ordering as Z[] */
  int natoms;
  matrix3 R,RTR,GGT;
  real vol,sigma,eta;
  real Ereal,Erecip,Etot;
  int sp,i,j,k,l,tau,taup,cell[3],Nlat;

  if (!Ewald_setup_was_done)
    die("Ewald() was not setup!!!\n");

  /* Take out all the ions from the Ioninfo structure and put them into
   * a long list of Z[] and atpos[] values */
  natoms = 0;
  for (sp=0; sp < ioninfo.nspecies; sp++)
    natoms += ioninfo.species[sp].natoms;
  Z = (real *)mymalloc(sizeof(real)*natoms,"Z","Ewald()");
  atpos = (vector3 *)mymalloc(sizeof(vector3)*natoms,"atpos","Ewald()");
  k = 0;
  for (sp=0; sp < ioninfo.nspecies; sp++)
    for (j=0; j < ioninfo.species[sp].natoms; j++)
      {
        Z[k] = ioninfo.species[sp].Z;
        atpos[k] = ioninfo.species[sp].atpos[j];
        k++;
      }

  /* Unit cell volume */
  vol = lattice.unit_cell_volume;

  /* Lattice vector matrix (only used for the log output below) */
  R = lattice.R;

  /* matrix of dot-products of lattice vectors */
  RTR = lattice.RTR;

  /* dot products of recip. lattice vectors */
  GGT = lattice.GGT;

  dft_log("\n------ Ewald() -----\n");
  dft_log("latvec = \n");
  R.print(dft_global_log,"%lg ");
  dft_log("natoms = %d\n",natoms);
  dft_log(DFT_ANAL_LOG,"Z = [ ");
  for (i=0; i < natoms; i++)
    dft_log(DFT_ANAL_LOG,"%lg ",Z[i]);
  dft_log(DFT_ANAL_LOG,"]\natpos=\n");
  if (dft_global_log->get_level() >= DFT_ANAL_LOG)
    for (i=0; i < natoms; i++)
      atpos[i].print(dft_global_log,"%lg ");

  /* Set the width of the gaussian to 0.4 times the shortest non-zero
   * lattice vector, found by scanning the cells in [-2,2]^3 around the
   * origin. */
  sigma = sqrt(RTR.m[0][0]);
  for (i=-2; i<=2; i++)
    for (j=-2; j<=2; j++)
      for (k=-2; k<=2; k++)
        if ( i!=0 || j!=0 || k!=0 )
          {
            vector3 x;
            real r;

            x.v[0] = i; x.v[1] = j; x.v[2] = k;
            r = sqrt(x*(RTR*x));
            if (r < sigma)
              sigma = r;
          }
  sigma *= 0.4;

  eta = 1.0/(sqrt(2.0)*sigma);
  dft_log("Using sigma = %lg   eta = %lg for gaussian\n\n", sigma,eta);

  /* Real-space part of energy:  (1) constant parts */
  Ereal = (real)0.0;
  for (tau=0; tau < natoms; tau++)
    for (taup=0; taup < natoms; taup++)
      {
        real temp;

        dft_log(DFT_NERD_LOG,
                "\nReal space potential for tau=%d taup=%d\n",
                tau,taup);
        /* The constant part of the energy from "renormalization" */
        temp = -0.5*Z[tau]*Z[taup]*pi/(vol*eta*eta);
        /* If tau==tau', then add the "negative" potential of gaussian at
         * tau */
        if (tau == taup)
          temp += -0.5*Z[tau]*Z[taup]*2.0*eta/sqrt(pi);
        Ereal += temp;
        dft_log(DFT_NERD_LOG,"Constant part = %le\n",temp);
      }

  /* Real-space part of energy:  (2) lattice sums over screened ion pairs,
   * one cubic shell of cells per value of Nlat */
  for (Nlat = 0; Nlat <= Nlat_end_real; Nlat++)
    {
      for (tau=0; tau < natoms; tau++)
        for (taup=0; taup < natoms; taup++)
          for (cell[0]=-Nlat; cell[0]<=Nlat; cell[0]++)
            for (cell[1]=-Nlat; cell[1]<=Nlat; cell[1]++)
              for (cell[2]=-Nlat; cell[2]<=Nlat; cell[2]++)
                {
                  /* Only cells with at least one coordinate == +/-Nlat
                   * belong to this shell; the interior was already
                   * summed for smaller Nlat. */
                  if ( abs(cell[0])!=Nlat &&
                       abs(cell[1])!=Nlat &&
                       abs(cell[2])!=Nlat    )
                    continue;
                  /* Exclude only the cell==0, tau==tau' self term */
                  if (tau==taup && cell[0]==0 && cell[1]==0 && cell[2]==0)
                    continue;

                  /* x is the cell+tau'-tau vector in lattice coords;
                   * r is its length in real distance units.
                   * NOTE: r is zero when two distinct ions coincide
                   * (modulo a lattice vector); that case is not guarded
                   * and makes the term below divide by zero. */
                  vector3 x;
                  real r,temp;

                  for (l=0; l < 3; l++)
                    x.v[l] = cell[l] +
                             atpos[taup].v[l] - atpos[tau].v[l];
                  r = sqrt(x*(RTR*x));
                  temp = 0.5*Z[tau]*Z[taup]*erfc(eta*r)/r;
                  Ereal += temp;
                  dft_log(DFT_NERD_LOG,
                          "cell=[%d %d %d] r =%lg e = %le\n",
                          cell[0],cell[1],cell[2],r,temp);
                }

      /* Logged once per shell, after every (tau,tau') pair was added */
      dft_log("Nlat = %2d  Real-space energy = %25.15le\n",
              Nlat,Ereal);
      dft_log_flush();
    } /* Nlat loop */

  /*
   * Reciprocal space contribution:
   * Erecip = 0.5*sum_{G!=0}
   *            {4*pi*exp(-|G|^2/(4*eta^2))/(vol*|G|^2)*|S(G)|^2}
   * where S(G) = sum_{tau} { Z[tau]*exp(-i*G*r_tau) }
   *
   * r_tau = R*tau (R is matrix, tau is 3-vector) and
   *     G = cell*G (second G is matrix, cell is row-vector of integers)
   * so G*r_tau = 2*pi*cell*tau.
   */
  Erecip = (real)0.0;
  for (Nlat = 1; Nlat <= Nlat_end_recip; Nlat++)
    {
      for (cell[0]=-Nlat; cell[0]<=Nlat; cell[0]++)
        for (cell[1]=-Nlat; cell[1]<=Nlat; cell[1]++)
          for (cell[2]=-Nlat; cell[2]<=Nlat; cell[2]++)
            {
              /* Same shell selection as in the real-space sum */
              if ( abs(cell[0])!=Nlat &&
                   abs(cell[1])!=Nlat &&
                   abs(cell[2])!=Nlat    )
                continue;
              /* Skip G=0 */
              if (cell[0]==0 && cell[1]==0 && cell[2]==0)
                continue;

              real SG[2];      /* real and imaginary part of S(G) */
              real G2,temp,angle;

              /* Calculate structure factor */
              SG[0] = SG[1] = 0.0;
              for (tau=0; tau < natoms; tau++)
                {
                  angle = -twopi*(cell[0]*atpos[tau].v[0]+
                                  cell[1]*atpos[tau].v[1]+
                                  cell[2]*atpos[tau].v[2]  );
                  SG[0] += Z[tau]*cos(angle);
                  SG[1] += Z[tau]*sin(angle);
                }
              /* Calculate |G|^2 (only the upper triangle of GGT is
               * read; the off-diagonal terms are doubled) */
              G2 = GGT.m[0][0]*cell[0]*cell[0] +
                GGT.m[1][1]*cell[1]*cell[1] +
                GGT.m[2][2]*cell[2]*cell[2] +
                2.0*( GGT.m[0][1]*cell[0]*cell[1] +
                      GGT.m[0][2]*cell[0]*cell[2] +
                      GGT.m[1][2]*cell[1]*cell[2]    );
              /* The energy for G */
              temp = 0.5*fourpi*exp(-G2/(4.0*eta*eta))/(G2*vol)*
                (SG[0]*SG[0]+SG[1]*SG[1]);
              Erecip += temp;

              dft_log(DFT_NERD_LOG,
                      "G=[%d %d %d] G2 =%lg  e = %le\n",
                      cell[0],cell[1],cell[2],G2,temp);
            }

      dft_log("Nlat = %2d  Reciprocal space energy = %25.15le\n",
              Nlat,Erecip);
      dft_log_flush();
    } /* of Nlat loop */

  Etot = Ereal + Erecip;

  dft_log("\nEwald energy = %25.15le\n\n",Etot);
  dft_log_flush();

  myfree(Z);
  myfree(atpos);
  return Etot;
}

/*
 * Derivative of Ewald energy versus the position of the atom 'atom' of
 * species 'species' (lattice coordinates).
 */
vector3
dEwald_datom_pos(Ioninfo &ioninfo, Lattice &lattice,
		 const int species,const int atom)
{
  /*
   * Returns the derivative of the Ewald energy with respect to the
   * position of atom number 'atom' of species number 'species'.
   * The real-space sum runs over the full cube [-Nlat_end_real,
   * Nlat_end_real]^3 and the reciprocal-space sum over
   * [-Nlat_end_recip,Nlat_end_recip]^3.
   * die()s if setup_Ewald() has not been called or if no ion matches
   * (species,atom).
   */
  const real pi = M_PI,
             twopi = 2.0*pi,
             fourpi = 4.0*pi,
             sqrtpi = sqrt(pi);

  /* Local vars */
  real *Z;             /* charge of every ion, flattened over species */
  vector3 *atpos;      /* position of every ion, same ordering as Z[] */
  int natoms;
  matrix3 RTR,GGT;
  real vol,sigma,eta;
  int sp,i,j,k,l,cell[3],Nlat,taup;
  int tau; /* the index of the atom corresponding to 'species' and 'atom' */
  vector3 result(0.0,0.0,0.0);   /* holds the final result */

  if (!Ewald_setup_was_done)
    die("Ewald() was not setup!!!\n");

  /* Take out all the ions from the Ioninfo structure and put them into
   * a long list of Z[] and atpos[] values */
  natoms = 0;
  for (sp=0; sp < ioninfo.nspecies; sp++)
    natoms += ioninfo.species[sp].natoms;
  Z = (real *)mymalloc(sizeof(real)*natoms,"Z","dEwald_datom_pos()");
  atpos = (vector3 *)mymalloc(sizeof(vector3)*natoms,
                              "atpos","dEwald_datom_pos()");
  k = 0;
  tau = -1;
  for (sp=0; sp < ioninfo.nspecies; sp++)
    for (j=0; j < ioninfo.species[sp].natoms; j++)
      {
        Z[k] = ioninfo.species[sp].Z;
        atpos[k] = ioninfo.species[sp].atpos[j];
        /* Find the index corresponding to species/atom and store it in tau */
        if (sp == species && j == atom)
          tau = k;
        k++;
      }
  if (tau == -1)
    die("dEwald_datom_pos():  no atom corresponding to requested deriv!!!\n");

  /* Unit cell volume */
  vol = lattice.unit_cell_volume;

  /* matrix of dot-products of lattice vectors */
  RTR = lattice.RTR;

  /* dot products of recip. lattice vectors */
  GGT = lattice.GGT;

  /* Set the width of the gaussian to 0.4 times the shortest non-zero
   * lattice vector, found by scanning the cells in [-2,2]^3 around the
   * origin (same choice as in Ewald()). */
  sigma = sqrt(RTR.m[0][0]);
  for (i=-2; i<=2; i++)
    for (j=-2; j<=2; j++)
      for (k=-2; k<=2; k++)
        if ( i!=0 || j!=0 || k!=0 )
          {
            vector3 x;
            real r;

            x.v[0] = i; x.v[1] = j; x.v[2] = k;
            r = sqrt(x*(RTR*x));
            if (r < sigma)
              sigma = r;
          }
  sigma *= 0.4;
  eta = 1.0/(sqrt(2.0)*sigma);

  /* Real-space part of derivative */
  /* Loop over atoms and cells */
  Nlat = Nlat_end_real;
  for (taup=0; taup < natoms; taup++)
    for (cell[0]=-Nlat; cell[0]<=Nlat; cell[0]++)
      for (cell[1]=-Nlat; cell[1]<=Nlat; cell[1]++)
        for (cell[2]=-Nlat; cell[2]<=Nlat; cell[2]++)
          /* Only exclude the cell==0 and tau==tau' term */
          if (tau!=taup || cell[0]!=0 || cell[1]!=0 || cell[2]!=0)
            {
              real temp,r;
              vector3 x,RTRx;

              /* x is the cell+tau-tau' vector in lattice coords;
               * r is its length in real distance units.
               * NOTE: r is zero when another ion coincides with tau
               * (modulo a lattice vector); that case is not guarded. */
              for (l=0; l < 3; l++)
                x.v[l] = cell[l] + atpos[tau].v[l] - atpos[taup].v[l];
              RTRx = RTR*x;
              r = sqrt(x*RTRx);
              /* Z*Z' * d/dr[erfc(eta*r)/r] / r; multiplying by RTR*x
               * below turns d/dr into the gradient in lattice coords */
              temp = -Z[tau]*Z[taup]*(erfc(eta*r)/(r*r) +
                                      2.0*eta*exp(-eta*eta*r*r)/(r*sqrtpi) )/r;
              for (l=0; l < 3; l++)
                result.v[l] += temp*RTRx.v[l];
            }

  /*
   * Reciprocal space contribution.
   */
  Nlat = Nlat_end_recip;
  for (cell[0]=-Nlat; cell[0]<=Nlat; cell[0]++)
    for (cell[1]=-Nlat; cell[1]<=Nlat; cell[1]++)
      for (cell[2]=-Nlat; cell[2]<=Nlat; cell[2]++)
        /* Skip G=0 */
        if (cell[0]!=0 || cell[1]!=0 || cell[2]!=0)
          {
            complex SG,Stau;
            real G2,temp,angle;

            /* Calculate structure factor over all ions */
            SG.x = SG.y = 0.0;
            for (taup=0; taup < natoms; taup++)
              {
                angle = -twopi*(cell[0]*atpos[taup].v[0]+
                                cell[1]*atpos[taup].v[1]+
                                cell[2]*atpos[taup].v[2]  );
                SG.x += Z[taup]*cos(angle);
                SG.y += Z[taup]*sin(angle);
              }

            /* Structure factor for tau alone: Ztau*exp(-i*G.tau) */
            angle = -twopi*(cell[0]*atpos[tau].v[0]+
                            cell[1]*atpos[tau].v[1]+
                            cell[2]*atpos[tau].v[2]  );
            Stau.x = Z[tau]*cos(angle);
            Stau.y = Z[tau]*sin(angle);

            /* Calculate |G|^2 (only the upper triangle of GGT is read;
             * the off-diagonal terms are doubled) */
            G2 = GGT.m[0][0]*cell[0]*cell[0] +
              GGT.m[1][1]*cell[1]*cell[1] +
              GGT.m[2][2]*cell[2]*cell[2] +
              2.0*( GGT.m[0][1]*cell[0]*cell[1] +
                    GGT.m[0][2]*cell[0]*cell[2] +
                    GGT.m[1][2]*cell[1]*cell[2]    );

            /* The contribution for G: the same prefactor as in the
             * energy (without the 0.5) times
             * 2*pi*Im{ conj(S(G)) * S_tau(G) } * cell */
            temp = fourpi*exp(-G2/(4.0*eta*eta))/(G2*vol);
            temp *= twopi*(SG.x*Stau.y-SG.y*Stau.x);
            for (l=0; l < 3; l++)
              result.v[l] += temp*cell[l];
          }

  myfree(Z);
  myfree(atpos);

  /* return the fruits of our labors */
  return result;
}