/* DFT++ is a density functional package developed by the research group of Professor Tomas Arias Copyright 1996-2003 Sohrab Ismail-Beigi This file is part of DFT++. DFT++ is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. DFT++ is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with DFT++; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Please see the file CREDITS for a list of authors. For academic users, we request that publications using results obtained with this software reference "New algebraic formulation of density functional calculation," by Sohrab Ismail-Beigi and T.A. Arias, Computer Physics Communications 128:1-2, 1-45 (June 2000). and, if using the wavelet basis, further reference "Multiresolution analysis of electronic structure: semicardinal and wavelet bases," T.A. Arias, Reviews of Modern Physics 71:1, 267-311 (January 1999). and "Robust ab initio calculation of condensed matter: transparent convergence through semicardinal multiresolution analysis,'' I.P. Daykov, T.A. Arias, and Torkel D. Engeness, Physical Review Letters, 90:21, 216402 (May 2003). For your convenience, preprints of the above articles may be obtained from http://arXiv.org/abs/cond-mat/9909130, 9805262, and 0204411, respectively. */ /* $Id: conv3d.cpp,v 1.1.2.8 2003/05/29 18:54:15 ivan Exp $ */ /* * $Log: conv3d.cpp,v $ * Revision 1.1.2.8 2003/05/29 18:54:15 ivan * Update the PRL reference * * Revision 1.1.2.7 2003/05/23 01:49:00 sahak * added the new header * * Revision 1.1.2.6 2003/04/03 01:50:48 ivan * minor cosmetic fixes to compile on balboa (Alpha) and fiber (SGI) * with the native compilers * * Revision 1.1.2.5 2003/04/02 23:35:13 ivan * Removing C-dependencies from the code. Now it needs only C++ compiler * * Revision 1.1.2.4 2002/10/28 22:07:29 matthew * Non-orthogonal Laplaciancvs add coeff_d.c! Works for orthorhombic cells, still in need of debugging for other unit cells. * * Revision 1.2 2001/08/21 09:05:35 daykov * Overlapping Grids: Idag working * (still can do speed optimization and currently the deepest certical segment * in the grid tree must be the first in order to visit everybody in the * transforms - actually Idag) * * Revision 1.1.1.1 2001/04/18 17:15:13 daykov * starting project * * Revision 1.1 2000/07/11 01:05:04 daykov * Initial revision * * Revision 1.1 1999/09/14 19:39:52 torkel * Initial revision * * Revision 1.14 1999/08/06 19:25:03 torkel * *** empty log message *** * * Revision 1.13 1999/07/04 20:48:31 torkel * Full O implemented. * * Revision 1.12 1999/07/02 07:03:54 torkel * the convolution in Osame seems to work for this version. * * Revision 1.11 1999/07/02 03:12:07 torkel * In the middle of making Osame work * * Revision 1.10 1999/06/23 22:19:26 torkel * revised version where Obelow is dagger of Oabove for grid without * siblings. * * Revision 1.9 1999/06/15 23:24:24 torkel * debugging, trying to make Obelow work * * Revision 1.8 1999/06/11 12:52:48 torkel * including convolutions for obelow * * Revision 1.5 1999/04/25 22:32:56 muchomas * *** empty log message *** * * Revision 1.4 1999/04/25 18:35:39 muchomas * Contains new routines for same-level operator convolutions * Working toward more arbitrary length convolution routines * * Revision 1.3 1999/04/20 15:59:37 muchomas * Fully documented code * * Revision 1.2 1999/04/19 18:58:39 muchomas * Consistent set of calling conventions for aconv3d routines * * Revision 1.1 1999/04/19 18:12:11 muchomas * Initial revision * * Revision 1.13 1999/04/17 22:00:09 muchomas * Working dagger operators, still slow... * * Revision 1.12 1999/04/14 19:48:37 muchomas * Working transpose transform, but doesn't take input/output arrays -- yet! * * Revision 1.11 1999/04/13 15:11:36 muchomas * Does ghosts, making compact version, but still in larger array * * Revision 1.10 1999/04/13 14:32:53 muchomas * Idag that computes only only ghosts, leaving sparse data distribution * * Revision 1.9 1999/04/11 20:21:47 muchomas * Repackaged calling sequence for upper/lower forward/inverse transform package * * Revision 1.8 1999/04/11 20:03:03 muchomas * Workspace dimensions now determined by aconvP3d * * Revision 1.7 1999/04/11 19:53:46 muchomas * Forward/Inverse package working * * Revision 1.6 1999/04/11 03:06:31 muchomas * Forward transform with proper indexing into "upper" array * * Revision 1.5 1999/04/10 23:01:44 muchomas * Thinks of points and sets of points, rather than lines and planes * * Revision 1.4 1999/04/10 22:38:57 muchomas * Now using log functionality * */ #include #include #include #include "convsets.h" /* High speed, 3d convolution routines */ /* Back and forth, linear convolution for +/- 3 filters using padding (IN PLACE) out: input/output array for in place computation n: size of actual data set being convolved npad: size of padded region needed for "leakage" of foward filters over the edge reversefilter: reverse-time filter coefficients forwardfilter: forward-time filter coefficients */ void convpts3(double inout[],int n,int npad,double reversefilter[], double forwardfilter[]) { register int i; /* First, make sure pad is zero */ for (i=n; i=3; i--) inout[i]=t0*inout[i]+t1*inout[i-1]+t2*inout[i-2]+t3*inout[i-3]; /* Careful with back end of data! */ inout[2]=t0*inout[2]+t1*inout[1]+t2*inout[0]; inout[1]=t0*inout[1]+t1*inout[0]; inout[0]=t0*inout[0]; } /* Forward part (padding now ensures room!) */ { register double t0=forwardfilter[0],t1=forwardfilter[1], t2=forwardfilter[2],t3=forwardfilter[3]; for (i=0; i=5; i--) inout[i]=t0*inout[i]+t1*inout[i-1]+t2*inout[i-2]+t3*inout[i-3]+ t4*inout[i-4]+t5*inout[i-5]; /* Careful with back end of data! */ inout[4]=t0*inout[4]+t1*inout[3]+t2*inout[2]+t3*inout[1]+t4*inout[0]; inout[3]=t0*inout[3]+t1*inout[2]+t2*inout[1]+t3*inout[0]; inout[2]=t0*inout[2]+t1*inout[1]+t2*inout[0]; inout[1]=t0*inout[1]+t1*inout[0]; inout[0]=t0*inout[0]; } /* Forward part (padding now ensures room!) */ { register double t0=forwardfilter[0], t1=forwardfilter[1], t2=forwardfilter[2], t3=forwardfilter[3], t4=forwardfilter[4], t5=forwardfilter[5]; for (i=0; i=5; i--) out[i]=t0*in[i]+t1*in[i-1]+t2*in[i-2]+t3*in[i-3]+ t4*in[i-4]+t5*in[i-5]; out[4]=out[npad-1]; out[3]=out[npad-2]; out[2]=out[npad-3]; out[1]=out[npad-4]; out[0]=out[npad-5]; } /* Forward part (padding now ensures room!) */ { register double t0=forwardfilter[0], t1=forwardfilter[1], t2=forwardfilter[2], t3=forwardfilter[3], t4=forwardfilter[4], t5=forwardfilter[5]; for (i=0; i=5; i--) out[i]=t0*in[i]+t1*in[i-1]+t2*in[i-2]+t3*in[i-3]+ t4*in[i-4]+t5*in[i-5]; out[4]=t0*in[4]+t1*in[3]+t2*in[2]+t3*in[1]+t4*in[0]; out[3]=t0*in[3]+t1*in[2]+t2*in[1]+t3*in[0]; out[2]=t0*in[2]+t1*in[1]+t2*in[0]; out[1]=t0*in[1]+t1*in[0]; out[0]=t0*in[0]; } /* Forward part (padding now ensures room!) */ { register double t0=forwardfilter[0], t1=forwardfilter[1], t2=forwardfilter[2], t3=forwardfilter[3], t4=forwardfilter[4], t5=forwardfilter[5]; for (i=0; i7; i--) in[i]=t0*in[i]+t1*in[i-1]+t2*in[i-2]+t3*in[i-3]+ t4*in[i-4]+t5*in[i-5]+t6*in[i-6]+t7*in[i-7]+ t8*in[i-8]; in[7]=t0*in[7]+t1*in[6]+t2*in[5]+t3*in[4] +t4*in[3]+t5*in[2]+t6*in[1]+t7*in[0]; in[6]=t0*in[6]+t1*in[5]+t2*in[4]+t3*in[3]+t4*in[2]+t5*in[1]+t6*in[0]; in[5]=t0*in[5]+t1*in[4]+t2*in[3]+t3*in[2]+t4*in[1]+t5*in[0]; in[4]=t0*in[4]+t1*in[3]+t2*in[2]+t3*in[1]+t4*in[0]; in[3]=t0*in[3]+t1*in[2]+t2*in[1]+t3*in[0]; in[2]=t0*in[2]+t1*in[1]+t2*in[0]; in[1]=t0*in[1]+t1*in[0]; in[0]=t0*in[0]; } /* Forward part (padding now ensures room!) */ /* Note that even points on "in" are accumulated onto "out" */ { register double t0=forwardfilter[0], t1=forwardfilter[1], t2=forwardfilter[2], t3=forwardfilter[3], t4=forwardfilter[4], t5=forwardfilter[5], t6=forwardfilter[6], t7=forwardfilter[7], t8=forwardfilter[8]; for (i=0; i7; i--) in[i]=t0*in[i]+t1*in[i-1]+t2*in[i-2]+t3*in[i-3]+ t4*in[i-4]+t5*in[i-5]+t6*in[i-6]+t7*in[i-7]+ t8*in[i-8]; in[7]=t0*in[7]+t1*in[6]+t2*in[5]+t3*in[4] +t4*in[3]+t5*in[2]+t6*in[1]+t7*in[0]; in[6]=t0*in[6]+t1*in[5]+t2*in[4]+t3*in[3]+t4*in[2]+t5*in[1]+t6*in[0]; in[5]=t0*in[5]+t1*in[4]+t2*in[3]+t3*in[2]+t4*in[1]+t5*in[0]; in[4]=t0*in[4]+t1*in[3]+t2*in[2]+t3*in[1]+t4*in[0]; in[3]=t0*in[3]+t1*in[2]+t2*in[1]+t3*in[0]; in[2]=t0*in[2]+t1*in[1]+t2*in[0]; in[1]=t0*in[1]+t1*in[0]; in[0]=t0*in[0]; } /* Forward part (padding now ensures room!) */ /* Note that even points on "in" are accumulated onto "out" */ { register double t0=forwardfilter[0], t1=forwardfilter[1], t2=forwardfilter[2], t3=forwardfilter[3], t4=forwardfilter[4], t5=forwardfilter[5], t6=forwardfilter[6], t7=forwardfilter[7]; register double t8=forwardfilter[8]; for (i=0; i=padyb+nly) { /* fill in pads with zeros */ for (i=0; i=padxb && i=padyb && j=padxb && i=padyb && j=padyb+nly) { /* fill in pads with zeros */ for (i=0; i2; i-=2) { out[i]= t3*up[(i-3)/2]+t1*up[(i-1)/2]+t1*up[(i+1)/2]+t3*up[(i+3)/2]; out[i-1]=up[(i-1)/2]; } } /* Back and forth, linear convolution for +/- 8 filters using padding (IN / OUT on non-ghosts) out: output array (lower level) up: input array of just ghosts (upper level) n: size of actual data set being convolved npad: size of padded region needed for "leakage" of foward filters over the edge reversefilter: reverse-time filter coefficients forwardfilter: forward-time filter coefficients */ void convptsP8(double out[],double up[],int n,int npad, double reversefilter[],double forwardfilter[]) { register int i; /* Reverse part */ { register double t0=reversefilter[0], t1=reversefilter[1], t2=reversefilter[2], t3=reversefilter[3], t4=reversefilter[4], t5=reversefilter[5], t6=reversefilter[6], t7=reversefilter[7], t8=reversefilter[8]; /* Pad region */ /* out[npad-1]=0.; */ /* out[npad-2]=t8*up[(npad-2-8)/2]; */ /* out[npad-3]=t7*up[(npad-3-7)/2]; */ /* out[npad-4]=t6*up[(npad-4-6)/2]+t8*up[(npad-4-8)/2]; */ /* out[npad-5]=t5*up[(npad-5-5)/2]+t7*up[(npad-5-7)/2]; */ /* out[npad-6]=t4*up[(npad-6-4)/2]+t6*up[(npad-6-6)/2]+t8*up[(npad-6-8)/2]; */ /* out[npad-7]=t3*up[(npad-7-3)/2]+t5*up[(npad-7-5)/2]+t7*up[(npad-7-7)/2]; */ /* out[npad-8]=t2*up[(npad-8-2)/2]+t4*up[(npad-8-4)/2]+t6*up[(npad-8-6)/2]+t8*up[(npad-8-8)/2]; */ /* Main reverse loop exploiting zeros */ for (i=n-1; i>=8; i-=2) { out[i]=t1*up[(i-1)/2]+t3*up[(i-3)/2]+t5*up[(i-5)/2]+t7*up[(i-7)/2]; out[i-1]=t0*up[(i-1)/2]+t2*up[(i-1-2)/2] +t4*up[(i-1-4)/2]+t6*up[(i-1-6)/2]+t8*up[(i-1-8)/2]; } /* Fence posting */ /* out[2]=t0*in[2] +t2*in[0]; */ /* out[7]=t1*up[(7-1)/2]+t3*up[(7-3)/2]+t5*up[(7-5)/2]+t7*up[(7-7)/2]; */ /* out[6]=t0*up[(6)/2]+t2*up[(6-2)/2]+t4*up[(6-4)/2]+t6*up[(6-6)/2]; */ /* out[5]=t1*up[(5-1)/2]+t3*up[(5-3)/2]+t5*up[(5-5)/2]; */ /* out[4]=t0*up[(4)/2]+t2*up[(4-2)/2]+t4*up[(4-4)/2]; */ /* out[3]=t1*up[(3-1)/2]+t3*up[(3-3)/2]; */ /* out[2]=t0*up[(2)/2]+t2*up[(2-2)/2]; */ /* out[1]=t1*up[(1-1)/2]; */ /* out[0]=t0*up[(0)/2]; */ } /* Forward part (padding now ensures room!) */ { register double t0=forwardfilter[0], t1=forwardfilter[1], t2=forwardfilter[2], t3=forwardfilter[3], t4=forwardfilter[4], t5=forwardfilter[5], t6=forwardfilter[6], t7=forwardfilter[7], t8=forwardfilter[8]; for (i=8; i8; i-=2) { out[i]= t7*up[(i-7)/2]+t5*up[(i-5)/2]+t3*up[(i-3)/2] +t1*up[(i-1)/2]+t1*up[(i+1)/2]+t3*up[(i+3)/2] +t5*up[(i+5)/2]+t7*up[(i+7)/2]; out[i-1]= t8*up[(i-8)/2]+t6*up[(i-6)/2]+t4*up[(i-4)/2] +t2*up[(i-2)/2]+t0*up[i/2]+t2*up[(i+2)/2]+t4*up[(i+4)/2] +t6*up[(i+6)/2] +t8*up[(i+8)/2]; } } else { /* antisymmetric filter */ /* Main reverse loop exploiting zeros */ for (i=n-9; i>8; i-=2) { out[i]= -1.*t7*up[(i-7)/2]-t5*up[(i-5)/2]-t3*up[(i-3)/2] -t1*up[(i-1)/2]+t1*up[(i+1)/2]+t3*up[(i+3)/2] +t5*up[(i+5)/2]+t7*up[(i+7)/2]; out[i-1]= -1.*t8*up[(i-8)/2]-t6*up[(i-6)/2]-t4*up[(i-4)/2] -t2*up[(i-2)/2]+t0*up[i/2]+t2*up[(i+2)/2]+t4*up[(i+4)/2] +t6*up[(i+6)/2] +t8*up[(i+8)/2]; } } } /* I'm not going to change this routine to use anti/symmetric filters, since it's optimized for L anyway (which is a symmetric filter). -- mhe 4/12/02 */ void convptsP8fullL(double out[],double up[],int n,int npad,double fullfilt[]) { register int i; register double t0=fullfilt[0],t1=fullfilt[1],t2=fullfilt[2], t3=fullfilt[3],t4=fullfilt[4],t6=fullfilt[6]; /* Main reverse loop exploiting zeros */ for (i=n-9; i>8; i-=2) { out[i]= t3*up[(i-3)/2] +t1*up[(i-1)/2]+t1*up[(i+1)/2]+t3*up[(i+3)/2]; out[i-1]= t6*up[(i-6)/2]+t4*up[(i-4)/2] +t2*up[(i-2)/2]+t0*up[i/2]+t2*up[(i+2)/2]+t4*up[(i+4)/2] +t6*up[(i+6)/2]; } } /* Full two-level 3d convolution of transform type (IN PLACE) Computes I +/- ~P I P, where P is projection onto ghosts, ~P is the projection onto non-ghosts, and I is the simple interpolet convolusion, and OBEYS REAL/COEFF SPACE CONVENTIONS on output, PROVIDED they are obeyed on input. upper: Points to location in upper data array corresponding to origin of lower array. lower: Points to origin of lower array sign: Sign of transform, + is Idag, - is Jdag (should be I and J, non??(Torkel)) reversefilter: reverse-time filter coefficients (2d array: [x,y,z][filter]) forwardfilter: forward-time filter coefficients (2d array: [x,y,z][filter]) fullfilter: full filter nux, nuy, nuz: dimensions of upper array nlx, nly, nlz: dimensions of lower array hfl: length of half-filters work: workspace of size at least (nlx+3*hfl)*(nly+3*hfl)*(nlz+3*hfl) */ void aconvP3d(double upper[], double lower[], int sign, double reversefilter[], double forwardfilter[], double fullfilter[], int nux, int nuy, int nuz, int nlx, int nly, int nlz, int hfl, double work[],double work_extra[]) { int i,j; /* Constants giving region of workspace which must be computed */ int nx=nlx+2*hfl; int ny=nly+2*hfl; int nz=nlz+2*hfl; /* Constants giving total size of workspace, including padding for factored convolultion strategy */ int nwx=nx+hfl; int nwy=ny+hfl; int nwz=nz+hfl; int nwyz=nwy*nwz; /* Constants describing filling of results into lower region */ int nlyz=nly*nlz, fptl= 2*(hfl/2), lptl= 2*(hfl/2)+nlz-1, fsetl=2*(hfl/2), lsetl=2*(hfl/2)+nlx-1; double *workp,*upperp,*lowerp; double *work_extrap; /* INDEXING NOTES: (1) Our basic indexing refers to the work array, which has a buffer region that circumscribes the lower level. The beginning of the lower level is thus at (2(hfl/2),2(hfl/2),2(hfl/2)) [in INTEGER arthimetic in the indexing below. (2) "upper" is defined to reference the point in the upper array that corresponds to the origin of the lower array. There are two things needed to translate properly: a homogeneous scale factor of two AND to adjust for the fact that the origin of the work array refers to the point (-(hfl/2),-(hfl/2),-(hfl/2)) from the origin of the lower array in the upper array. The scale factor is handled explicitly in the code below. To adjust for the issue of the origin, we now simply include this shift into the pointer. (3) "lower" references the origin of the "lower" array where the output is supposed to go. We now shift it so that convsetspo_ can use the referencing of the work array. */ upperp=upper-((1*nuy+1)*nuz+1)*(hfl/2); lowerp=lower-((2*nly+2)*nlz+2)*(hfl/2); /* Convolution of each yz plane */ for (i=0; i