/* DFT++ is a density functional package developed by the research group of Professor Tomas Arias Copyright 1996-2003 Sohrab Ismail-Beigi This file is part of DFT++. DFT++ is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. DFT++ is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with DFT++; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Please see the file CREDITS for a list of authors. For academic users, we request that publications using results obtained with this software reference "New algebraic formulation of density functional calculation," by Sohrab Ismail-Beigi and T.A. Arias, Computer Physics Communications 128:1-2, 1-45 (June 2000). and, if using the wavelet basis, further reference "Multiresolution analysis of electronic structure: semicardinal and wavelet bases," T.A. Arias, Reviews of Modern Physics 71:1, 267-311 (January 1999). and "Robust ab initio calculation of condensed matter: transparent convergence through semicardinal multiresolution analysis,'' I.P. Daykov, T.A. Arias, and Torkel D. Engeness, Physical Review Letters, 90:21, 216402 (May 2003). For your convenience, preprints of the above articles may be obtained from http://arXiv.org/abs/cond-mat/9909130, 9805262, and 0204411, respectively. */ #include "header.h" /********************* * copy constructor * *********************/ ColumnBundle::ColumnBundle(const ColumnBundle &C) { // copy the structure C.copy_structure_to(*this); // copy the data! data = C.data; } /*********************** * regular constructor * ***********************/ ColumnBundle::ColumnBundle(int length, int ncol, Basis *basis) { tot_ncols = ncol; my_ncols = ncol; start_ncol = 0; end_ncol = ncol-1; col_length = length; init(tot_ncols, basis, "distributed"); } ColumnBundle::ColumnBundle(int ncols, Basis *basis, const char *distr) { init(ncols, basis, distr); } ///////////////////////////////////////////// // Destructor: frees the memory used // ///////////////////////////////////////////// ColumnBundle::~ColumnBundle() { destroy_bands(my_ncols,col); myfree(col); data.free(); } /***************************************************************************** * Here, we take an existing ColumnBundle, and make a new one using * * it as a template. This entails creating our own Column structure, then * * calling init() to actually allocate the memory chunk (and point the Column* * datapointers to the right place). * *****************************************************************************/ void ColumnBundle::copy_structure_to(ColumnBundle &C) const { C.tot_ncols = tot_ncols; C.my_ncols = my_ncols; C.start_ncol = start_ncol; C.end_ncol = end_ncol; C.col_length = col_length; C.distributed = distributed; // dft_log("&(col[0]->basis->nbasis) = %d, col[0]->basis->nbasis= %d\n", // &(col[0]->basis->nbasis), col[0]->basis->nbasis); C.init(tot_ncols, col[0]->basis, C.distributed?"distributed":"local"); } /***************************************************************************** * This creates a ColumnBundle from scratch, using the number of * columns, * the quantum number, the basis spec. "str" tells us if we * are "distributed"* or not. * *****************************************************************************/ void ColumnBundle::init(int ncols, Basis *basis, const char *str) { tot_ncols = ncols; // let's work out how to distribute ourselves distributed = FALSE; #ifdef DFT_MPI if(!strcmp(str, "distributed")) distributed = TRUE; #endif if(distributed) distribute_cols(); else{ my_ncols = tot_ncols; start_ncol = 0; end_ncol = tot_ncols-1; } // allocate our array of columns col = (CoeffSpaceWavefunctionColumn **)mymalloc(my_ncols*sizeof(CoeffSpaceWavefunctionColumn*), "ColumnBundle::init(int ncols, Basis &basis, const char *str)", "col"); // actually create our column structure create_bands(my_ncols, col, basis); // currently, lets assume each columns is the same length. // if not, then major changes will have to be made to ^ and a bunch // of other things as well // the length of the column is a basis dependent thing, // so we can only set it after the columns have been created. // here we set the bundle col_length equal to these if(basis != NULL) // ipd: 08/13/02, sometimes with MPI we have mpi_processes // that hold bundles with no columns. so we can't dereference // col. Let's take the length from the basis, so that it is // properly set, even if there are no columns // (we need this so that many consistency checks keep working) // col_length = col[0]->length; col_length = basis->nbasis; // initialize us (get memory, set datapointers inside the columns allocate_memory(); } /* * Allocate/free an array of column bundles according to the ones in * bloch states: used in the minimization algorithm */ ColumnBundle **alloc_ColumnBundle_array(int nbundles,BlochState *states) { ColumnBundle **CB = (ColumnBundle **)mymalloc(nbundles*sizeof(ColumnBundle*), "alloc_ColumnBundle_aray()", "CB"); int i; for(i=0; iinit_wavefunction(*b); } } void destroy_bands(int ncols, CoeffSpaceWavefunctionColumn **cols) { int i; for(i = 0; i < ncols; i++) delete cols[i]; } /************************************************************************* * allocate_memory() assumes that there is a valid Column structure in * * place and it allocates memory for our data. then it points the * * datapointers inside our columns to the appropriate place in the * * bigass chunk * *************************************************************************/ void ColumnBundle::allocate_memory() { if (my_ncols==0) return; data.init(my_ncols*col_length); for(int i=0; i < my_ncols; i++){ col[i]->length = col_length; col[i]->data.ndata = col_length; col[i]->data.d = data.d+i*col_length; } } void ColumnBundle::get_scratch_memory() { data.get_temp(my_ncols*col_length); for(int i=0; i < my_ncols; i++){ col[i]->data.ndata = col_length; col[i]->data.d = data.d+i*col_length; } } void ColumnBundle::free_scratch_memory() { data.release_temp(); } ///////////////////////////////////////////// // Generate my_ncols, start_ncol, end_ncol // // from tot_ncols for a distributed // // ColumnBundle case // // This must also work for the single node // // case where it just distributed all // // columns to a single node (i.e. serial // // case). // // The distribution is as follows: // // d = tot_ncols/N_Procs // // r = tot_ncols%N_Procs // // the first r processes get d+1 columns // // and the rest get d columns // // and the order of distribution is just // // linear in the columns, i.e. columns // // 0..d go to process 0, columns d+1,2*d+1 // // go to process 1, etc. // ///////////////////////////////////////////// void ColumnBundle::distribute_cols() { int d,r; d = tot_ncols / System::Get_N_Procs(); r = tot_ncols % System::Get_N_Procs(); // If the process is one of the first r, then it gets d+1 columns if (System::Get_procID() < r) { my_ncols = d+1; start_ncol = (d+1)*System::Get_procID(); } // Otherwise, it gets d columns else { my_ncols = d; start_ncol = (d+1)*r + d*(System::Get_procID()-r); } // the last column owned by the process (inclusive) end_ncol = start_ncol + my_ncols - 1; } ///////////////////////////////////////////// // Get Processor ID from column number // // Namely, given a column_number, find the // // processor number of its owner based on // // above distribution // ///////////////////////////////////////////// int ColumnBundle::ncol_to_ID(int col_index) const { int d,r,id; d = tot_ncols / System::Get_N_Procs(); r = tot_ncols % System::Get_N_Procs(); // If it's one of the columns belonging to the first r processes // which have d+1 columns each... if (col_index < r*(d+1)) id = col_index / (d+1); // otherwise, it belongs to the set of processes with d columns else id = (col_index - r) / d; // In the unlikely but bizzare case... if (id >= System::Get_N_Procs() || id < 0) die("In ColumnBundle::ncol_to_ID, id >= N_Procs or id < 0\n"); return id; } ////////////////////////////////////// // // // Operator functions // // // ////////////////////////////////////// /* Assignment: nonstandard in that it returns void. To make it standard, * replace void -> ColumnBundle and uncomment the return *this; */ void ColumnBundle::operator=(const ColumnBundle &Y) { /* The sizes must agree */ if (tot_ncols != Y.tot_ncols) die("In ColumnBundle::operator=, tot_ncols != Y.tot_ncols\n"); if (my_ncols != Y.my_ncols) die("In ColumnBundle::operator=, my_ncols != Y.my_ncols\n"); if (col_length != Y.col_length) die("In ColumnBundle::operator=, col_length != Y.col_length\n"); if (distributed != Y.distributed) die("In ColumnBundle::operator=, distributed != Y.distributed\n"); data = Y.data; } void ColumnBundle::zero_out(void) { data.zero_out(); } /************************************************ * IPD: Linear Algebra - call the array algebra * ***********************************************/ void ColumnBundle::operator+=(const ColumnBundle &in) { // check the quantum numbers //............... //............... data += in.data; } void ColumnBundle::operator-=(const ColumnBundle &in) { data -= in.data; } void ColumnBundle::operator*=(const double &s) { data *= s; } void ColumnBundle::operator*=(const complex &s) { data *= s; } void ColumnBundle::operator/=(const double &s) { data*=(1./s); } void ColumnBundle::operator/=(const complex &s) { data*= (1./s); } /**** Binary operators - call the unary operators ********/ ColumnBundle operator+(const ColumnBundle &a, const ColumnBundle &b) { ColumnBundle out(a); out+=b; return out; } ColumnBundle operator-(const ColumnBundle &a, const ColumnBundle &b) { ColumnBundle out(a); out -= b; return out; } ColumnBundle operator*(const double &d, const ColumnBundle &b) { ColumnBundle out(b); out *= d; return out; } ColumnBundle operator*(const ColumnBundle &b, const double &d) { ColumnBundle out(b); out *= d; return out; } ColumnBundle operator*(const complex &d, const ColumnBundle &b) { ColumnBundle out(b); out *= d; return out; } ColumnBundle operator*(const ColumnBundle &b, const complex &d) { ColumnBundle out(b); out *= d; return out; } ColumnBundle operator/(const ColumnBundle &b, const complex &c) { ColumnBundle out(b); out /= c; return out; } /* Does Yout += s*Yin */ void scale_accumulate(real r,const ColumnBundle &Yin,ColumnBundle &Yout) { scale_accumulate(r, Yin.data, Yout.data); } /* Does Yout[k] += s * Yin[k] */ void scale_accumulate(int nbundles,real r,ColumnBundle **Yin, ColumnBundle **Yout) { for (int k=0; k < nbundles; k++) scale_accumulate(r,*Yin[k],*Yout[k]); } /* Does Yout = s1*Y1 + s2*Y2 */ void scaled_sum(real r1,ColumnBundle &Y1, real r2,ColumnBundle &Y2, ColumnBundle &Yout) { if (Y1.my_ncols != Y2.my_ncols || Y1.my_ncols != Yout.my_ncols || Y1.col_length != Y2.col_length || Y1.col_length != Yout.col_length) die("Incomaptible sizes in scaled_sum(s1,Y1,s2,Y2,Yout)"); scaled_sum(r1, Y1.data, r2, Y2.data, Yout.data); } /* Does Yout[i] = s1*Y1[i] + s2*Y2[i] */ void scaled_sum(int nbundles, real r1,ColumnBundle **Y1, real r2,ColumnBundle **Y2, ColumnBundle **Yout) { for (int i=0; i < nbundles; i++) scaled_sum(r1,*Y1[i],r2,*Y2[i],*Yout[i]); } /* Sum of absolute squares of all elements in Y */ real abs2(const ColumnBundle &Y) { real Y2; int i,j; Y2 = 0.0; for (i=0; i < Y.my_ncols; i++) for (j=0; j < Y.col_length; j++){ #if defined WFS_COMPLEX Y2 += Y.col[i]->data.d[j].x * Y.col[i]->data.d[j].x + Y.col[i]->data.d[j].y * Y.col[i]->data.d[j].y; #elif defined WFS_REAL Y2 += Y.col[i]->data.d[j] * Y.col[i]->data.d[j]; #else #error wavefnctions are neither real nor complex! #endif } // If the object is distributed, do a global // summation across the processors. if (Y.distributed) { #ifdef DFT_MPI #ifdef DFT_PROFILING timerOn(13); // Turn on other MPI_Allreduce timer counterIncr(21); // other reduces counter counterIncr(22,sizeof(double)); // Bytes reduced #endif // DFT_PROFILING real temp = 0.0; MPI_Allreduce ( &Y2, &temp, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD ); Y2 = temp; #ifdef DFT_PROFILING timerOff(13); // Turn off other MPI_Allreduce timer #endif // DFT_PROFILING #endif // DFT_MPI } return Y2; } /* Sum of absolute squares of all elements in array of ColumnBundles Y: * i.e. it is just the sum of abs2() on all nbundle elements of Y */ real abs2(int nbundles,ColumnBundle **Y) { int i; real Y2; Y2 = 0.0; for (i=0; i < nbundles; i++) Y2 += abs2(*Y[i]); return Y2; } /* Take "dot-product" of two ColumnBundles: REsum the diagonals of Y1^Y2 */ real dot(const ColumnBundle &Y1,const ColumnBundle &Y2) { if (Y1.tot_ncols != Y2.tot_ncols) die("Y1.tot_ncols != Y2.tot_ncols in dot_ColumnBundles\n"); if (Y1.my_ncols != Y2.my_ncols) { dft_log(DFT_SILENCE, "Different distribution of the same size arrays!!\n"); die("Y.my_ncols != Y2.my_ncols in dot_ColumnBundles\n"); } if (Y1.col_length != Y2.col_length) die("Y1.col_length != Y2.col_length in dot_ColumnBundles\n"); if (Y1.distributed != Y2.distributed) die("Y1, Y2 are not the same distributed type\n"); int j; int i; real d; d = 0.0; for (i=0; i < Y1.my_ncols; i++) for (j=0; j < Y1.col_length; j++){ /* do d += RE(conj(Y1.col[i].c[j])*Y2.col[i].c[j];) */ #ifdef WFS_COMPLEX d += Y1.col[i]->data.d[j].x*Y2.col[i]->data.d[j].x + Y1.col[i]->data.d[j].y*Y2.col[i]->data.d[j].y ; #elif defined WFS_REAL d += Y1.col[i].data.d[j]*Y2.col[i].data.d[j]; #else #error wavefunctions are neither real nor complex! #endif } // If the object is distributed, then do a global summation if ( Y1.distributed ){ #ifdef DFT_MPI real temp = 0.0; #ifdef DFT_PROFILING timerOn(13); // Turn on other MPI_Allreduce timer counterIncr(21,1); // reduce count counterIncr(22,sizeof(real)); // byte count #endif // DFT_PROFILING MPI_Allreduce ( &d, &temp, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD ); d = temp; #ifdef DFT_PROFILING timerOff(13); // Turn off other MPI_Allreduce timer #endif // DFT_PROFILING #endif // DFT_MPI } return d; } /* Take "dot-product" of nbundles pairs of ColumnBundles: i.e. * do dot_ColumnBundles does, but just loop over the pairs. */ real dot(int nbundles,ColumnBundle **Y1,ColumnBundle **Y2) { int i; real d; d = 0.0; for (i=0; i < nbundles; i++) d += dot(*Y1[i],*Y2[i]); return d; } Matrix operator^(const ColumnBundle &Y1,const ColumnBundle &Y2) { // Y1 is Nxn1, Y2 is Nxn2, and Y1dY2 is n1xn2. int n1, n2, N; n1 = Y1.my_ncols; n2 = Y2.my_ncols; N = Y1.col_length; if (Y1.col_length != Y2.col_length) die("In operator ^ on ColumnBundles, Y1.col_length != Y2.col_length\n"); #ifdef DFT_PROFILING timerOn(3); // Y1^Y2 timer counterIncr(1); // Y1^Y2 count // Y1^Y2 flop count counterIncr(0, 8.0*(double)n1*(double)Y2.tot_ncols*(double)N); #endif // DFT_PROFILING Matrix Y1dY2(Y1.tot_ncols,Y2.tot_ncols); // zero out Y1dY2 Y1dY2.zero_out(); #ifdef DFT_MPI // with MPI, Y1 or Y2 might be distributed // both Y1 and Y2 are distribyted if ( (Y1.distributed == 1) && (Y2.distributed == 1) ) { // use the code in dist_multiply.c by Ken Esler. do_Y1dag_Y2_distributed(Y1, Y2, Y1dY2); #ifdef DFT_PROFILING timerOff(3); // turn off Y1^Y2 timer counterIncr(2); // Distributed Y1^Y2 counter #endif // DFT_PROFILING // done return Y1dY2; } // Y1 is local, Y2 is distributed else if ( Y1.distributed == 0 ) { // At the end, this needs a global reduction (see below). // do the multiply with proper offset Y1dagY2_block_matrix_mult(Y1,Y2,Y1dY2,n1,n2,N,0,Y2.start_ncol,0,0); } // Y1 is distributed, Y2 is local else if ( Y2.distributed == 0 ) { // At the end, this needs a global reduction (see below). // do the multiply with proper offset Y1dagY2_block_matrix_mult(Y1,Y2,Y1dY2,n1,n2,N,Y1.start_ncol,0,0,0); } #endif //DFT_MPI // Y1 and Y2 are both local. if ( (Y1.distributed == 0) && (Y2.distributed == 0) ) { // do the multiply Y1dagY2_block_matrix_mult(Y1,Y2,Y1dY2,n1,n2,N,0,0,0,0); #ifdef DFT_PROFILING timerOff(3); // turn off Y1dag Y2 timer #endif // DFT_PROFILING // both are local, so we've got the final answer! We're done. return Y1dY2; } #ifdef DFT_MPI // If we are running MPI and have distributed ColumnBundles, // then we must do a global reduction of all the processors's // Y1dY2 matrices into result to get the final answer. Matrix result(Y1.tot_ncols, Y2.tot_ncols); // zero out where the final answer is going result.zero_out(); // do the global sum over all processors #ifdef DFT_PROFILING timerOn(12); // Turn on Y1^Y2 MPI_Allreduce timer // Number of bytes reduced counterIncr(3,sizeof(scalar)*(double)Y1.tot_ncols*(double)Y2.tot_ncols); #endif MPI_Allreduce ( &(Y1dY2.c[0]), &(result.c[0]), Y1.tot_ncols * Y2.tot_ncols * SCALAR_SIZE, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD ); #ifdef DFT_PROFILING timerOff(12); // Turn off Y1^Y2 MPI_Allreduce timer #endif // DFT_PROFILING #else // DFT_MPI // We aren't running MPI, then we don't do any reduction, and // the final answer is just Y1dY2 and we'll just make result // point to it. Matrix &result = Y1dY2; #endif // DFT_MPI return result; } /* * Multiply ColumnBundle by a matrix on the right: * YM = Y*M */ ColumnBundle operator*(const ColumnBundle &Y, const Matrix &M) { if (Y.tot_ncols != M.nr) die("In ColumnBundle::operator*, Y.tot_ncols != M.nr\n"); ColumnBundle YM(Y.col_length, M.nc); /* Call the router function below!! */ do_ColumnBundle_matrix_mult(Y,M,YM,0); return YM; } void mult(const ColumnBundle Y, const Matrix &M, ColumnBundle &YM) { do_ColumnBundle_matrix_mult(Y,M,YM,0); } /* * Does YM = Y*M or YM += Y*M. */ void do_ColumnBundle_matrix_mult(const ColumnBundle &Y, const Matrix &M, ColumnBundle &YM, int accum) { #ifdef DFT_PROFILING timerOn(4); // turn on Y * M timer counterIncr(5); // Y*M count #endif // DFT_PROFILING if (Y.tot_ncols != M.nr) die("In do_ColumnBundle_matrix_mult(), Y.tot_ncols != M.nr\n"); if (Y.col_length != YM.col_length) die("In do_ColumnBundle_matrix_mult(), Y.col_length != YM.col_length\n"); if (M.nc != YM.tot_ncols) die("In do_ColumnBundle_matrix_mult(), M.nc != YM.tot_ncols\n"); // We assume that YM and Y both have valid and congruent column structures int N = Y.col_length; int ncols = YM.my_ncols; int nrows = Y.my_ncols; #ifdef DFT_PROFILING // Flop count of Y*M counterIncr(4,8.0*(double)N*(double)ncols*Y.tot_ncols); #endif /* Zero out final result if not in accumulation mode */ if (!accum) YM.zero_out(); // both YM and Y are distributed if ( (Y.distributed == 1) && (YM.distributed == 1) ) { // Use code in dist_multiply.c do_Y_M_mult_distributed(Y, M, YM, accum); #ifdef DFT_PROFILING counterIncr(6); // distributed Y*M count #endif // DFT_PROFILING } // Y and YM are both local. else if ( (Y.distributed == 0) && (YM.distributed == 0) ) { Y_M_block_matrix_mult(Y,M,YM,N,nrows,ncols,0,0,0,accum); } // Y is local, YM is distributed. else if ( Y.distributed == 0 ) { Y_M_block_matrix_mult(Y,M,YM,N,nrows,ncols,0,YM.start_ncol,0,accum); } // Y is distributed, YM is local. else if ( YM.distributed == 0 ) { #ifdef DFT_MPI die("YM = Y*M where Y is distributed and YM is local is not supported\n"); /* // If in MPI mode, then we have to create a new ColumnBundle */ /* // to hold the local result before we sum up accros processors. */ /* ColumnBundle Ytemp(YM.tot_ncols, YM.col_length, YM.basis, "local"); */ /* copy_innards_ColumnBundle(&YM, &Ytemp); */ /* // If in accum mode, then the zeroth processor starts with YM */ /* // in its local result and accumulates into it. All other proc. */ /* // and for all other cases start with zero in their local result. */ /* if (accum && System::Get_procID()==0) */ /* Ytemp = YM; */ /* else */ /* Ytemp.zero_out(); */ /* #else */ /* // Non MPI mode... just make Ytemp point to YM and zero it out */ /* // if not in accum mode. */ /* ColumnBundle &Ytemp = YM; */ /* if (!accum) */ /* Ytemp.zero_out(); */ #endif /* // do the multiply into Ytemp */ /* Y_M_block_matrix_mult(Y,M,Ytemp,N,nrows,ncols,Y.start_ncol,0,0,accum); */ /* #ifdef DFT_MPI */ /* #ifdef DFT_PROFILING */ /* timerOn(13); // Turn on other MPI_Allreduce timer */ /* counterIncr(21); // Number of other reduces */ /* // Bytes reduced */ /* counterIncr(22,sizeof(scalar)*(double)N*(double)ncols); */ /* #endif */ /* // do a global reduction on Ytemp, store in YM; */ /* MPI_Allreduce ( &(Ytemp.col[0].c[0]), &(YM.col[0].c[0]), */ /* N * ncols * SCALAR_SIZE, */ /* MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD ); */ /* #ifdef DFT_PROFILING */ /* timerOff(13); // Turn off other MPI_Allreduce timer */ /* #endif */ /* #endif // DFT_MPI */ } #ifdef DFT_PROFILING timerOff(4); // turn off Y * M timer #endif // DFT_PROFILING } /* randomize: this is basis dependent, so call the * relevant function on the columns */ void ColumnBundle::randomize() { for(int i = 0; i < my_ncols; i++) col[i]->randomize(); } // Write ColumnBundle in binary to file fname ////////////////////////////////////////////////// // Processor identified by MPI_IO has regular // // IO functionalities. See MPI_Get_Attr for // // furthur details. For now, assume processor 0 // // is this processor. // ////////////////////////////////////////////////// void ColumnBundle::write(char *fname) { FILE *fp; fp = dft_fopen(fname, "w"); write_stream(fp); dft_fclose(fp); } void write_ColumnBundle_array(char *fname,int nbundles,ColumnBundle **Y) { FILE *fp = NULL; int i; fp = dft_fopen(fname,"w"); for (i=0; i < nbundles; i++) Y[i]->write_stream(fp); dft_fclose(fp); } /* Write ColumnBundle in binary to the already open IO stream...don't * close it! */ /////////////////////////////// // See comments for write // // above. // /////////////////////////////// void ColumnBundle::write_stream(FILE *fp) { #ifdef DFT_MPI if (System::Get_procID() == System::Get_IOprocID() ) { // Temporary holding places for MPI. int nproc; int remote_ndata; // here we assume that the IO node has as many columns as the // node with the most columns CoeffSpaceWavefunctionData tmp_data(data.ndata); MPI_Status status; // CRITICAL: data.col[i].c[j] must be continuous, with j being // the inner index. // loop over all MPI processes for ( nproc = 0; nproc < System::Get_N_Procs(); nproc++ ) { // if you are at the IO MPI process, just write to file if ( nproc == System::Get_procID() ) fwrite(data.d, data.datasize, data.ndata, fp); else { // Receive the size of other proces' data into remote_ndata. MPI_Recv(&remote_ndata, 1, MPI_INT, nproc, 0, MPI_COMM_WORLD, &status); // Receive the actual data. if (remote_ndata > 0){ MPI_Recv(tmp_data.d, remote_ndata*(tmp_data.datasize/sizeof(double)), MPI_DOUBLE, nproc, 1, MPI_COMM_WORLD, &status); fwrite(tmp_data.d, tmp_data.datasize, remote_ndata, fp); } } } } else { // Send data to processor io_node for writing. MPI_Send(&data.ndata, 1, MPI_INT, System::Get_IOprocID(), 0, MPI_COMM_WORLD); if (data.ndata > 0) MPI_Send(data.d, data.ndata*(data.datasize/sizeof(double)), MPI_DOUBLE, System::Get_IOprocID(), 1, MPI_COMM_WORLD); } MPI_Barrier(MPI_COMM_WORLD); // just to be safe, synchronize. #else // DFT_MPI fwrite(data.d, data.datasize, data.ndata, fp); #endif // DFT_MPI } void ColumnBundle::read(char *fname) { FILE *fp; fp = dft_fopen(fname, "r"); read_stream(fp); dft_fclose(fp); } /* Read ColumnBundle in binary form from an already open IO stream... * don't close it! */ void ColumnBundle::read_stream(FILE *fp) { #ifdef DFT_MPI if (System::Get_procID() == System::Get_IOprocID() ) { // Temporary holding places for MPI. int nproc; int remote_ndata; CoeffSpaceWavefunctionData tmp_data(data.ndata); // here we assume that the IO node // has as many columns as the // node with the most columns MPI_Status status; // CRITICAL: data.col[i].c[j] must be continuous, with j being // the inner index. // And my_ncols must NOT differ by more than 1 across processes. for ( nproc = 0; nproc < System::Get_N_Procs(); nproc++ ) { if ( nproc == System::Get_procID() ) { fread(data.d, data.datasize, data.ndata, fp); } else { MPI_Recv(&remote_ndata, 1, MPI_INT, nproc, 0, MPI_COMM_WORLD, &status); if (remote_ndata > 0){ fread(tmp_data.d, tmp_data.datasize, remote_ndata, fp); MPI_Send(tmp_data.d, remote_ndata*(tmp_data.datasize/sizeof(double)), MPI_DOUBLE, nproc, 1, MPI_COMM_WORLD); } } } } else { MPI_Status status; MPI_Send(&data.ndata, 1, MPI_INT, System::Get_IOprocID(), 0, MPI_COMM_WORLD); // Receive data from processor io_node. if (my_ncols > 0) MPI_Recv(data.d, data.ndata*(data.datasize/sizeof(double)), MPI_DOUBLE, System::Get_IOprocID(), 1, MPI_COMM_WORLD, &status); } MPI_Barrier(MPI_COMM_WORLD); // just to be safe, synchronize. #else // DFT_MPI fread(data.d, data.datasize, data.ndata, fp); #endif // DFT_MPI } /* Copies all the internal data of Y1 into Y2: Y1 -> Y2 */ /* void */ /* copy_innards_ColumnBundle(const ColumnBundle *Y1,ColumnBundle *Y2) */ /* { */ /* Y2->tot_ncols=Y1->tot_ncols; */ /* Y2->my_ncols=Y1->my_ncols; */ /* Y2->start_ncol=Y1->start_ncol; */ /* Y2->end_ncol=Y1->end_ncol; */ /* Y2->col_length=Y1->col_length; */ /* Y2->distributed=Y1->distributed; */ /* } */