#include "Bdef.h"
#ifdef T3EReductErr
#define DefCombTop '1'
#endif
#if (INTFACE == C_CALL)
void Cdgamx2d(int ConTxt, char *scope, char *top, int m, int n, double *A,
              int lda, int *rA, int *cA, int ldia, int rdest, int cdest)
#else
F_VOID_FUNC dgamx2d_(int *ConTxt, F_CHAR scope, F_CHAR top, int *m, int *n,
                     double *A, int *lda, int *rA, int *cA, int *ldia,
                     int *rdest, int *cdest)
#endif
/*
 *  -- V1.1 BLACS routine --
 *  University of Tennessee, May 1, 1996
 *  Written by Clint Whaley.
 *
 *  Purpose
 *  =======
 *  Combine amx operation for double precision rectangular matrices.
 *  ("amx" is the BLACS name for the element-wise absolute-value maximum;
 *  see the BLACS Users' Guide.)  Optionally reports, for every element,
 *  the process grid coordinates on which the winning value was found.
 *
 *  The same body serves two entry points selected by INTFACE: the C
 *  interface (Cdgamx2d) receives scalar arguments by value, the Fortran77
 *  interface (dgamx2d_) receives them by pointer.  The "Ptr to" wording
 *  below describes the Fortran77 interface.
 *
 *  Arguments
 *  =========
 *
 *  ConTxt  (input) Ptr to int
 *          Index into MyConTxts00 (my contexts array).
 *
 *  SCOPE   (input) Ptr to char
 *          Limit the scope of the operation (case-insensitive).
 *          = 'R' :   Operation is performed by a process row.
 *          = 'C' :   Operation is performed by a process column.
 *          = 'A' :   Operation is performed by all processes in grid.
 *          Any other value is reported through BI_BlacsErr.
 *
 *  TOP     (input) Ptr to char
 *          Controls fashion in which messages flow within the operation
 *          (case-insensitive).  As dispatched by this routine:
 *          ' '        : MPI reduction (BI_MPI_Reduce / BI_MPI_Allreduce),
 *                       unless DefCombTop is defined, M < 1, N < 1 or
 *                       ctxt->TopsRepeat is set, in which case another
 *                       topology is substituted ('1' for the latter three).
 *          'i','d','s','m' : BI_MringComb with last argument 1, -1, 2 and
 *                       ctxt->Nr_co respectively.
 *          '1'..'9'   : BI_TreeComb with last argument 2..10.
 *          'f'        : BI_TreeComb with FULLCON.
 *          't'        : BI_TreeComb with ctxt->Nb_co.
 *          'h'        : BI_BeComb when the result goes to all processes
 *                       and ctxt->TopsCohrnt is not set; otherwise
 *                       BI_TreeComb with last argument 2.
 *          Any other value is reported through BI_BlacsErr.
 *
 *  M       (input) Ptr to int
 *          The number of rows of the matrix A.  M >= 0.
 *
 *  N       (input) Ptr to int
 *          The number of columns of the matrix A.  N >= 0.
 *
 *  A       (input/output) Ptr to double precision two dimensional array
 *          The m by n matrix A.  Fortran77 (column-major) storage
 *          assumed.  On entry, this process's contribution to the
 *          combine; on exit, on the destination process(es), the
 *          combined result.
 *
 *  LDA     (input) Ptr to int
 *          The leading dimension of the array A.  LDA >= M.
 *          A smaller value is silently replaced by M.
 *
 *  RA      (output) Integer Array, dimension (LDIA, N)
 *          Contains process row that the amx of each element
 *          of A was found on: i.e., rA(1,2) contains the process
 *          row that the amx of A(1,2) was found on.
 *          Values are left on process {rdest, cdest} only, others
 *          may be modified, but not left with interesting data.
 *          If rdest == -1, then result is left on all processes in scope.
 *          If LDIA == -1, this array is not accessed, and need not exist.
 *
 *  CA      (output) Integer Array, dimension (LDIA, N)
 *          Contains process column that the amx of each element
 *          of A was found on: i.e., cA(1,2) contains the process
 *          column that the amx of A(1,2) was found on.
 *          Values are left on process {rdest, cdest} only, others
 *          may be modified, but not left with interesting data.
 *          If rdest == -1, then result is left on all processes in scope.
 *          If LDIA == -1, this array is not accessed, and need not exist.
 *
 *  LDIA    (input) Ptr to int
 *          If (LDIA == -1), then the arrays RA and CA are not accessed.
 *          ELSE leading dimension of the arrays RA and CA.  LDIA >= M.
 *          A smaller value (other than -1) is replaced by M.
 *
 *  RDEST   (input) Ptr to int
 *          The process row of the destination of the amx.
 *          If rdest == -1, then result is left on all processes in scope.
 *
 *  CDEST   (input) Ptr to int
 *          The process column of the destination of the amx.
 *          CDEST is always read: cdest == -1 is treated exactly like
 *          rdest == -1 (result left on all processes in scope), so the
 *          argument must be valid even when rdest == -1.
 *
 * ------------------------------------------------------------------------
 */
{
   void BI_ArgCheck(int, int, char *, char, char, char, int, int, int, int,
                    int *, int *);
   void BI_UpdateBuffs(BLACBUFF *);
   BLACBUFF *BI_GetBuff(int);
   int BI_BuffIsFree(BLACBUFF *, int);
   BI_MPI_Datatype BI_GetMpiGeType(BLACSCONTEXT *, int, int, int,
                                   BI_MPI_Datatype, int *);
   BLACBUFF *BI_Pack(BLACSCONTEXT *, BVOID *, BLACBUFF *, BI_MPI_Datatype);
   void BI_Unpack(BLACSCONTEXT *, BVOID *, BLACBUFF *, BI_MPI_Datatype);
   void BI_MringComb(BLACSCONTEXT *, BLACBUFF *, BLACBUFF *, int, VVFUNPTR,
                     int, int);
   void BI_TreeComb(BLACSCONTEXT *, BLACBUFF *, BLACBUFF *, int, VVFUNPTR,
                    int, int);
   void BI_BeComb(BLACSCONTEXT *, BLACBUFF *, BLACBUFF *, int, VVFUNPTR);
   void BI_dvvamx(int, char *, char *);
   void BI_dvvamx2(int, char *, char *);
   void BI_dMPI_amx(void *, void *, int *, BI_MPI_Datatype *);
   void BI_dMPI_amx2(void *, void *, int *, BI_MPI_Datatype *);
/*
 *  Variable Declarations
 */
   VVFUNPTR vvop;
   BLACBUFF *bp, *bp2;
   BLACSCONTEXT *ctxt;
   char ttop, tscope;
   int i, j, N, dest, idist, length, tlda, tldia, trdest, ierr;
   int len[2];
   BI_MPI_Aint disp[2];
   BI_MPI_Datatype dtypes[2];
   BI_MPI_Op BlacComb;
   BI_MPI_Datatype MyType;
   BI_DistType *dist, mydist;
   extern BLACBUFF *BI_ActiveQ;
   extern BLACBUFF BI_AuxBuff;

   MGetConTxt(Mpval(ConTxt), ctxt);
   ttop = F2C_CharTrans(top);
   ttop = Mlowcase(ttop);
   tscope = F2C_CharTrans(scope);
   tscope = Mlowcase(tscope);
/*
 *  If the user has set the default combine topology, use it instead of
 *  BLACS default
 */
#ifdef DefCombTop
   if (ttop == ' ') ttop = DefCombTop;
#endif
/*
 *  trdest == -1 means "leave the result on every process in scope";
 *  a -1 in either destination coordinate selects that mode.
 */
   if (Mpval(cdest) == -1) trdest = -1;
   else trdest = Mpval(rdest);
#if (BlacsDebugLvl > 0)
   BI_ArgCheck(Mpval(ConTxt), RT_COMB, __FILE__, tscope, 'u', 'u', Mpval(m),
               Mpval(n), Mpval(lda), 1, &trdest, Mpaddress(cdest));
   if (Mpval(ldia) < Mpval(m))
   {
      if (Mpval(ldia) != -1)
         BI_BlacsWarn(Mpval(ConTxt), __LINE__, __FILE__,
                      "LDIA too small (LDIA=%d, but M=%d)", Mpval(ldia),
                      Mpval(m));
   }
#endif
/*
 *  Never use a leading dimension smaller than the row count
 */
   if (Mpval(lda) >= Mpval(m)) tlda = Mpval(lda);
   else tlda = Mpval(m);
   if (Mpval(ldia) < Mpval(m)) tldia = Mpval(m);
   else tldia = Mpval(ldia);
/*
 *  Select the scope to operate in and express the destination in that
 *  scope's numbering (dest == -1: everyone gets the answer)
 */
   switch(tscope)
   {
   case 'r':
      ctxt->scp = &ctxt->rscp;
      if (trdest == -1) dest = -1;
      else dest = Mpval(cdest);
      break;
   case 'c':
      ctxt->scp = &ctxt->cscp;
      dest = trdest;
      break;
   case 'a':
      ctxt->scp = &ctxt->ascp;
      if (trdest == -1) dest = -1;
      else dest = Mvkpnum(ctxt, trdest, Mpval(cdest));
      break;
   default:
      BI_BlacsErr(Mpval(ConTxt), __LINE__, __FILE__, "Unknown scope '%c'",
                  tscope);
/*
 *    BI_BlacsErr is expected not to return.  Should it ever do so, stop
 *    here: dest has not been assigned and ctxt->scp has not been set for
 *    this call, and no buffer or datatype has been acquired yet.
 */
      return;
   }
/*
 *  It's not defined how MPI reacts to 0 element reductions, so use BLACS 1-tree
 *  topology if we've got one
 */
   if (ttop == ' ')
      if ( (Mpval(m) < 1) || (Mpval(n) < 1) || (ctxt->TopsRepeat) ) ttop = '1';
   N = Mpval(m) * Mpval(n);
/*
 *  If process who has amx is to be communicated, must set up distance
 *  vector after value vector
 */
   if (Mpval(ldia) != -1)
   {
      vvop = BI_dvvamx;
      length = N * sizeof(double);
      i = length % sizeof(BI_DistType);  /* ensure dist vec aligned correctly */
      if (i) length += sizeof(BI_DistType) - i;
      idist = length;                    /* byte offset of the distance vector */
      length += N * sizeof(BI_DistType);
/*
 *    For performance, insist second buffer is at least 8-byte aligned
 */
      j = 8;
      if (sizeof(double) > j) j = sizeof(double);
      i = length % j;
      if (i) length += j - i;
/*
 *    One allocation holds both work buffers back to back: bp occupies the
 *    first length bytes, bp2 (via BI_AuxBuff) the second length bytes
 */
      i = 2 * length;
      bp = BI_GetBuff(i);
      bp2 = &BI_AuxBuff;
      bp2->Buff = &bp->Buff[length];
      BI_dmvcopy(Mpval(m), Mpval(n), A, tlda, bp->Buff);
/*
 *    Fill in distance vector: every element starts out owned by this
 *    process, identified by its distance from dest within the scope
 *    (or by its own scope rank when everyone receives the answer)
 */
      if (dest == -1) mydist = ctxt->scp->Iam;
      else mydist = (ctxt->scp->Np + ctxt->scp->Iam - dest) % ctxt->scp->Np;
      dist = (BI_DistType *) &bp->Buff[idist];
      for (i=0; i < N; i++) dist[i] = mydist;
/*
 *    Create the MPI datatype holding both user's buffer and distance vector
 */
      len[0] = len[1] = N;
      disp[0] = 0;
      disp[1] = idist;
      dtypes[0] = BI_MPI_DOUBLE;
      dtypes[1] = BI_MpiDistType;
#ifdef ZeroByteTypeBug
      if (N > 0)
      {
#endif
      i = 2;
      BI_MPI_Type_struct(i, len, disp, dtypes, &MyType, ierr);
      BI_MPI_Type_commit(&MyType, ierr);
      bp->N = bp2->N = 1;
      bp->dtype = bp2->dtype = MyType;
#ifdef ZeroByteTypeBug
      }
      else
      {
         bp->N = bp2->N = 0;
         bp->dtype = bp2->dtype = BI_MPI_INT;
      }
#endif
   }
   else
   {
/*
 *    Coordinates not wanted: values only, no distance vector
 */
      vvop = BI_dvvamx2;
      length = N * sizeof(double);
/*
 *    If A is contiguous, we can use it as one of our buffers
 */
      if ( (Mpval(m) == tlda) || (Mpval(n) == 1) )
      {
         bp = &BI_AuxBuff;
         bp->Buff = (char *) A;
         bp2 = BI_GetBuff(length);
      }
      else
      {
         bp = BI_GetBuff(length*2);
         bp2 = &BI_AuxBuff;
         bp2->Buff = &bp->Buff[length];
         BI_dmvcopy(Mpval(m), Mpval(n), A, tlda, bp->Buff);
      }
      bp->N = bp2->N = N;
      bp->dtype = bp2->dtype = BI_MPI_DOUBLE;
   }

   switch(ttop)
   {
   case ' ':         /* use MPI's reduction by default */
      i = 1;
      if (Mpval(ldia) == -1)
      {
         BI_MPI_Op_create(BI_dMPI_amx2, i, &BlacComb, ierr);
      }
      else
      {
         BI_MPI_Op_create(BI_dMPI_amx, i, &BlacComb, ierr);
         BI_AuxBuff.Len = N;  /* set this up for the MPI OP wrappers */
      }

      if (trdest != -1)
      {
/*
 *       Single destination: only dest receives the result in bp2
 */
         BI_MPI_Reduce(bp->Buff, bp2->Buff, bp->N, bp->dtype, BlacComb, dest,
                       ctxt->scp->comm, ierr);
         if (ctxt->scp->Iam == dest)
         {
            BI_dvmcopy(Mpval(m), Mpval(n), A, tlda, bp2->Buff);
            if (Mpval(ldia) != -1)
               BI_TransDist(ctxt, tscope, Mpval(m), Mpval(n), rA, cA, tldia,
                            (BI_DistType *) &bp2->Buff[idist],
                            trdest, Mpval(cdest));
         }
      }
      else
      {
/*
 *       Everyone receives the result in bp2
 */
         BI_MPI_Allreduce(bp->Buff, bp2->Buff, bp->N, bp->dtype, BlacComb,
                          ctxt->scp->comm, ierr);
         BI_dvmcopy(Mpval(m), Mpval(n), A, tlda, bp2->Buff);
         if (Mpval(ldia) != -1)
            BI_TransDist(ctxt, tscope, Mpval(m), Mpval(n), rA, cA, tldia,
                         (BI_DistType *) &bp2->Buff[idist],
                         trdest, Mpval(cdest));
      }
      BI_MPI_Op_free(&BlacComb, ierr);
      if (Mpval(ldia) != -1)
#ifdef ZeroByteTypeBug
         if (N > 0)
#endif
         BI_MPI_Type_free(&MyType, ierr);
      if (BI_ActiveQ) BI_UpdateBuffs(NULL);
/*
 *    The MPI path has already unpacked its result from bp2, so it must
 *    not fall through to the common unpack code after the switch
 */
      return;
   case 'i':
      BI_MringComb(ctxt, bp, bp2, N, vvop, dest, 1);
      break;
   case 'd':
      BI_MringComb(ctxt, bp, bp2, N, vvop, dest, -1);
      break;
   case 's':
      BI_MringComb(ctxt, bp, bp2, N, vvop, dest, 2);
      break;
   case 'm':
      BI_MringComb(ctxt, bp, bp2, N, vvop, dest, ctxt->Nr_co);
      break;
   case '1':
   case '2':
   case '3':
   case '4':
   case '5':
   case '6':
   case '7':
   case '8':
   case '9':
/*
 *    ttop-47 turns the digit characters '1'..'9' into 2..10
 */
      BI_TreeComb(ctxt, bp, bp2, N, vvop, dest, ttop-47);
      break;
   case 'f':
      BI_TreeComb(ctxt, bp, bp2, N, vvop, dest, FULLCON);
      break;
   case 't':
      BI_TreeComb(ctxt, bp, bp2, N, vvop, dest, ctxt->Nb_co);
      break;
   case 'h':
/*
 *    Use bidirectional exchange if everyone wants answer
 */
      if ( (trdest == -1) && !(ctxt->TopsCohrnt) )
         BI_BeComb(ctxt, bp, bp2, N, vvop);
      else
         BI_TreeComb(ctxt, bp, bp2, N, vvop, dest, 2);
      break;
   default :
      BI_BlacsErr(Mpval(ConTxt), __LINE__, __FILE__, "Unknown topology '%c'",
                  ttop);
   }

/*
 *  The combined value/distance datatype is no longer needed
 */
   if (Mpval(ldia) != -1)
#ifdef ZeroByteTypeBug
      if (N > 0)
#endif
      BI_MPI_Type_free(&MyType, ierr);
/*
 *  If I am selected to receive answer
 */
   if ( (ctxt->scp->Iam == dest) || (dest == -1) )
   {
/*
 *    Translate the distances stored in the latter part of bp->Buff into
 *    process grid coordinates, and output these coordinates in the
 *    arrays rA and cA.
 */
      if (Mpval(ldia) != -1)
         BI_TransDist(ctxt, tscope, Mpval(m), Mpval(n), rA, cA, tldia,
                      dist, trdest, Mpval(cdest));
/*
 *    Unpack the amx array.  When bp is BI_AuxBuff its buffer is A itself
 *    (contiguous case above), so there is nothing to copy back.
 */
      if (bp != &BI_AuxBuff) BI_dvmcopy(Mpval(m), Mpval(n), A, tlda, bp->Buff);
   }
}
/* syntax highlighted by Code2HTML, v. 0.9.1 */