Actual source code: mpimesg.c


#include <petscsys.h>

/*@C
  PetscGatherNumberOfMessages - Computes the number of messages a node expects to receive

  Collective

  Input Parameters:
+ comm     - Communicator
. iflags   - an array of integers of length equal to the size of comm. A '1' in iflags[i] represents a
             message from the current node to the ith node. Optionally NULL
- ilengths - Nonzero ilengths[i] represents a message to node i of length ilengths[i].
             Optionally NULL.

  Output Parameter:
. nrecvs   - number of messages received

  Level: developer

  Notes:
  With this info, the correct message lengths can be determined using
  PetscGatherMessageLengths()

  Either iflags or ilengths should be provided. If iflags is not
  provided (NULL) it can be computed from ilengths. If iflags is
  provided, ilengths is not required.

.seealso: PetscGatherMessageLengths()
@*/
PetscErrorCode  PetscGatherNumberOfMessages(MPI_Comm comm,const PetscMPIInt iflags[],const PetscMPIInt ilengths[],PetscMPIInt *nrecvs)
{
  PetscMPIInt    size,rank,*recv_buf,i,*iflags_local = NULL,*iflags_localm = NULL;

  MPI_Comm_size(comm,&size);
  MPI_Comm_rank(comm,&rank);

  PetscMalloc2(size,&recv_buf,size,&iflags_localm);

  /* If iflags not provided, compute iflags from ilengths */
  if (!iflags) {
    if (!ilengths) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Either iflags or ilengths should be provided");
    iflags_local = iflags_localm;
    for (i=0; i<size; i++) {
      if (ilengths[i]) iflags_local[i] = 1;
      else iflags_local[i] = 0;
    }
  } else iflags_local = (PetscMPIInt*) iflags;

  /* Post an allreduce to determine the number of messages the current node will receive */
  MPIU_Allreduce(iflags_local,recv_buf,size,MPI_INT,MPI_SUM,comm);
  *nrecvs = recv_buf[rank];

  PetscFree2(recv_buf,iflags_localm);
  return(0);
}
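
/* Illustrative usage sketch (not part of the original file): the helper name and the
   `lengths` array are hypothetical. `lengths` has one entry per rank of comm, with
   lengths[i] != 0 iff this rank sends a message to rank i; passing NULL for iflags lets
   PetscGatherNumberOfMessages() derive the flags from lengths. Error codes are ignored
   for brevity, matching the style of this listing. */
static PetscErrorCode ExampleCountIncomingMessages(MPI_Comm comm,const PetscMPIInt lengths[],PetscMPIInt *nrecvs)
{
  PetscGatherNumberOfMessages(comm,NULL,lengths,nrecvs);
  return(0);
}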

/*@C
  PetscGatherMessageLengths - Computes info about messages that an MPI node will receive,
  including (from-id,length) pairs for each message.

  Collective

  Input Parameters:
+ comm      - Communicator
. nsends    - number of messages that are to be sent
. nrecvs    - number of messages being received
- ilengths  - an array of integers of length equal to the size of comm;
              a nonzero ilengths[i] represents a message to node i of length ilengths[i]

  Output Parameters:
+ onodes    - list of node-ids from which messages are expected
- olengths  - corresponding message lengths

  Level: developer

  Notes:
  With this info, the correct MPI_Irecv() can be posted with the correct
  from-id, with a buffer with the right amount of memory required.

  The calling function deallocates the memory in onodes and olengths

  To determine nrecvs, one can use PetscGatherNumberOfMessages()

.seealso: PetscGatherNumberOfMessages()
@*/
PetscErrorCode  PetscGatherMessageLengths(MPI_Comm comm,PetscMPIInt nsends,PetscMPIInt nrecvs,const PetscMPIInt ilengths[],PetscMPIInt **onodes,PetscMPIInt **olengths)
{
  PetscMPIInt    size,rank,tag,i,j;
  MPI_Request    *s_waits  = NULL,*r_waits = NULL;
  MPI_Status     *w_status = NULL;

  MPI_Comm_size(comm,&size);
  MPI_Comm_rank(comm,&rank);
  PetscCommGetNewTag(comm,&tag);

  /* cannot use PetscMalloc3() here because in the call to MPI_Waitall() they MUST be contiguous */
  PetscMalloc2(nrecvs+nsends,&r_waits,nrecvs+nsends,&w_status);
  s_waits = r_waits+nrecvs;

  /* Post the Irecv to get the message length-info */
  PetscMalloc1(nrecvs,olengths);
  for (i=0; i<nrecvs; i++) {
    MPI_Irecv((*olengths)+i,1,MPI_INT,MPI_ANY_SOURCE,tag,comm,r_waits+i);
  }

  /* Post the Isends with the message length-info */
  for (i=0,j=0; i<size; ++i) {
    if (ilengths[i]) {
      MPI_Isend((void*)(ilengths+i),1,MPI_INT,i,tag,comm,s_waits+j);
      j++;
    }
  }

  /* Post waits on sends and receives */
  if (nrecvs+nsends) {MPI_Waitall(nrecvs+nsends,r_waits,w_status);}

  /* Pack up the received data */
  PetscMalloc1(nrecvs,onodes);
  for (i=0; i<nrecvs; ++i) {
    (*onodes)[i] = w_status[i].MPI_SOURCE;
#if defined(PETSC_HAVE_OMPI_MAJOR_VERSION)
    /* This line is a workaround for a bug in OpenMPI-2.1.1 distributed by Ubuntu-18.04.2 LTS.
       It happens in self-to-self MPI_Send/Recv using MPI_ANY_SOURCE for message matching. OpenMPI
       does not put the correct value in the recv buffer. See also
       https://lists.mcs.anl.gov/pipermail/petsc-dev/2019-July/024803.html
       https://www.mail-archive.com/users@lists.open-mpi.org//msg33383.html
     */
    if (w_status[i].MPI_SOURCE == rank) (*olengths)[i] = ilengths[rank];
#endif
  }
  PetscFree2(r_waits,w_status);
  return(0);
}
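
/* Illustrative usage sketch (not part of the original file): the helper name and argument
   names are hypothetical. It combines PetscGatherNumberOfMessages() and
   PetscGatherMessageLengths(): `ilengths` has one entry per rank of comm and nsends is the
   number of its nonzero entries. As documented above, the caller frees *onodes and
   *olengths with PetscFree(). Error codes are ignored for brevity, matching this listing. */
static PetscErrorCode ExampleGatherRecvInfo(MPI_Comm comm,PetscMPIInt nsends,const PetscMPIInt ilengths[],PetscMPIInt *nrecvs,PetscMPIInt **onodes,PetscMPIInt **olengths)
{
  PetscGatherNumberOfMessages(comm,NULL,ilengths,nrecvs);
  PetscGatherMessageLengths(comm,nsends,*nrecvs,ilengths,onodes,olengths);
  return(0);
}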

/*@C
  PetscGatherMessageLengths2 - Computes info about messages that an MPI node will receive,
  including (from-id,length) pairs for each message. Same functionality as PetscGatherMessageLengths()
  except it takes TWO ilengths and outputs TWO olengths.

  Collective

  Input Parameters:
+ comm      - Communicator
. nsends    - number of messages that are to be sent
. nrecvs    - number of messages being received
- ilengths1, ilengths2 - arrays of integers of length equal to the size of comm;
              a nonzero ilengths[i] represents a message to node i of length ilengths[i]

  Output Parameters:
+ onodes    - list of node-ids from which messages are expected
- olengths1, olengths2 - corresponding message lengths

  Level: developer

  Notes:
  With this info, the correct MPI_Irecv() can be posted with the correct
  from-id, with a buffer with the right amount of memory required.

  The calling function deallocates the memory in onodes and olengths

  To determine nrecvs, one can use PetscGatherNumberOfMessages()

.seealso: PetscGatherMessageLengths(), PetscGatherNumberOfMessages()
@*/
PetscErrorCode  PetscGatherMessageLengths2(MPI_Comm comm,PetscMPIInt nsends,PetscMPIInt nrecvs,const PetscMPIInt ilengths1[],const PetscMPIInt ilengths2[],PetscMPIInt **onodes,PetscMPIInt **olengths1,PetscMPIInt **olengths2)
{
  PetscMPIInt    size,tag,i,j,*buf_s = NULL,*buf_r = NULL,*buf_j = NULL;
  MPI_Request    *s_waits  = NULL,*r_waits = NULL;
  MPI_Status     *w_status = NULL;

  MPI_Comm_size(comm,&size);
  PetscCommGetNewTag(comm,&tag);

  /* cannot use PetscMalloc5() because r_waits and s_waits must be contiguous for the call to MPI_Waitall() */
  PetscMalloc4(nrecvs+nsends,&r_waits,2*nrecvs,&buf_r,2*nsends,&buf_s,nrecvs+nsends,&w_status);
  s_waits = r_waits + nrecvs;

  /* Post the Irecv to get the message length-info */
  PetscMalloc1(nrecvs+1,olengths1);
  PetscMalloc1(nrecvs+1,olengths2);
  for (i=0; i<nrecvs; i++) {
    buf_j = buf_r + (2*i);
    MPI_Irecv(buf_j,2,MPI_INT,MPI_ANY_SOURCE,tag,comm,r_waits+i);
  }

  /* Post the Isends with the message length-info */
  for (i=0,j=0; i<size; ++i) {
    if (ilengths1[i]) {
      buf_j    = buf_s + (2*j);
      buf_j[0] = *(ilengths1+i);
      buf_j[1] = *(ilengths2+i);
      MPI_Isend(buf_j,2,MPI_INT,i,tag,comm,s_waits+j);
      j++;
    }
  }
  if (j != nsends) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"j %d not equal to expected number of sends %d\n",j,nsends);

  /* Post waits on sends and receives */
  if (nrecvs+nsends) {MPI_Waitall(nrecvs+nsends,r_waits,w_status);}

  /* Pack up the received data */
  PetscMalloc1(nrecvs+1,onodes);
  for (i=0; i<nrecvs; ++i) {
    (*onodes)[i]    = w_status[i].MPI_SOURCE;
    buf_j           = buf_r + (2*i);
    (*olengths1)[i] = buf_j[0];
    (*olengths2)[i] = buf_j[1];
  }

  PetscFree4(r_waits,buf_r,buf_s,w_status);
  return(0);
}
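
/* Illustrative usage sketch (not part of the original file): the helper name and argument
   names are hypothetical. The two-length variant ships a (ilengths1[i],ilengths2[i]) pair
   per destination, so both input arrays are assumed to share the same nonzero pattern and
   a single onodes[] list describes both output length arrays. */
static PetscErrorCode ExampleGatherTwoLengths(MPI_Comm comm,PetscMPIInt nsends,PetscMPIInt nrecvs,const PetscMPIInt ilengths1[],const PetscMPIInt ilengths2[],PetscMPIInt **onodes,PetscMPIInt **olengths1,PetscMPIInt **olengths2)
{
  PetscGatherMessageLengths2(comm,nsends,nrecvs,ilengths1,ilengths2,onodes,olengths1,olengths2);
  /* as with PetscGatherMessageLengths(), the caller frees *onodes, *olengths1 and *olengths2 */
  return(0);
}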

/*

  Allocate a buffer sufficient to hold messages of the sizes specified in olengths,
  and post Irecvs on these buffers using node info from onodes

 */
PetscErrorCode  PetscPostIrecvInt(MPI_Comm comm,PetscMPIInt tag,PetscMPIInt nrecvs,const PetscMPIInt onodes[],const PetscMPIInt olengths[],PetscInt ***rbuf,MPI_Request **r_waits)
{
  PetscInt       **rbuf_t,i,len = 0;
  MPI_Request    *r_waits_t;

  /* compute memory required for recv buffers */
  for (i=0; i<nrecvs; i++) len += olengths[i];  /* each message length */

  /* allocate memory for recv buffers */
  PetscMalloc1(nrecvs+1,&rbuf_t);
  PetscMalloc1(len,&rbuf_t[0]);
  for (i=1; i<nrecvs; ++i) rbuf_t[i] = rbuf_t[i-1] + olengths[i-1];

  /* Post the receives */
  PetscMalloc1(nrecvs,&r_waits_t);
  for (i=0; i<nrecvs; ++i) {
    MPI_Irecv(rbuf_t[i],olengths[i],MPIU_INT,onodes[i],tag,comm,r_waits_t+i);
  }

  *rbuf    = rbuf_t;
  *r_waits = r_waits_t;
  return(0);
}
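
/* Illustrative end-to-end sketch (not part of the original file): the routine name, the
   `ilengths` array and the `sbufs` send buffers are hypothetical. It shows how the pieces
   in this file combine for a sparse exchange of PetscInt messages: ilengths[i] is the
   number of PetscInts this rank sends to rank i (0 if none) and sbufs[i] is assumed to
   point to a send buffer of that length. Error codes are ignored for brevity, matching
   the style of this listing. */
static PetscErrorCode ExampleSparseIntExchange(MPI_Comm comm,const PetscMPIInt ilengths[],PetscInt **sbufs)
{
  PetscMPIInt  size,i,j,tag,nsends = 0,nrecvs,*onodes,*olengths;
  PetscInt     **rbuf;
  MPI_Request  *r_waits,*s_waits;

  MPI_Comm_size(comm,&size);
  for (i=0; i<size; i++) if (ilengths[i]) nsends++;

  /* How many messages arrive here, and their (source,length) pairs */
  PetscGatherNumberOfMessages(comm,NULL,ilengths,&nrecvs);
  PetscGatherMessageLengths(comm,nsends,nrecvs,ilengths,&onodes,&olengths);

  /* Post the receives, then the matching sends with the same tag */
  PetscCommGetNewTag(comm,&tag);
  PetscPostIrecvInt(comm,tag,nrecvs,onodes,olengths,&rbuf,&r_waits);
  PetscMalloc1(nsends,&s_waits);
  for (i=0,j=0; i<size; i++) {
    if (ilengths[i]) {MPI_Isend(sbufs[i],ilengths[i],MPIU_INT,i,tag,comm,s_waits+j); j++;}
  }
  MPI_Waitall(nrecvs,r_waits,MPI_STATUSES_IGNORE);
  MPI_Waitall(nsends,s_waits,MPI_STATUSES_IGNORE);

  /* ... rbuf[k] now holds olengths[k] PetscInts received from rank onodes[k] ... */

  PetscFree(onodes);
  PetscFree(olengths);
  PetscFree(rbuf[0]);
  PetscFree(rbuf);
  PetscFree(r_waits);
  PetscFree(s_waits);
  return(0);
}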

PetscErrorCode  PetscPostIrecvScalar(MPI_Comm comm,PetscMPIInt tag,PetscMPIInt nrecvs,const PetscMPIInt onodes[],const PetscMPIInt olengths[],PetscScalar ***rbuf,MPI_Request **r_waits)
{
  PetscMPIInt    i;
  PetscScalar    **rbuf_t;
  MPI_Request    *r_waits_t;
  PetscInt       len = 0;

  /* compute memory required for recv buffers */
  for (i=0; i<nrecvs; i++) len += olengths[i];  /* each message length */

  /* allocate memory for recv buffers */
  PetscMalloc1(nrecvs+1,&rbuf_t);
  PetscMalloc1(len,&rbuf_t[0]);
  for (i=1; i<nrecvs; ++i) rbuf_t[i] = rbuf_t[i-1] + olengths[i-1];

  /* Post the receives */
  PetscMalloc1(nrecvs,&r_waits_t);
  for (i=0; i<nrecvs; ++i) {
    MPI_Irecv(rbuf_t[i],olengths[i],MPIU_SCALAR,onodes[i],tag,comm,r_waits_t+i);
  }

  *rbuf    = rbuf_t;
  *r_waits = r_waits_t;
  return(0);
}