Actual source code: gs.c
1: /***********************************gs.c***************************************
3: Author: Henry M. Tufo III
5: e-mail: hmt@cs.brown.edu
7: snail-mail:
8: Division of Applied Mathematics
9: Brown University
10: Providence, RI 02912
12: Last Modification:
13: 6.21.97
14: ************************************gs.c**************************************/
16: /***********************************gs.c***************************************
17: File Description:
18: -----------------
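Gather/scatter ("gs") kernels for the PCTFS package.  PCTFS_gs_init() builds a
communication template over a set of shared global ids; the PCTFS_gs_gop_*()
routines then combine the copies of each shared id using purely local
reductions, pairwise (point-to-point MPI) exchange, and/or a tree
(fan-in/fan-out) reduction.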
20: ************************************gs.c**************************************/
22: #include <../src/ksp/pc/impls/tfs/tfs.h>
24: /* default capacity (number of entries) of the tree buffer - doubles if exceeded */
25: #define TREE_BUF_SZ 2048
26: #define GS_VEC_SZ 1
28: /***********************************gs.c***************************************
29: Type: struct gather_scatter_id
30: ------------------------------
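Opaque handle returned by PCTFS_gs_init().  Caches, for one set of shared global
ids, the local reduce lists, the pairwise message plan (neighbor ranks, sizes,
node lists, MPI requests, staging buffers), and the tree accumulator maps.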
32: ************************************gs.c**************************************/
33: typedef struct gather_scatter_id {
34: PetscInt id;
35: PetscInt nel_min;
36: PetscInt nel_max;
37: PetscInt nel_sum;
38: PetscInt negl;
39: PetscInt gl_max;
40: PetscInt gl_min;
41: PetscInt repeats;
42: PetscInt ordered;
43: PetscInt positive;
44: PetscScalar *vals;
46: /* bit mask info */
47: PetscInt *my_proc_mask;
48: PetscInt mask_sz;
49: PetscInt *ngh_buf;
50: PetscInt ngh_buf_sz;
51: PetscInt *nghs;
52: PetscInt num_nghs;
53: PetscInt max_nghs;
54: PetscInt *pw_nghs;
55: PetscInt num_pw_nghs;
56: PetscInt *tree_nghs;
57: PetscInt num_tree_nghs;
59: PetscInt num_loads;
61: /* repeats == true -> local info */
62: PetscInt nel; /* number of unique elements */
63: PetscInt *elms; /* of size nel */
64: PetscInt nel_total;
65: PetscInt *local_elms; /* of size nel_total */
66: PetscInt *companion; /* of size nel_total */
68: /* local info */
69: PetscInt num_local_total;
70: PetscInt local_strength;
71: PetscInt num_local;
72: PetscInt *num_local_reduce;
73: PetscInt **local_reduce;
74: PetscInt num_local_gop;
75: PetscInt *num_gop_local_reduce;
76: PetscInt **gop_local_reduce;
78: /* pairwise info */
79: PetscInt level;
80: PetscInt num_pairs;
81: PetscInt max_pairs;
82: PetscInt loc_node_pairs;
83: PetscInt max_node_pairs;
84: PetscInt min_node_pairs;
85: PetscInt avg_node_pairs;
86: PetscInt *pair_list;
87: PetscInt *msg_sizes;
88: PetscInt **node_list;
89: PetscInt len_pw_list;
90: PetscInt *pw_elm_list;
91: PetscScalar *pw_vals;
93: MPI_Request *msg_ids_in;
94: MPI_Request *msg_ids_out;
96: PetscScalar *out;
97: PetscScalar *in;
98: PetscInt msg_total;
100: /* tree - crystal accumulator info */
101: PetscInt max_left_over;
102: PetscInt *pre;
103: PetscInt *in_num;
104: PetscInt *out_num;
105: PetscInt **in_list;
106: PetscInt **out_list;
108: /* new tree work*/
109: PetscInt tree_nel;
110: PetscInt *tree_elms;
111: PetscScalar *tree_buf;
112: PetscScalar *tree_work;
114: PetscInt tree_map_sz;
115: PetscInt *tree_map_in;
116: PetscInt *tree_map_out;
118: /* current memory status */
119: PetscInt gl_bss_min;
120: PetscInt gl_perm_min;
122: /* max segment size for PCTFS_gs_gop_vec() */
123: PetscInt vec_sz;
125: /* hack to make paul happy */
126: MPI_Comm PCTFS_gs_comm;
128: } PCTFS_gs_id;
130: static PCTFS_gs_id *gsi_check_args(PetscInt *elms, PetscInt nel, PetscInt level);
131: static PetscErrorCode gsi_via_bit_mask(PCTFS_gs_id *gs);
132: static PetscErrorCode get_ngh_buf(PCTFS_gs_id *gs);
133: static PetscErrorCode set_pairwise(PCTFS_gs_id *gs);
134: static PCTFS_gs_id *gsi_new(void);
135: static PetscErrorCode set_tree(PCTFS_gs_id *gs);
137: /* same for all but vector flavor */
138: static PetscErrorCode PCTFS_gs_gop_local_out(PCTFS_gs_id *gs, PetscScalar *vals);
139: /* vector flavor */
140: static PetscErrorCode PCTFS_gs_gop_vec_local_out(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt step);
142: static PetscErrorCode PCTFS_gs_gop_vec_plus(PCTFS_gs_id *gs, PetscScalar *in_vals, PetscInt step);
143: static PetscErrorCode PCTFS_gs_gop_vec_pairwise_plus(PCTFS_gs_id *gs, PetscScalar *in_vals, PetscInt step);
144: static PetscErrorCode PCTFS_gs_gop_vec_local_plus(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt step);
145: static PetscErrorCode PCTFS_gs_gop_vec_local_in_plus(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt step);
146: static PetscErrorCode PCTFS_gs_gop_vec_tree_plus(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt step);
148: static PetscErrorCode PCTFS_gs_gop_local_plus(PCTFS_gs_id *gs, PetscScalar *vals);
149: static PetscErrorCode PCTFS_gs_gop_local_in_plus(PCTFS_gs_id *gs, PetscScalar *vals);
151: static PetscErrorCode PCTFS_gs_gop_plus_hc(PCTFS_gs_id *gs, PetscScalar *in_vals, PetscInt dim);
152: static PetscErrorCode PCTFS_gs_gop_pairwise_plus_hc(PCTFS_gs_id *gs, PetscScalar *in_vals, PetscInt dim);
153: static PetscErrorCode PCTFS_gs_gop_tree_plus_hc(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt dim);
155: /* global vars */
156: /* from comm.c module */
158: static PetscInt num_gs_ids = 0;
160: /* should make this dynamic ... later */
161: static PetscInt msg_buf = MAX_MSG_BUF;
162: static PetscInt vec_sz = GS_VEC_SZ;
163: static PetscInt *tree_buf = NULL;
164: static PetscInt tree_buf_sz = 0;
165: static PetscInt ntree = 0;
167: /***************************************************************************/
168: PetscErrorCode PCTFS_gs_init_vec_sz(PetscInt size)
169: {
170: PetscFunctionBegin;
171: vec_sz = size;
172: PetscFunctionReturn(PETSC_SUCCESS);
173: }
175: /******************************************************************************/
176: PetscErrorCode PCTFS_gs_init_msg_buf_sz(PetscInt buf_size)
177: {
178: PetscFunctionBegin;
179: msg_buf = buf_size;
180: PetscFunctionReturn(PETSC_SUCCESS);
181: }
183: /******************************************************************************/
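/* PCTFS_gs_init() - build a gather/scatter template.
     elms  : global ids of my local entries (zeros are ignored, negative ids are invalid)
     nel   : number of entries in elms
     level : ids with at most `level` other sharers are exchanged pairwise; more
             widely shared ids go through the tree path
   Collective on MPI_COMM_WORLD.  Illustrative sketch only (ids, values, and
   level below are made up; every rank must make the same sequence of calls):

     PetscInt     ids[3]  = {7, 12, 12};     // my copies of global ids
     PetscScalar  vals[3] = {1.0, 2.0, 3.0}; // one value per entry of ids
     PCTFS_gs_id *gs      = PCTFS_gs_init(ids, 3, 2);
     PCTFS_gs_gop_vec(gs, vals, "+", 1);     // sum all copies of each id
     PCTFS_gs_free(gs);
*/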
184: PCTFS_gs_id *PCTFS_gs_init(PetscInt *elms, PetscInt nel, PetscInt level)
185: {
186: PCTFS_gs_id *gs;
187: MPI_Group PCTFS_gs_group;
188: MPI_Comm PCTFS_gs_comm;
190: /* ensure that communication package has been initialized */
191: PetscCallAbort(PETSC_COMM_SELF, PCTFS_comm_init());
193: /* determines if we have enough dynamic/semi-static memory */
194: /* checks input, allocates and sets up the gs_id template */
195: gs = gsi_check_args(elms, nel, level);
197: /* only bit mask version up and working for the moment */
198: /* LATER :: get int list version working for sparse problems */
199: PetscCallAbort(PETSC_COMM_WORLD, gsi_via_bit_mask(gs));
201: PetscCallAbort(PETSC_COMM_WORLD, MPI_Comm_group(MPI_COMM_WORLD, &PCTFS_gs_group) ? PETSC_ERR_MPI : PETSC_SUCCESS);
202: PetscCallAbort(PETSC_COMM_WORLD, MPI_Comm_create(MPI_COMM_WORLD, PCTFS_gs_group, &PCTFS_gs_comm) ? PETSC_ERR_MPI : PETSC_SUCCESS);
203: PetscCallAbort(PETSC_COMM_WORLD, MPI_Group_free(&PCTFS_gs_group) ? PETSC_ERR_MPI : PETSC_SUCCESS);
205: gs->PCTFS_gs_comm = PCTFS_gs_comm;
207: return gs;
208: }
210: /******************************************************************************/
211: static PCTFS_gs_id *gsi_new(void)
212: {
213: PCTFS_gs_id *gs;
214: gs = (PCTFS_gs_id *)malloc(sizeof(PCTFS_gs_id));
215: PetscCallAbort(PETSC_COMM_WORLD, PetscMemzero(gs, sizeof(PCTFS_gs_id)));
216: return gs;
217: }
219: /******************************************************************************/
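/* gsi_check_args() - validate and preprocess the element list: strip zero ids,
   sort if necessary, collapse locally repeated ids into local_reduce lists, and
   agree globally (via PCTFS_giop) on the id range, element counts, and level. */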
220: static PCTFS_gs_id *gsi_check_args(PetscInt *in_elms, PetscInt nel, PetscInt level)
221: {
222: PetscInt i, j, k, t2;
223: PetscInt *companion, *elms, *unique, *iptr;
224: PetscInt num_local = 0, *num_to_reduce, **local_reduce;
225: PetscInt oprs[] = {NON_UNIFORM, GL_MIN, GL_MAX, GL_ADD, GL_MIN, GL_MAX, GL_MIN, GL_B_AND};
226: PetscInt vals[PETSC_STATIC_ARRAY_LENGTH(oprs) - 1];
227: PetscInt work[PETSC_STATIC_ARRAY_LENGTH(oprs) - 1];
228: PCTFS_gs_id *gs;
230: if (!in_elms) SETERRABORT(PETSC_COMM_WORLD, PETSC_ERR_PLIB, "elms point to nothing!!!\n");
231: if (nel < 0) SETERRABORT(PETSC_COMM_WORLD, PETSC_ERR_PLIB, "can't have fewer than 0 elms!!!\n");
233: if (nel == 0) PetscCallAbort(PETSC_COMM_WORLD, PetscInfo(0, "I don't have any elements!!!\n"));
235: /* get space for gs template */
236: gs = gsi_new();
237: gs->id = ++num_gs_ids;
239: /* hmt 6.4.99 */
240: /* caller can set global ids that don't participate to 0 */
241: /* PCTFS_gs_init ignores all zeros in elm list */
242: /* negative global ids are still invalid */
243: for (i = j = 0; i < nel; i++) {
244: if (in_elms[i] != 0) j++;
245: }
247: k = nel;
248: nel = j;
250: /* copy over in_elms list and create inverse map */
251: elms = (PetscInt *)malloc((nel + 1) * sizeof(PetscInt));
252: companion = (PetscInt *)malloc(nel * sizeof(PetscInt));
254: for (i = j = 0; i < k; i++) {
255: if (in_elms[i] != 0) {
256: elms[j] = in_elms[i];
257: companion[j++] = i;
258: }
259: }
261: if (j != nel) SETERRABORT(PETSC_COMM_WORLD, PETSC_ERR_PLIB, "nel j mismatch!\n");
263: /* pre-pass ... check to see if sorted */
264: elms[nel] = INT_MAX;
265: iptr = elms;
266: unique = elms + 1;
267: j = 0;
268: while (*iptr != INT_MAX) {
269: if (*iptr++ > *unique++) {
270: j = 1;
271: break;
272: }
273: }
275: /* set up inverse map */
276: if (j) {
277: PetscCallAbort(PETSC_COMM_WORLD, PetscInfo(0, "gsi_check_args() :: elm list *not* sorted!\n"));
278: PetscCallAbort(PETSC_COMM_WORLD, PCTFS_SMI_sort((void *)elms, (void *)companion, nel, SORT_INTEGER));
279: } else PetscCallAbort(PETSC_COMM_WORLD, PetscInfo(0, "gsi_check_args() :: elm list sorted!\n"));
280: elms[nel] = INT_MIN;
282: /* first pass */
283: /* determine number of unique elements, check pd */
284: for (i = k = 0; i < nel; i += j) {
285: t2 = elms[i];
286: j = ++i;
288: /* clump 'em for now */
289: while (elms[j] == t2) j++;
291: /* how many together and num local */
292: if (j -= i) {
293: num_local++;
294: k += j;
295: }
296: }
298: /* number of repeated entries (k); the unique count is nel - k */
299: gs->repeats = k;
300: gs->nel = nel - k;
302: /* number of element values that occur more than once locally */
303: gs->num_local = num_local;
304: num_local += 2;
305: gs->local_reduce = local_reduce = (PetscInt **)malloc(num_local * sizeof(PetscInt *));
306: gs->num_local_reduce = num_to_reduce = (PetscInt *)malloc(num_local * sizeof(PetscInt));
308: unique = (PetscInt *)malloc((gs->nel + 1) * sizeof(PetscInt));
309: gs->elms = unique;
310: gs->nel_total = nel;
311: gs->local_elms = elms;
312: gs->companion = companion;
314: /* compress map as well as keep track of local ops */
315: for (num_local = i = j = 0; i < gs->nel; i++) {
316: k = j;
317: t2 = unique[i] = elms[j];
318: companion[i] = companion[j];
320: while (elms[j] == t2) j++;
322: if ((t2 = (j - k)) > 1) {
323: /* number together */
324: num_to_reduce[num_local] = t2++;
326: iptr = local_reduce[num_local++] = (PetscInt *)malloc(t2 * sizeof(PetscInt));
328: /* to use binary searching don't remap until we check intersection */
329: *iptr++ = i;
331: /* note that we're skipping the first one */
332: while (++k < j) *(iptr++) = companion[k];
333: *iptr = -1;
334: }
335: }
337: /* sentinel for ngh_buf */
338: unique[gs->nel] = INT_MAX;
340: /* for two partition sort hack */
341: num_to_reduce[num_local] = 0;
342: local_reduce[num_local] = NULL;
343: num_to_reduce[++num_local] = 0;
344: local_reduce[num_local] = NULL;
346: /* load 'em up */
347: /* note one extra to hold NON_UNIFORM flag!!! */
348: vals[2] = vals[1] = vals[0] = nel;
349: if (gs->nel > 0) {
350: vals[3] = unique[0];
351: vals[4] = unique[gs->nel - 1];
352: } else {
353: vals[3] = INT_MAX;
354: vals[4] = INT_MIN;
355: }
356: vals[5] = level;
357: vals[6] = num_gs_ids;
359: /* GLOBAL: send 'em out */
360: PetscCallAbort(PETSC_COMM_WORLD, PCTFS_giop(vals, work, PETSC_STATIC_ARRAY_LENGTH(oprs) - 1, oprs));
362: /* must be semi-pos def - only pairwise depends on this */
363: /* LATER - remove this restriction */
364: if (vals[3] < 0) SETERRABORT(PETSC_COMM_WORLD, PETSC_ERR_PLIB, "gsi_check_args() :: system not semi-pos def \n");
365: if (vals[4] == INT_MAX) SETERRABORT(PETSC_COMM_WORLD, PETSC_ERR_PLIB, "gsi_check_args() :: system ub too large !\n");
367: gs->nel_min = vals[0];
368: gs->nel_max = vals[1];
369: gs->nel_sum = vals[2];
370: gs->gl_min = vals[3];
371: gs->gl_max = vals[4];
372: gs->negl = vals[4] - vals[3] + 1;
374: if (gs->negl <= 0) SETERRABORT(PETSC_COMM_WORLD, PETSC_ERR_PLIB, "gsi_check_args() :: system empty or neg :: %" PetscInt_FMT "\n", gs->negl);
376: /* LATER :: add level == -1 -> program selects level */
377: if (vals[5] < 0) vals[5] = 0;
378: else if (vals[5] > PCTFS_num_nodes) vals[5] = PCTFS_num_nodes;
379: gs->level = vals[5];
381: return gs;
382: }
384: /******************************************************************************/
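/* gsi_via_bit_mask() - build the communication pattern from processor bit
   masks: get_ngh_buf() discovers who shares each id, set_pairwise()/set_tree()
   set up the two exchange paths, and the local reduce lists are split into
   those that also feed pairwise/tree work and those that are purely local. */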
385: static PetscErrorCode gsi_via_bit_mask(PCTFS_gs_id *gs)
386: {
387: PetscInt i, nel, *elms;
388: PetscInt t1;
389: PetscInt **reduce;
390: PetscInt *map;
392: PetscFunctionBegin;
393: /* totally local removes ... PCTFS_ct_bits == 0 */
394: PetscCall(get_ngh_buf(gs));
396: if (gs->level) PetscCall(set_pairwise(gs));
397: if (gs->max_left_over) PetscCall(set_tree(gs));
399: /* intersection local and pairwise/tree? */
400: gs->num_local_total = gs->num_local;
401: gs->gop_local_reduce = gs->local_reduce;
402: gs->num_gop_local_reduce = gs->num_local_reduce;
404: map = gs->companion;
406: /* is there any local compression? */
407: if (!gs->num_local) {
408: gs->local_strength = NONE;
409: gs->num_local_gop = 0;
410: } else {
411: /* ok find intersection */
412: map = gs->companion;
413: reduce = gs->local_reduce;
414: for (i = 0, t1 = 0; i < gs->num_local; i++, reduce++) {
415: if ((PCTFS_ivec_binary_search(**reduce, gs->pw_elm_list, gs->len_pw_list) >= 0) || PCTFS_ivec_binary_search(**reduce, gs->tree_map_in, gs->tree_map_sz) >= 0) {
416: t1++;
417: PetscCheck(gs->num_local_reduce[i] > 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nobody in list?");
418: gs->num_local_reduce[i] *= -1;
419: }
420: **reduce = map[**reduce];
421: }
423: /* intersection is empty */
424: if (!t1) {
425: gs->local_strength = FULL;
426: gs->num_local_gop = 0;
427: } else { /* intersection not empty */
428: gs->local_strength = PARTIAL;
430: PetscCall(PCTFS_SMI_sort((void *)gs->num_local_reduce, (void *)gs->local_reduce, gs->num_local + 1, SORT_INT_PTR));
432: gs->num_local_gop = t1;
433: gs->num_local_total = gs->num_local;
434: gs->num_local -= t1;
435: gs->gop_local_reduce = gs->local_reduce;
436: gs->num_gop_local_reduce = gs->num_local_reduce;
438: for (i = 0; i < t1; i++) {
439: PetscCheck(gs->num_gop_local_reduce[i] < 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "they aren't negative?");
440: gs->num_gop_local_reduce[i] *= -1;
441: gs->local_reduce++;
442: gs->num_local_reduce++;
443: }
444: gs->local_reduce++;
445: gs->num_local_reduce++;
446: }
447: }
449: elms = gs->pw_elm_list;
450: nel = gs->len_pw_list;
451: for (i = 0; i < nel; i++) elms[i] = map[elms[i]];
453: elms = gs->tree_map_in;
454: nel = gs->tree_map_sz;
455: for (i = 0; i < nel; i++) elms[i] = map[elms[i]];
457: /* clean up */
458: free((void *)gs->local_elms);
459: free((void *)gs->companion);
460: free((void *)gs->elms);
461: free((void *)gs->ngh_buf);
462: gs->local_elms = gs->companion = gs->elms = gs->ngh_buf = NULL;
463: PetscFunctionReturn(PETSC_SUCCESS);
464: }
466: /******************************************************************************/
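/* place_in_tree() - append a global id to the tree accumulator buffer,
   doubling the buffer (initially TREE_BUF_SZ entries) when it fills up. */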
467: static PetscErrorCode place_in_tree(PetscInt elm)
468: {
469: PetscInt *tp, n;
471: PetscFunctionBegin;
472: if (ntree == tree_buf_sz) {
473: if (tree_buf_sz) {
474: tp = tree_buf;
475: n = tree_buf_sz;
476: tree_buf_sz <<= 1;
477: tree_buf = (PetscInt *)malloc(tree_buf_sz * sizeof(PetscInt));
478: PCTFS_ivec_copy(tree_buf, tp, n);
479: free(tp);
480: } else {
481: tree_buf_sz = TREE_BUF_SZ;
482: tree_buf = (PetscInt *)malloc(tree_buf_sz * sizeof(PetscInt));
483: }
484: }
486: tree_buf[ntree++] = elm;
487: PetscFunctionReturn(PETSC_SUCCESS);
488: }
490: /******************************************************************************/
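/* get_ngh_buf() - for every id in [gl_min, gl_max], bitwise-or the per-rank
   ownership masks across all ranks with PCTFS_giop() (in one or more loads
   bounded by msg_buf).  Shared ids I own with at most `level` other sharers are
   flagged (TOP_BIT) for pairwise exchange; more widely shared ids are handed to
   the tree accumulator via place_in_tree(). */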
491: static PetscErrorCode get_ngh_buf(PCTFS_gs_id *gs)
492: {
493: PetscInt i, j, npw = 0, ntree_map = 0;
494: PetscInt p_mask_size, ngh_buf_size, buf_size;
495: PetscInt *p_mask, *sh_proc_mask, *pw_sh_proc_mask;
496: PetscInt *ngh_buf, *buf1, *buf2;
497: PetscInt offset, per_load, num_loads, or_ct, start, end;
498: PetscInt *ptr1, *ptr2, i_start, negl, nel, *elms;
499: PetscInt oper = GL_B_OR;
500: PetscInt *ptr3, *t_mask, level, ct1, ct2;
502: PetscFunctionBegin;
503: /* to make life easier */
504: nel = gs->nel;
505: elms = gs->elms;
506: level = gs->level;
508: /* determine #bytes needed for processor bit masks and initialize with the mask corresponding to PCTFS_my_id */
509: p_mask = (PetscInt *)malloc(p_mask_size = PCTFS_len_bit_mask(PCTFS_num_nodes));
510: PetscCall(PCTFS_set_bit_mask(p_mask, p_mask_size, PCTFS_my_id));
512: /* allocate space for masks and info bufs */
513: gs->nghs = sh_proc_mask = (PetscInt *)malloc(p_mask_size);
514: gs->pw_nghs = pw_sh_proc_mask = (PetscInt *)malloc(p_mask_size);
515: gs->ngh_buf_sz = ngh_buf_size = p_mask_size * nel;
516: t_mask = (PetscInt *)malloc(p_mask_size);
517: gs->ngh_buf = ngh_buf = (PetscInt *)malloc(ngh_buf_size);
519: /* comm buffer size ... memory usage bounded by ~2*msg_buf */
520: /* had thought I could exploit rendezvous threshold */
522: /* default is one pass */
523: per_load = negl = gs->negl;
524: gs->num_loads = num_loads = 1;
525: i = p_mask_size * negl;
527: /* possible overflow on buffer size */
528: /* overflow hack */
529: if (i < 0) i = INT_MAX;
531: buf_size = PetscMin(msg_buf, i);
533: /* can we do it? */
534: PetscCheck(p_mask_size <= buf_size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "get_ngh_buf() :: buf<pms :: %" PetscInt_FMT ">%" PetscInt_FMT, p_mask_size, buf_size);
536: /* get PCTFS_giop buf space ... make *only* one malloc */
537: buf1 = (PetscInt *)malloc(buf_size << 1);
539: /* more than one bitwise-or (PCTFS_giop) exchange needed? */
540: if (buf_size != i) {
541: per_load = buf_size / p_mask_size;
542: buf_size = per_load * p_mask_size;
543: gs->num_loads = num_loads = negl / per_load + (negl % per_load > 0);
544: }
546: /* convert buf sizes from #bytes to #ints - 32-bit only! */
547: p_mask_size /= sizeof(PetscInt);
548: ngh_buf_size /= sizeof(PetscInt);
549: buf_size /= sizeof(PetscInt);
551: /* find PCTFS_giop work space */
552: buf2 = buf1 + buf_size;
554: /* hold #ints needed for processor masks */
555: gs->mask_sz = p_mask_size;
557: /* init buffers */
558: PetscCall(PCTFS_ivec_zero(sh_proc_mask, p_mask_size));
559: PetscCall(PCTFS_ivec_zero(pw_sh_proc_mask, p_mask_size));
560: PetscCall(PCTFS_ivec_zero(ngh_buf, ngh_buf_size));
562: /* HACK reset tree info */
563: tree_buf = NULL;
564: tree_buf_sz = ntree = 0;
566: /* ok do it */
567: for (ptr1 = ngh_buf, ptr2 = elms, end = gs->gl_min, or_ct = i = 0; or_ct < num_loads; or_ct++) {
568: /* identity for bitwise or is 000...000 */
569: PetscCall(PCTFS_ivec_zero(buf1, buf_size));
571: /* load msg buffer */
572: for (start = end, end += per_load, i_start = i; (offset = *ptr2) < end; i++, ptr2++) {
573: offset = (offset - start) * p_mask_size;
574: PCTFS_ivec_copy(buf1 + offset, p_mask, p_mask_size);
575: }
577: /* GLOBAL: pass buffer */
578: PetscCall(PCTFS_giop(buf1, buf2, buf_size, &oper));
580: /* unload buffer into ngh_buf */
581: ptr2 = (elms + i_start);
582: for (ptr3 = buf1, j = start; j < end; ptr3 += p_mask_size, j++) {
583: /* I own it ... may have to pairwise it */
584: if (j == *ptr2) {
585: /* do i share it w/anyone? */
586: ct1 = PCTFS_ct_bits((char *)ptr3, p_mask_size * sizeof(PetscInt));
587: /* guess not */
588: if (ct1 < 2) {
589: ptr2++;
590: ptr1 += p_mask_size;
591: continue;
592: }
594: /* i do ... so keep info and turn off my bit */
595: PCTFS_ivec_copy(ptr1, ptr3, p_mask_size);
596: PetscCall(PCTFS_ivec_xor(ptr1, p_mask, p_mask_size));
597: PetscCall(PCTFS_ivec_or(sh_proc_mask, ptr1, p_mask_size));
599: /* is it to be done pairwise? */
600: if (--ct1 <= level) {
601: npw++;
603: /* turn on high bit to indicate pw need to process */
604: *ptr2++ |= TOP_BIT;
605: PetscCall(PCTFS_ivec_or(pw_sh_proc_mask, ptr1, p_mask_size));
606: ptr1 += p_mask_size;
607: continue;
608: }
610: /* get set for next and note that I have a tree contribution */
611: /* could save exact elm index for tree here -> save a search */
612: ptr2++;
613: ptr1 += p_mask_size;
614: ntree_map++;
615: } else { /* i don't but still might be involved in tree */
617: /* shared by how many? */
618: ct1 = PCTFS_ct_bits((char *)ptr3, p_mask_size * sizeof(PetscInt));
620: /* none! */
621: if (ct1 < 2) continue;
623: /* is it going to be done pairwise? but not by me of course!*/
624: if (--ct1 <= level) continue;
625: }
626: /* LATER we're going to have to process it NOW */
627: /* nope ... tree it */
628: PetscCall(place_in_tree(j));
629: }
630: }
632: free((void *)t_mask);
633: free((void *)buf1);
635: gs->len_pw_list = npw;
636: gs->num_nghs = PCTFS_ct_bits((char *)sh_proc_mask, p_mask_size * sizeof(PetscInt));
638: /* expand from bit mask list to int list and save ngh list */
639: gs->nghs = (PetscInt *)malloc(gs->num_nghs * sizeof(PetscInt));
640: PetscCall(PCTFS_bm_to_proc((char *)sh_proc_mask, p_mask_size * sizeof(PetscInt), gs->nghs));
642: gs->num_pw_nghs = PCTFS_ct_bits((char *)pw_sh_proc_mask, p_mask_size * sizeof(PetscInt));
644: oper = GL_MAX;
645: ct1 = gs->num_nghs;
646: PetscCall(PCTFS_giop(&ct1, &ct2, 1, &oper));
647: gs->max_nghs = ct1;
649: gs->tree_map_sz = ntree_map;
650: gs->max_left_over = ntree;
652: free((void *)p_mask);
653: free((void *)sh_proc_mask);
654: PetscFunctionReturn(PETSC_SUCCESS);
655: }
657: /******************************************************************************/
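/* set_pairwise() - turn the pairwise neighbor bit mask into a concrete message
   plan: the neighbor rank list (pair_list), per-neighbor sizes and node lists,
   the MPI_Request arrays, and the pw_vals/in/out staging buffers. */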
658: static PetscErrorCode set_pairwise(PCTFS_gs_id *gs)
659: {
660: PetscInt i, j;
661: PetscInt p_mask_size;
662: PetscInt *p_mask, *sh_proc_mask, *tmp_proc_mask;
663: PetscInt *ngh_buf, *buf2;
664: PetscInt offset;
665: PetscInt *msg_list, *msg_size, **msg_nodes, nprs;
666: PetscInt *pairwise_elm_list, len_pair_list = 0;
667: PetscInt *iptr, t1, i_start, nel, *elms;
668: PetscInt ct;
670: PetscFunctionBegin;
671: /* to make life easier */
672: nel = gs->nel;
673: elms = gs->elms;
674: ngh_buf = gs->ngh_buf;
675: sh_proc_mask = gs->pw_nghs;
677: /* need a few temp masks */
678: p_mask_size = PCTFS_len_bit_mask(PCTFS_num_nodes);
679: p_mask = (PetscInt *)malloc(p_mask_size);
680: tmp_proc_mask = (PetscInt *)malloc(p_mask_size);
682: /* set mask to my PCTFS_my_id's bit mask */
683: PetscCall(PCTFS_set_bit_mask(p_mask, p_mask_size, PCTFS_my_id));
685: p_mask_size /= sizeof(PetscInt);
687: len_pair_list = gs->len_pw_list;
688: gs->pw_elm_list = pairwise_elm_list = (PetscInt *)malloc((len_pair_list + 1) * sizeof(PetscInt));
690: /* how many processors (nghs) do we have to exchange with? */
691: nprs = gs->num_pairs = PCTFS_ct_bits((char *)sh_proc_mask, p_mask_size * sizeof(PetscInt));
693: /* allocate space for PCTFS_gs_gop() info */
694: gs->pair_list = msg_list = (PetscInt *)malloc(sizeof(PetscInt) * nprs);
695: gs->msg_sizes = msg_size = (PetscInt *)malloc(sizeof(PetscInt) * nprs);
696: gs->node_list = msg_nodes = (PetscInt **)malloc(sizeof(PetscInt *) * (nprs + 1));
698: /* init msg_size list */
699: PetscCall(PCTFS_ivec_zero(msg_size, nprs));
701: /* expand from bit mask list to int list */
702: PetscCall(PCTFS_bm_to_proc((char *)sh_proc_mask, p_mask_size * sizeof(PetscInt), msg_list));
704: /* keep list of elements being handled pairwise */
705: for (i = j = 0; i < nel; i++) {
706: if (elms[i] & TOP_BIT) {
707: elms[i] ^= TOP_BIT;
708: pairwise_elm_list[j++] = i;
709: }
710: }
711: pairwise_elm_list[j] = -1;
713: gs->msg_ids_out = (MPI_Request *)malloc(sizeof(MPI_Request) * (nprs + 1));
714: gs->msg_ids_out[nprs] = MPI_REQUEST_NULL;
715: gs->msg_ids_in = (MPI_Request *)malloc(sizeof(MPI_Request) * (nprs + 1));
716: gs->msg_ids_in[nprs] = MPI_REQUEST_NULL;
717: gs->pw_vals = (PetscScalar *)malloc(sizeof(PetscScalar) * len_pair_list * vec_sz);
719: /* find who goes to each processor */
720: for (i_start = i = 0; i < nprs; i++) {
721: /* processor i's mask */
722: PetscCall(PCTFS_set_bit_mask(p_mask, p_mask_size * sizeof(PetscInt), msg_list[i]));
724: /* determine # going to processor i */
725: for (ct = j = 0; j < len_pair_list; j++) {
726: buf2 = ngh_buf + (pairwise_elm_list[j] * p_mask_size);
727: PetscCall(PCTFS_ivec_and3(tmp_proc_mask, p_mask, buf2, p_mask_size));
728: if (PCTFS_ct_bits((char *)tmp_proc_mask, p_mask_size * sizeof(PetscInt))) ct++;
729: }
730: msg_size[i] = ct;
731: i_start = PetscMax(i_start, ct);
733: /* space to hold nodes in the message to the i-th neighbor */
734: msg_nodes[i] = iptr = (PetscInt *)malloc(sizeof(PetscInt) * (ct + 1));
736: for (j = 0; j < len_pair_list; j++) {
737: buf2 = ngh_buf + (pairwise_elm_list[j] * p_mask_size);
738: PetscCall(PCTFS_ivec_and3(tmp_proc_mask, p_mask, buf2, p_mask_size));
739: if (PCTFS_ct_bits((char *)tmp_proc_mask, p_mask_size * sizeof(PetscInt))) *iptr++ = j;
740: }
741: *iptr = -1;
742: }
743: msg_nodes[nprs] = NULL;
745: j = gs->loc_node_pairs = i_start;
746: t1 = GL_MAX;
747: PetscCall(PCTFS_giop(&i_start, &offset, 1, &t1));
748: gs->max_node_pairs = i_start;
750: i_start = j;
751: t1 = GL_MIN;
752: PetscCall(PCTFS_giop(&i_start, &offset, 1, &t1));
753: gs->min_node_pairs = i_start;
755: i_start = j;
756: t1 = GL_ADD;
757: PetscCall(PCTFS_giop(&i_start, &offset, 1, &t1));
758: gs->avg_node_pairs = i_start / PCTFS_num_nodes + 1;
760: i_start = nprs;
761: t1 = GL_MAX;
762: PetscCall(PCTFS_giop(&i_start, &offset, 1, &t1));
763: gs->max_pairs = i_start;
765: /* remap pairwise in tail of gsi_via_bit_mask() */
766: gs->msg_total = PCTFS_ivec_sum(gs->msg_sizes, nprs);
767: gs->out = (PetscScalar *)malloc(sizeof(PetscScalar) * gs->msg_total * vec_sz);
768: gs->in = (PetscScalar *)malloc(sizeof(PetscScalar) * gs->msg_total * vec_sz);
770: /* free temporary masks */
771: free((void *)p_mask);
772: free((void *)tmp_proc_mask);
773: PetscFunctionReturn(PETSC_SUCCESS);
774: }
776: /* to do pruned tree just save ngh buf copy for each one and decode here!
777: ******************************************************************************/
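/* set_tree() - finish the tree path: adopt the ids accumulated in tree_buf,
   allocate the tree work buffers, and build the index maps between my element
   list and positions in the tree buffer. */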
778: static PetscErrorCode set_tree(PCTFS_gs_id *gs)
779: {
780: PetscInt i, j, n, nel;
781: PetscInt *iptr_in, *iptr_out, *tree_elms, *elms;
783: PetscFunctionBegin;
784: /* local work ptrs */
785: elms = gs->elms;
786: nel = gs->nel;
788: /* how many via tree */
789: gs->tree_nel = n = ntree;
790: gs->tree_elms = tree_elms = iptr_in = tree_buf;
791: gs->tree_buf = (PetscScalar *)malloc(sizeof(PetscScalar) * n * vec_sz);
792: gs->tree_work = (PetscScalar *)malloc(sizeof(PetscScalar) * n * vec_sz);
793: j = gs->tree_map_sz;
794: gs->tree_map_in = iptr_in = (PetscInt *)malloc(sizeof(PetscInt) * (j + 1));
795: gs->tree_map_out = iptr_out = (PetscInt *)malloc(sizeof(PetscInt) * (j + 1));
797: /* search the longer of the two lists */
798: /* note ... could save this info in get_ngh_buf and save searches */
799: if (n <= nel) {
800: /* bijective fct w/remap - search elm list */
801: for (i = 0; i < n; i++) {
802: if ((j = PCTFS_ivec_binary_search(*tree_elms++, elms, nel)) >= 0) {
803: *iptr_in++ = j;
804: *iptr_out++ = i;
805: }
806: }
807: } else {
808: for (i = 0; i < nel; i++) {
809: if ((j = PCTFS_ivec_binary_search(*elms++, tree_elms, n)) >= 0) {
810: *iptr_in++ = i;
811: *iptr_out++ = j;
812: }
813: }
814: }
816: /* sentinel */
817: *iptr_in = *iptr_out = -1;
818: PetscFunctionReturn(PETSC_SUCCESS);
819: }
821: /******************************************************************************/
822: static PetscErrorCode PCTFS_gs_gop_local_out(PCTFS_gs_id *gs, PetscScalar *vals)
823: {
824: PetscInt *num, *map, **reduce;
825: PetscScalar tmp;
827: PetscFunctionBegin;
828: num = gs->num_gop_local_reduce;
829: reduce = gs->gop_local_reduce;
830: while ((map = *reduce++)) {
831: /* wall */
832: if (*num == 2) {
833: num++;
834: vals[map[1]] = vals[map[0]];
835: } else if (*num == 3) { /* corner shared by three elements */
836: num++;
837: vals[map[2]] = vals[map[1]] = vals[map[0]];
838: } else if (*num == 4) { /* corner shared by four elements */
839: num++;
840: vals[map[3]] = vals[map[2]] = vals[map[1]] = vals[map[0]];
841: } else { /* general case ... odd geoms ... 3D*/
842: num++;
843: tmp = *(vals + *map++);
844: while (*map >= 0) *(vals + *map++) = tmp;
845: }
846: }
847: PetscFunctionReturn(PETSC_SUCCESS);
848: }
850: /******************************************************************************/
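/* purely local gather/scatter: for each group of locally repeated ids, sum all
   copies and write the sum back into every copy (2/3/4-way cases are unrolled;
   the general case walks the -1 terminated map). */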
851: static PetscErrorCode PCTFS_gs_gop_local_plus(PCTFS_gs_id *gs, PetscScalar *vals)
852: {
853: PetscInt *num, *map, **reduce;
854: PetscScalar tmp;
856: PetscFunctionBegin;
857: num = gs->num_local_reduce;
858: reduce = gs->local_reduce;
859: while ((map = *reduce)) {
860: /* wall */
861: if (*num == 2) {
862: num++;
863: reduce++;
864: vals[map[1]] = vals[map[0]] += vals[map[1]];
865: } else if (*num == 3) { /* corner shared by three elements */
866: num++;
867: reduce++;
868: vals[map[2]] = vals[map[1]] = vals[map[0]] += (vals[map[1]] + vals[map[2]]);
869: } else if (*num == 4) { /* corner shared by four elements */
870: num++;
871: reduce++;
872: vals[map[1]] = vals[map[2]] = vals[map[3]] = vals[map[0]] += (vals[map[1]] + vals[map[2]] + vals[map[3]]);
873: } else { /* general case ... odd geoms ... 3D*/
874: num++;
875: tmp = 0.0;
876: while (*map >= 0) tmp += *(vals + *map++);
878: map = *reduce++;
879: while (*map >= 0) *(vals + *map++) = tmp;
880: }
881: }
882: PetscFunctionReturn(PETSC_SUCCESS);
883: }
885: /******************************************************************************/
886: static PetscErrorCode PCTFS_gs_gop_local_in_plus(PCTFS_gs_id *gs, PetscScalar *vals)
887: {
888: PetscInt *num, *map, **reduce;
889: PetscScalar *base;
891: PetscFunctionBegin;
892: num = gs->num_gop_local_reduce;
893: reduce = gs->gop_local_reduce;
894: while ((map = *reduce++)) {
895: /* wall */
896: if (*num == 2) {
897: num++;
898: vals[map[0]] += vals[map[1]];
899: } else if (*num == 3) { /* corner shared by three elements */
900: num++;
901: vals[map[0]] += (vals[map[1]] + vals[map[2]]);
902: } else if (*num == 4) { /* corner shared by four elements */
903: num++;
904: vals[map[0]] += (vals[map[1]] + vals[map[2]] + vals[map[3]]);
905: } else { /* general case ... odd geoms ... 3D*/
906: num++;
907: base = vals + *map++;
908: while (*map >= 0) *base += *(vals + *map++);
909: }
910: }
911: PetscFunctionReturn(PETSC_SUCCESS);
912: }
914: /******************************************************************************/
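/* PCTFS_gs_free() - release the communicator and every buffer owned by the
   template; counterpart of PCTFS_gs_init(). */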
915: PetscErrorCode PCTFS_gs_free(PCTFS_gs_id *gs)
916: {
917: PetscInt i;
919: PetscFunctionBegin;
920: PetscCallMPI(MPI_Comm_free(&gs->PCTFS_gs_comm));
921: if (gs->nghs) free((void *)gs->nghs);
922: if (gs->pw_nghs) free((void *)gs->pw_nghs);
924: /* tree */
925: if (gs->max_left_over) {
926: if (gs->tree_elms) free((void *)gs->tree_elms);
927: if (gs->tree_buf) free((void *)gs->tree_buf);
928: if (gs->tree_work) free((void *)gs->tree_work);
929: if (gs->tree_map_in) free((void *)gs->tree_map_in);
930: if (gs->tree_map_out) free((void *)gs->tree_map_out);
931: }
933: /* pairwise info */
934: if (gs->num_pairs) {
935: /* should be NULL already */
936: if (gs->ngh_buf) free((void *)gs->ngh_buf);
937: if (gs->elms) free((void *)gs->elms);
938: if (gs->local_elms) free((void *)gs->local_elms);
939: if (gs->companion) free((void *)gs->companion);
941: /* only set if pairwise */
942: if (gs->vals) free((void *)gs->vals);
943: if (gs->in) free((void *)gs->in);
944: if (gs->out) free((void *)gs->out);
945: if (gs->msg_ids_in) free((void *)gs->msg_ids_in);
946: if (gs->msg_ids_out) free((void *)gs->msg_ids_out);
947: if (gs->pw_vals) free((void *)gs->pw_vals);
948: if (gs->pw_elm_list) free((void *)gs->pw_elm_list);
949: if (gs->node_list) {
950: for (i = 0; i < gs->num_pairs; i++) {
951: if (gs->node_list[i]) free((void *)gs->node_list[i]);
952: }
953: free((void *)gs->node_list);
954: }
955: if (gs->msg_sizes) free((void *)gs->msg_sizes);
956: if (gs->pair_list) free((void *)gs->pair_list);
957: }
959: /* local info */
960: if (gs->num_local_total >= 0) {
961: for (i = 0; i < gs->num_local_total + 1; i++) {
962: if (gs->num_gop_local_reduce[i]) free((void *)gs->gop_local_reduce[i]);
963: }
964: }
966: /* if intersection tree/pairwise and local isn't empty */
967: if (gs->gop_local_reduce) free((void *)gs->gop_local_reduce);
968: if (gs->num_gop_local_reduce) free((void *)gs->num_gop_local_reduce);
970: free((void *)gs);
971: PetscFunctionReturn(PETSC_SUCCESS);
972: }
974: /******************************************************************************/
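/* PCTFS_gs_gop_vec() - gather/scatter on vectors of `step` values per id.
   Only op "+" is implemented; any other op falls back to "+" with a PetscInfo
   message.  Note that the staging buffers are sized with vec_sz at init time,
   so PCTFS_gs_init_vec_sz() should be called before PCTFS_gs_init() when
   step > 1. */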
975: PetscErrorCode PCTFS_gs_gop_vec(PCTFS_gs_id *gs, PetscScalar *vals, const char *op, PetscInt step)
976: {
977: PetscFunctionBegin;
978: switch (*op) {
979: case '+':
980: PetscCall(PCTFS_gs_gop_vec_plus(gs, vals, step));
981: break;
982: default:
983: PetscCall(PetscInfo(0, "PCTFS_gs_gop_vec() :: %c is not a valid op\n", op[0]));
984: PetscCall(PetscInfo(0, "PCTFS_gs_gop_vec() :: default :: plus\n"));
985: PetscCall(PCTFS_gs_gop_vec_plus(gs, vals, step));
986: break;
987: }
988: PetscFunctionReturn(PETSC_SUCCESS);
989: }
991: /******************************************************************************/
992: static PetscErrorCode PCTFS_gs_gop_vec_plus(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt step)
993: {
994: PetscFunctionBegin;
995: PetscCheck(gs, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PCTFS_gs_gop_vec() passed NULL gs handle!!!");
997: /* local only operations!!! */
998: if (gs->num_local) PetscCall(PCTFS_gs_gop_vec_local_plus(gs, vals, step));
1000: /* if intersection tree/pairwise and local isn't empty */
1001: if (gs->num_local_gop) {
1002: PetscCall(PCTFS_gs_gop_vec_local_in_plus(gs, vals, step));
1004: /* pairwise */
1005: if (gs->num_pairs) PetscCall(PCTFS_gs_gop_vec_pairwise_plus(gs, vals, step));
1007: /* tree */
1008: else if (gs->max_left_over) PetscCall(PCTFS_gs_gop_vec_tree_plus(gs, vals, step));
1010: PetscCall(PCTFS_gs_gop_vec_local_out(gs, vals, step));
1011: } else { /* if intersection tree/pairwise and local is empty */
1012: /* pairwise */
1013: if (gs->num_pairs) PetscCall(PCTFS_gs_gop_vec_pairwise_plus(gs, vals, step));
1015: /* tree */
1016: else if (gs->max_left_over) PetscCall(PCTFS_gs_gop_vec_tree_plus(gs, vals, step));
1017: }
1018: PetscFunctionReturn(PETSC_SUCCESS);
1019: }
1021: /******************************************************************************/
1022: static PetscErrorCode PCTFS_gs_gop_vec_local_plus(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt step)
1023: {
1024: PetscInt *num, *map, **reduce;
1025: PetscScalar *base;
1027: PetscFunctionBegin;
1028: num = gs->num_local_reduce;
1029: reduce = gs->local_reduce;
1030: while ((map = *reduce)) {
1031: base = vals + map[0] * step;
1033: /* wall */
1034: if (*num == 2) {
1035: num++;
1036: reduce++;
1037: PetscCall(PCTFS_rvec_add(base, vals + map[1] * step, step));
1038: PetscCall(PCTFS_rvec_copy(vals + map[1] * step, base, step));
1039: } else if (*num == 3) { /* corner shared by three elements */
1040: num++;
1041: reduce++;
1042: PetscCall(PCTFS_rvec_add(base, vals + map[1] * step, step));
1043: PetscCall(PCTFS_rvec_add(base, vals + map[2] * step, step));
1044: PetscCall(PCTFS_rvec_copy(vals + map[2] * step, base, step));
1045: PetscCall(PCTFS_rvec_copy(vals + map[1] * step, base, step));
1046: } else if (*num == 4) { /* corner shared by four elements */
1047: num++;
1048: reduce++;
1049: PetscCall(PCTFS_rvec_add(base, vals + map[1] * step, step));
1050: PetscCall(PCTFS_rvec_add(base, vals + map[2] * step, step));
1051: PetscCall(PCTFS_rvec_add(base, vals + map[3] * step, step));
1052: PetscCall(PCTFS_rvec_copy(vals + map[3] * step, base, step));
1053: PetscCall(PCTFS_rvec_copy(vals + map[2] * step, base, step));
1054: PetscCall(PCTFS_rvec_copy(vals + map[1] * step, base, step));
1055: } else { /* general case ... odd geoms ... 3D */
1056: num++;
1057: while (*++map >= 0) PetscCall(PCTFS_rvec_add(base, vals + *map * step, step));
1059: map = *reduce;
1060: while (*++map >= 0) PetscCall(PCTFS_rvec_copy(vals + *map * step, base, step));
1062: reduce++;
1063: }
1064: }
1065: PetscFunctionReturn(PETSC_SUCCESS);
1066: }
1068: /******************************************************************************/
1069: static PetscErrorCode PCTFS_gs_gop_vec_local_in_plus(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt step)
1070: {
1071: PetscInt *num, *map, **reduce;
1072: PetscScalar *base;
1074: PetscFunctionBegin;
1075: num = gs->num_gop_local_reduce;
1076: reduce = gs->gop_local_reduce;
1077: while ((map = *reduce++)) {
1078: base = vals + map[0] * step;
1080: /* wall */
1081: if (*num == 2) {
1082: num++;
1083: PetscCall(PCTFS_rvec_add(base, vals + map[1] * step, step));
1084: } else if (*num == 3) { /* corner shared by three elements */
1085: num++;
1086: PetscCall(PCTFS_rvec_add(base, vals + map[1] * step, step));
1087: PetscCall(PCTFS_rvec_add(base, vals + map[2] * step, step));
1088: } else if (*num == 4) { /* corner shared by four elements */
1089: num++;
1090: PetscCall(PCTFS_rvec_add(base, vals + map[1] * step, step));
1091: PetscCall(PCTFS_rvec_add(base, vals + map[2] * step, step));
1092: PetscCall(PCTFS_rvec_add(base, vals + map[3] * step, step));
1093: } else { /* general case ... odd geoms ... 3D*/
1094: num++;
1095: while (*++map >= 0) PetscCall(PCTFS_rvec_add(base, vals + *map * step, step));
1096: }
1097: }
1098: PetscFunctionReturn(PETSC_SUCCESS);
1099: }
1101: /******************************************************************************/
1102: static PetscErrorCode PCTFS_gs_gop_vec_local_out(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt step)
1103: {
1104: PetscInt *num, *map, **reduce;
1105: PetscScalar *base;
1107: PetscFunctionBegin;
1108: num = gs->num_gop_local_reduce;
1109: reduce = gs->gop_local_reduce;
1110: while ((map = *reduce++)) {
1111: base = vals + map[0] * step;
1113: /* wall */
1114: if (*num == 2) {
1115: num++;
1116: PetscCall(PCTFS_rvec_copy(vals + map[1] * step, base, step));
1117: } else if (*num == 3) { /* corner shared by three elements */
1118: num++;
1119: PetscCall(PCTFS_rvec_copy(vals + map[1] * step, base, step));
1120: PetscCall(PCTFS_rvec_copy(vals + map[2] * step, base, step));
1121: } else if (*num == 4) { /* corner shared by four elements */
1122: num++;
1123: PetscCall(PCTFS_rvec_copy(vals + map[1] * step, base, step));
1124: PetscCall(PCTFS_rvec_copy(vals + map[2] * step, base, step));
1125: PetscCall(PCTFS_rvec_copy(vals + map[3] * step, base, step));
1126: } else { /* general case ... odd geoms ... 3D*/
1127: num++;
1128: while (*++map >= 0) PetscCall(PCTFS_rvec_copy(vals + *map * step, base, step));
1129: }
1130: }
1131: PetscFunctionReturn(PETSC_SUCCESS);
1132: }
1134: /******************************************************************************/
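/* pairwise exchange, vector flavor: post one MPI_Irecv per neighbor, pack the
   shared values and MPI_Isend them, overlap the tree reduction while messages
   are in flight, then axpy each received contribution into pw_vals and copy the
   totals back into in_vals. */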
1135: static PetscErrorCode PCTFS_gs_gop_vec_pairwise_plus(PCTFS_gs_id *gs, PetscScalar *in_vals, PetscInt step)
1136: {
1137: PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
1138: PetscInt *iptr, *msg_list, *msg_size, **msg_nodes;
1139: PetscInt *pw, *list, *size, **nodes;
1140: MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
1141: MPI_Status status;
1142: PetscBLASInt i1 = 1, dstep;
1144: PetscFunctionBegin;
1145: /* strip the handle and load local aliases */
1146: msg_list = list = gs->pair_list;
1147: msg_size = size = gs->msg_sizes;
1148: msg_nodes = nodes = gs->node_list;
1149: iptr = pw = gs->pw_elm_list;
1150: dptr1 = dptr3 = gs->pw_vals;
1151: msg_ids_in = ids_in = gs->msg_ids_in;
1152: msg_ids_out = ids_out = gs->msg_ids_out;
1153: dptr2 = gs->out;
1154: in1 = in2 = gs->in;
1156: /* post the receives */
1157: /* msg_nodes=nodes; */
1158: do {
1159: /* Should MPI_ANY_SOURCE be replaced by *list ? If so, pass *list to MPI_Irecv()
1160: here and do list++ afterwards */
1161: PetscCallMPI(MPI_Irecv(in1, *size * step, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list, gs->PCTFS_gs_comm, msg_ids_in));
1162: list++;
1163: msg_ids_in++;
1164: in1 += *size++ * step;
1165: } while (*++msg_nodes);
1166: msg_nodes = nodes;
1168: /* gather the pairwise-shared values from in_vals into the pw_vals staging buffer */
1169: while (*iptr >= 0) {
1170: PetscCall(PCTFS_rvec_copy(dptr3, in_vals + *iptr * step, step));
1171: dptr3 += step;
1172: iptr++;
1173: }
1175: /* load out buffers and post the sends */
1176: while ((iptr = *msg_nodes++)) {
1177: dptr3 = dptr2;
1178: while (*iptr >= 0) {
1179: PetscCall(PCTFS_rvec_copy(dptr2, dptr1 + *iptr * step, step));
1180: dptr2 += step;
1181: iptr++;
1182: }
1183: PetscCallMPI(MPI_Isend(dptr3, *msg_size * step, MPIU_SCALAR, *msg_list, MSGTAG1 + PCTFS_my_id, gs->PCTFS_gs_comm, msg_ids_out));
1184: msg_size++;
1185: msg_list++;
1186: msg_ids_out++;
1187: }
1189: /* tree */
1190: if (gs->max_left_over) PetscCall(PCTFS_gs_gop_vec_tree_plus(gs, in_vals, step));
1192: /* process the received data */
1193: msg_nodes = nodes;
1194: while ((iptr = *nodes++)) {
1195: PetscScalar d1 = 1.0;
1197: /* Should I check the return value of MPI_Wait() or status? */
1198: /* Can this loop be replaced by a call to MPI_Waitall()? */
1199: PetscCallMPI(MPI_Wait(ids_in, &status));
1200: ids_in++;
1201: while (*iptr >= 0) {
1202: PetscCall(PetscBLASIntCast(step, &dstep));
1203: PetscCallBLAS("BLASaxpy", BLASaxpy_(&dstep, &d1, in2, &i1, dptr1 + *iptr * step, &i1));
1204: in2 += step;
1205: iptr++;
1206: }
1207: }
1209: /* replace vals */
1210: while (*pw >= 0) {
1211: PetscCall(PCTFS_rvec_copy(in_vals + *pw * step, dptr1, step));
1212: dptr1 += step;
1213: pw++;
1214: }
1216: /* clear isend message handles */
1217: /* This changed for clarity though it could be the same */
1219: /* Should I check the return value of MPI_Wait() or status? */
1220: /* Can this loop be replaced by a call to MPI_Waitall()? */
1221: while (*msg_nodes++) {
1222: PetscCallMPI(MPI_Wait(ids_out, &status));
1223: ids_out++;
1224: }
1225: PetscFunctionReturn(PETSC_SUCCESS);
1226: }
1228: /******************************************************************************/
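/* tree reduction, vector flavor: scatter my contributions into the global tree
   buffer, fan-in/fan-out sum via PCTFS_grop(), then copy my share of the result
   back out through the tree_map_in/tree_map_out index maps. */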
1229: static PetscErrorCode PCTFS_gs_gop_vec_tree_plus(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt step)
1230: {
1231: PetscInt size, *in, *out;
1232: PetscScalar *buf, *work;
1233: PetscInt op[] = {GL_ADD, 0};
1234: PetscBLASInt i1 = 1;
1235: PetscBLASInt dstep;
1237: PetscFunctionBegin;
1238: /* copy over to local variables */
1239: in = gs->tree_map_in;
1240: out = gs->tree_map_out;
1241: buf = gs->tree_buf;
1242: work = gs->tree_work;
1243: size = gs->tree_nel * step;
1245: /* zero out collection buffer */
1246: PetscCall(PCTFS_rvec_zero(buf, size));
1248: /* copy over my contributions */
1249: while (*in >= 0) {
1250: PetscCall(PetscBLASIntCast(step, &dstep));
1251: PetscCallBLAS("BLAScopy", BLAScopy_(&dstep, vals + *in++ * step, &i1, buf + *out++ * step, &i1));
1252: }
1254: /* perform fan in/out on full buffer */
1255: /* must change PCTFS_grop to handle the blas */
1256: PetscCall(PCTFS_grop(buf, work, size, op));
1258: /* reset */
1259: in = gs->tree_map_in;
1260: out = gs->tree_map_out;
1262: /* get the portion of the results I need */
1263: while (*in >= 0) {
1264: PetscCall(PetscBLASIntCast(step, &dstep));
1265: PetscCallBLAS("BLAScopy", BLAScopy_(&dstep, buf + *out++ * step, &i1, vals + *in++ * step, &i1));
1266: }
1267: PetscFunctionReturn(PETSC_SUCCESS);
1268: }
1270: /******************************************************************************/
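/* PCTFS_gs_gop_hc() - scalar gather/scatter restricted to the subcube of ranks
   that differ from PCTFS_my_id only in the low `dim` bits.  Only op "+" is
   implemented; other ops fall back to "+". */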
1271: PetscErrorCode PCTFS_gs_gop_hc(PCTFS_gs_id *gs, PetscScalar *vals, const char *op, PetscInt dim)
1272: {
1273: PetscFunctionBegin;
1274: switch (*op) {
1275: case '+':
1276: PetscCall(PCTFS_gs_gop_plus_hc(gs, vals, dim));
1277: break;
1278: default:
1279: PetscCall(PetscInfo(0, "PCTFS_gs_gop_hc() :: %c is not a valid op\n", op[0]));
1280: PetscCall(PetscInfo(0, "PCTFS_gs_gop_hc() :: default :: plus\n"));
1281: PetscCall(PCTFS_gs_gop_plus_hc(gs, vals, dim));
1282: break;
1283: }
1284: PetscFunctionReturn(PETSC_SUCCESS);
1285: }
1287: /******************************************************************************/
1288: static PetscErrorCode PCTFS_gs_gop_plus_hc(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt dim)
1289: {
1290: PetscFunctionBegin;
1291: /* if there's nothing to do return */
1292: if (dim <= 0) PetscFunctionReturn(PETSC_SUCCESS);
1294: /* can't do more dimensions than exist */
1295: dim = PetscMin(dim, PCTFS_i_log2_num_nodes);
1297: /* local only operations!!! */
1298: if (gs->num_local) PetscCall(PCTFS_gs_gop_local_plus(gs, vals));
1300: /* if intersection tree/pairwise and local isn't empty */
1301: if (gs->num_local_gop) {
1302: PetscCall(PCTFS_gs_gop_local_in_plus(gs, vals));
1304: /* pairwise will do tree inside ... */
1305: if (gs->num_pairs) PetscCall(PCTFS_gs_gop_pairwise_plus_hc(gs, vals, dim)); /* tree only */
1306: else if (gs->max_left_over) PetscCall(PCTFS_gs_gop_tree_plus_hc(gs, vals, dim));
1308: PetscCall(PCTFS_gs_gop_local_out(gs, vals));
1309: } else { /* if intersection tree/pairwise and local is empty */
1310: /* pairwise will do tree inside */
1311: if (gs->num_pairs) PetscCall(PCTFS_gs_gop_pairwise_plus_hc(gs, vals, dim)); /* tree */
1312: else if (gs->max_left_over) PetscCall(PCTFS_gs_gop_tree_plus_hc(gs, vals, dim));
1313: }
1314: PetscFunctionReturn(PETSC_SUCCESS);
1315: }
1317: /******************************************************************************/
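/* pairwise exchange restricted to the hypercube: same message pattern as the
   full pairwise routine, but a neighbor is exchanged with only if it lies in
   the same dim-dimensional subcube as PCTFS_my_id. */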
1318: static PetscErrorCode PCTFS_gs_gop_pairwise_plus_hc(PCTFS_gs_id *gs, PetscScalar *in_vals, PetscInt dim)
1319: {
1320: PetscScalar *dptr1, *dptr2, *dptr3, *in1, *in2;
1321: PetscInt *iptr, *msg_list, *msg_size, **msg_nodes;
1322: PetscInt *pw, *list, *size, **nodes;
1323: MPI_Request *msg_ids_in, *msg_ids_out, *ids_in, *ids_out;
1324: MPI_Status status;
1325: PetscInt i, mask = 1;
1327: PetscFunctionBegin;
1328: for (i = 1; i < dim; i++) {
1329: mask <<= 1;
1330: mask++;
1331: }
1333: /* strip the handle and load local aliases */
1334: msg_list = list = gs->pair_list;
1335: msg_size = size = gs->msg_sizes;
1336: msg_nodes = nodes = gs->node_list;
1337: iptr = pw = gs->pw_elm_list;
1338: dptr1 = dptr3 = gs->pw_vals;
1339: msg_ids_in = ids_in = gs->msg_ids_in;
1340: msg_ids_out = ids_out = gs->msg_ids_out;
1341: dptr2 = gs->out;
1342: in1 = in2 = gs->in;
1344: /* post the receives */
1345: /* msg_nodes=nodes; */
1346: do {
1347: /* Should MPI_ANY_SOURCE be replaced by *list ? If so, pass *list to MPI_Irecv()
1348: here and do list++ afterwards */
1349: if ((PCTFS_my_id | mask) == (*list | mask)) {
1350: PetscCallMPI(MPI_Irecv(in1, *size, MPIU_SCALAR, MPI_ANY_SOURCE, MSGTAG1 + *list, gs->PCTFS_gs_comm, msg_ids_in));
1351: list++;
1352: msg_ids_in++;
1353: in1 += *size++;
1354: } else {
1355: list++;
1356: size++;
1357: }
1358: } while (*++msg_nodes);
1360: /* gather the pairwise-shared values from in_vals into the pw_vals staging buffer */
1361: while (*iptr >= 0) *dptr3++ = *(in_vals + *iptr++);
1363: /* load out buffers and post the sends */
1364: msg_nodes = nodes;
1365: list = msg_list;
1366: while ((iptr = *msg_nodes++)) {
1367: if ((PCTFS_my_id | mask) == (*list | mask)) {
1368: dptr3 = dptr2;
1369: while (*iptr >= 0) *dptr2++ = *(dptr1 + *iptr++);
1370: /* CHECK PERSISTENT COMMS MODE FOR ALL THIS STUFF */
1371: /* is msg_ids_out++ correct? */
1372: PetscCallMPI(MPI_Isend(dptr3, *msg_size, MPIU_SCALAR, *list, MSGTAG1 + PCTFS_my_id, gs->PCTFS_gs_comm, msg_ids_out));
1373: msg_size++;
1374: list++;
1375: msg_ids_out++;
1376: } else {
1377: list++;
1378: msg_size++;
1379: }
1380: }
1382: /* do the tree while we're waiting */
1383: if (gs->max_left_over) PetscCall(PCTFS_gs_gop_tree_plus_hc(gs, in_vals, dim));
1385: /* process the received data */
1386: msg_nodes = nodes;
1387: list = msg_list;
1388: while ((iptr = *nodes++)) {
1389: if ((PCTFS_my_id | mask) == (*list | mask)) {
1390: /* Should I check the return value of MPI_Wait() or status? */
1391: /* Can this loop be replaced by a call to MPI_Waitall()? */
1392: PetscCallMPI(MPI_Wait(ids_in, &status));
1393: ids_in++;
1394: while (*iptr >= 0) *(dptr1 + *iptr++) += *in2++;
1395: }
1396: list++;
1397: }
1399: /* replace vals */
1400: while (*pw >= 0) *(in_vals + *pw++) = *dptr1++;
1402: /* clear isend message handles */
1403: /* This changed for clarity though it could be the same */
1404: while (*msg_nodes++) {
1405: if ((PCTFS_my_id | mask) == (*msg_list | mask)) {
1406: /* Should I check the return value of MPI_Wait() or status? */
1407: /* Can this loop be replaced by a call to MPI_Waitall()? */
1408: PetscCallMPI(MPI_Wait(ids_out, &status));
1409: ids_out++;
1410: }
1411: msg_list++;
1412: }
1413: PetscFunctionReturn(PETSC_SUCCESS);
1414: }
1416: /******************************************************************************/
1417: static PetscErrorCode PCTFS_gs_gop_tree_plus_hc(PCTFS_gs_id *gs, PetscScalar *vals, PetscInt dim)
1418: {
1419: PetscInt size;
1420: PetscInt *in, *out;
1421: PetscScalar *buf, *work;
1422: PetscInt op[] = {GL_ADD, 0};
1424: PetscFunctionBegin;
1425: in = gs->tree_map_in;
1426: out = gs->tree_map_out;
1427: buf = gs->tree_buf;
1428: work = gs->tree_work;
1429: size = gs->tree_nel;
1431: PetscCall(PCTFS_rvec_zero(buf, size));
1433: while (*in >= 0) *(buf + *out++) = *(vals + *in++);
1435: in = gs->tree_map_in;
1436: out = gs->tree_map_out;
1438: PetscCall(PCTFS_grop_hc(buf, work, size, op, dim));
1440: while (*in >= 0) *(vals + *in++) = *(buf + *out++);
1441: PetscFunctionReturn(PETSC_SUCCESS);
1442: }