Actual source code: nn.c

  1: #include <../src/ksp/pc/impls/is/nn/nn.h>

  3: /*
  4:    PCSetUp_NN - Prepares for the use of the NN preconditioner
  5:                     by setting data structures and options.

  7:    Input Parameter:
  8: .  pc - the preconditioner context

 10:    Application Interface Routine: PCSetUp()

 12:    Note:
 13:    The interface routine PCSetUp() is not usually called directly by
 14:    the user, but instead is called by PCApply() if necessary.
 15: */
 16: static PetscErrorCode PCSetUp_NN(PC pc)
 17: {
 18:   PetscFunctionBegin;
 19:   if (!pc->setupcalled) {
 20:     /* Set up all the "iterative substructuring" common block */
 21:     PetscCall(PCISSetUp(pc, PETSC_TRUE, PETSC_TRUE));
 22:     /* Create the coarse matrix. */
 23:     PetscCall(PCNNCreateCoarseMatrix(pc));
 24:   }
 25:   PetscFunctionReturn(PETSC_SUCCESS);
 26: }

 28: /*
 29:    PCApply_NN - Applies the NN preconditioner to a vector.

 31:    Input Parameters:
 32: +  pc - the preconditioner context
 33: -  r - input vector (global)

 35:    Output Parameter:
 36: .  z - output vector (global)

 38:    Application Interface Routine: PCApply()
 39:  */
 40: static PetscErrorCode PCApply_NN(PC pc, Vec r, Vec z)
 41: {
 42:   PC_IS      *pcis  = (PC_IS *)pc->data;
 43:   PetscScalar m_one = -1.0;
 44:   Vec         w     = pcis->vec1_global;

 46:   PetscFunctionBegin;
 47:   /*
 48:     Dirichlet solvers.
 49:     Solving $ B_I^{(i)}r_I^{(i)} $ at each processor.
 50:     Storing the local results in vec2_D.
 51:   */
 52:   PetscCall(VecScatterBegin(pcis->global_to_D, r, pcis->vec1_D, INSERT_VALUES, SCATTER_FORWARD));
 53:   PetscCall(VecScatterEnd(pcis->global_to_D, r, pcis->vec1_D, INSERT_VALUES, SCATTER_FORWARD));
 54:   PetscCall(KSPSolve(pcis->ksp_D, pcis->vec1_D, pcis->vec2_D));

 56:   /*
 57:     Computing $ r_B - \sum_j \tilde R_j^T A_{BI}^{(j)} (B_I^{(j)}r_I^{(j)}) $ .
 58:     Storing the result in the interface portion of the global vector w.
 59:   */
 60:   PetscCall(MatMult(pcis->A_BI, pcis->vec2_D, pcis->vec1_B));
 61:   PetscCall(VecScale(pcis->vec1_B, m_one));
 62:   PetscCall(VecCopy(r, w));
 63:   PetscCall(VecScatterBegin(pcis->global_to_B, pcis->vec1_B, w, ADD_VALUES, SCATTER_REVERSE));
 64:   PetscCall(VecScatterEnd(pcis->global_to_B, pcis->vec1_B, w, ADD_VALUES, SCATTER_REVERSE));

 66:   /*
 67:     Apply the interface preconditioner
 68:   */
 69:   PetscCall(PCNNApplyInterfacePreconditioner(pc, w, z, pcis->work_N, pcis->vec1_B, pcis->vec2_B, pcis->vec3_B, pcis->vec1_D, pcis->vec3_D, pcis->vec1_N, pcis->vec2_N));

 71:   /*
 72:     Computing $ t_I^{(i)} = A_{IB}^{(i)} \tilde R_i z_B $
 73:     The result is stored in vec1_D.
 74:   */
 75:   PetscCall(VecScatterBegin(pcis->global_to_B, z, pcis->vec1_B, INSERT_VALUES, SCATTER_FORWARD));
 76:   PetscCall(VecScatterEnd(pcis->global_to_B, z, pcis->vec1_B, INSERT_VALUES, SCATTER_FORWARD));
 77:   PetscCall(MatMult(pcis->A_IB, pcis->vec1_B, pcis->vec1_D));

 79:   /*
 80:     Dirichlet solvers.
 81:     Computing $ B_I^{(i)}t_I^{(i)} $ and inserting the blocks
 82:     $ B_I^{(i)}r_I^{(i)} - B_I^{(i)}t_I^{(i)} $ into the global vector.
 83:   */
 84:   PetscCall(VecScatterBegin(pcis->global_to_D, pcis->vec2_D, z, INSERT_VALUES, SCATTER_REVERSE));
 85:   PetscCall(VecScatterEnd(pcis->global_to_D, pcis->vec2_D, z, INSERT_VALUES, SCATTER_REVERSE));
 86:   PetscCall(KSPSolve(pcis->ksp_D, pcis->vec1_D, pcis->vec2_D));
 87:   PetscCall(VecScale(pcis->vec2_D, m_one));
 88:   PetscCall(VecScatterBegin(pcis->global_to_D, pcis->vec2_D, z, ADD_VALUES, SCATTER_REVERSE));
 89:   PetscCall(VecScatterEnd(pcis->global_to_D, pcis->vec2_D, z, ADD_VALUES, SCATTER_REVERSE));
 90:   PetscFunctionReturn(PETSC_SUCCESS);
 91: }
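
/*
   In summary (this is only a restatement of the steps above, in the notation of the
   comments): let $ M_{NN}^{-1} $ denote the interface preconditioner applied by
   PCNNApplyInterfacePreconditioner(). Then PCApply_NN() returns

     $ z_B = M_{NN}^{-1} ( r_B - \sum_j \tilde R_j^T A_{BI}^{(j)} B_I^{(j)} r_I^{(j)} ) $
     $ z_I^{(i)} = B_I^{(i)} ( r_I^{(i)} - A_{IB}^{(i)} \tilde R_i z_B ) $

   i.e., the interface residual is statically condensed, preconditioned on the interface,
   and the interior values are then recovered by local Dirichlet solves.
*/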

 93: /*
 94:    PCDestroy_NN - Destroys the private context for the NN preconditioner
 95:    that was created with PCCreate_NN().

 97:    Input Parameter:
 98: .  pc - the preconditioner context

100:    Application Interface Routine: PCDestroy()
101: */
102: static PetscErrorCode PCDestroy_NN(PC pc)
103: {
104:   PC_NN *pcnn = (PC_NN *)pc->data;

106:   PetscFunctionBegin;
107:   PetscCall(PCISReset(pc));

109:   PetscCall(MatDestroy(&pcnn->coarse_mat));
110:   PetscCall(VecDestroy(&pcnn->coarse_x));
111:   PetscCall(VecDestroy(&pcnn->coarse_b));
112:   PetscCall(KSPDestroy(&pcnn->ksp_coarse));
113:   if (pcnn->DZ_IN) {
114:     PetscCall(PetscFree(pcnn->DZ_IN[0]));
115:     PetscCall(PetscFree(pcnn->DZ_IN));
116:   }

118:   /*
119:       Free the private data structure that was hanging off the PC
120:   */
121:   PetscCall(PetscFree(pc->data));
122:   PetscFunctionReturn(PETSC_SUCCESS);
123: }

125: /*MC
126:    PCNN - Balancing Neumann-Neumann for scalar elliptic PDEs.

128:    Options Database Keys:
129: +    -pc_nn_turn_off_first_balancing - do not balance the residual before solving the local Neumann problems
130:                                        (this skips the first coarse grid solve in the preconditioner)
130: .    -pc_nn_turn_off_second_balancing - do not balance the solution obtained from solving the local Neumann problems
132:                                        (this skips the second coarse grid solve in the preconditioner)
133: .    -pc_is_damp_fixed <fact> -
134: .    -pc_is_remove_nullspace_fixed -
135: .    -pc_is_set_damping_factor_floating <fact> -
136: .    -pc_is_not_damp_floating -
137: -    -pc_is_not_remove_nullspace_floating -

139:    Options Database prefixes for the subsolvers this preconditioner uses:
140: +  -nn_coarse_pc_ - for the coarse grid preconditioner
141: .  -is_localD_pc_ - for the Dirichlet subproblem preconditioner
142: -  -is_localN_pc_ - for the Neumann subproblem preconditioner

144:    Level: intermediate

146:    Notes:
147:    The matrix used with this preconditioner must be of type `MATIS`

149:    Unlike more 'conventional' Neumann-Neumann preconditioners, this one iterates over ALL the
150:    degrees of freedom, NOT just those on the interface (this allows the use of approximate solvers
151:    on the subdomains, though in our experience using approximate solvers is slower).

153:    Contributed by Paulo Goldfeld

155: .seealso: [](ch_ksp), `PCCreate()`, `PCSetType()`, `PCType`, `PC`, `MATIS`, `PCBDDC`
156: M*/
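
/*
   A minimal usage sketch, assuming the operator has already been assembled as a MATIS
   matrix (e.g. with MatCreateIS() or -mat_type is); the particular subsolver choices
   below are only illustrative:

       mpiexec -n 4 ./app -ksp_type cg -pc_type nn \
           -nn_coarse_ksp_type preonly -nn_coarse_pc_type redundant \
           -is_localD_pc_type icc -is_localN_pc_type ilu

   The subsolver options follow the prefixes listed in the manual page above.
*/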

158: PETSC_EXTERN PetscErrorCode PCCreate_NN(PC pc)
159: {
160:   PC_NN *pcnn;

162:   PetscFunctionBegin;
163:   /*
164:      Create the private data structure for this preconditioner and
165:      attach it to the PC object.
166:   */
167:   PetscCall(PetscNew(&pcnn));
168:   pc->data = (void *)pcnn;

170:   PetscCall(PCISInitialize(pc));
171:   pcnn->coarse_mat = NULL;
172:   pcnn->coarse_x   = NULL;
173:   pcnn->coarse_b   = NULL;
174:   pcnn->ksp_coarse = NULL;
175:   pcnn->DZ_IN      = NULL;

177:   /*
178:       Set the pointers for the functions that are provided above.
179:       Now when the user-level routines (such as PCApply(), PCDestroy(), etc.)
180:       are called, they will automatically call these functions.  Note we
181:       choose not to provide a couple of these functions since they are
182:       not needed.
183:   */
184:   pc->ops->apply               = PCApply_NN;
185:   pc->ops->applytranspose      = NULL;
186:   pc->ops->setup               = PCSetUp_NN;
187:   pc->ops->destroy             = PCDestroy_NN;
188:   pc->ops->view                = NULL;
189:   pc->ops->applyrichardson     = NULL;
190:   pc->ops->applysymmetricleft  = NULL;
191:   pc->ops->applysymmetricright = NULL;
192:   PetscFunctionReturn(PETSC_SUCCESS);
193: }

195: /*
196:    PCNNCreateCoarseMatrix - Creates the coarse matrix, the coarse vectors, and the coarse solver used for the balancing steps.
197: */
198: PetscErrorCode PCNNCreateCoarseMatrix(PC pc)
199: {
200:   MPI_Request  *send_request, *recv_request;
201:   PetscInt      i, j, k;
202:   PetscScalar  *mat;    /* Sub-matrix with this subdomain's contribution to the coarse matrix             */
203:   PetscScalar **DZ_OUT; /* proc[k].DZ_OUT[i][] = bit of vector to be sent from processor k to processor i */

205:   PC_IS        *pcis     = (PC_IS *)pc->data;
206:   PC_NN        *pcnn     = (PC_NN *)pc->data;
207:   PetscInt     *neigh    = pcis->neigh;
208:   PetscInt     *n_shared = pcis->n_shared;
209:   PetscInt    **shared   = pcis->shared;
210:   PetscMPIInt   n_neigh;
211:   PetscScalar **DZ_IN; /* Must be initialized after memory allocation. */

213:   PetscFunctionBegin;
214:   PetscCall(PetscMPIIntCast(pcis->n_neigh, &n_neigh));
215:   /* Allocate memory for mat (the +1 handles the case where n_neigh is zero) */
216:   PetscCall(PetscMalloc1(n_neigh * n_neigh + 1, &mat));

218:   /* Allocate memory for DZ */
219:   /* Notice that DZ_OUT[0] is allocated some space that is never used. */
220:   /* This is just so that DZ_OUT and DZ_IN have exactly the same form. */
221:   {
222:     PetscInt size_of_Z = 0;
223:     PetscCall(PetscMalloc1(n_neigh + 1, &pcnn->DZ_IN));
224:     DZ_IN = pcnn->DZ_IN;
225:     PetscCall(PetscMalloc1(n_neigh + 1, &DZ_OUT));
226:     for (i = 0; i < n_neigh; i++) size_of_Z += n_shared[i];
227:     PetscCall(PetscMalloc1(size_of_Z + 1, &DZ_IN[0]));
228:     PetscCall(PetscMalloc1(size_of_Z + 1, &DZ_OUT[0]));
229:   }
230:   for (i = 1; i < n_neigh; i++) {
231:     DZ_IN[i]  = DZ_IN[i - 1] + n_shared[i - 1];
232:     DZ_OUT[i] = DZ_OUT[i - 1] + n_shared[i - 1];
233:   }

235:   /* Set the values of DZ_OUT, in order to send this info to the neighbours */
236:   /* First, set the auxiliary array pcis->work_N. */
237:   PetscCall(PCISScatterArrayNToVecB(pc, pcis->work_N, pcis->D, INSERT_VALUES, SCATTER_REVERSE));
238:   for (i = 1; i < n_neigh; i++) {
239:     for (j = 0; j < n_shared[i]; j++) DZ_OUT[i][j] = pcis->work_N[shared[i][j]];
240:   }

242:   /* Non-blocking send/receive of the common-interface chunks of the scaled null spaces */
243:   /* Notice that send_request[] and recv_request[] could have one less element. */
244:   /* We make them longer so that request[i] corresponds to neigh[i].            */
245:   {
246:     PetscMPIInt tag;
247:     PetscCall(PetscObjectGetNewTag((PetscObject)pc, &tag));
248:     PetscCall(PetscMalloc2(n_neigh + 1, &send_request, n_neigh + 1, &recv_request));
249:     for (i = 1; i < n_neigh; i++) {
250:       PetscMPIInt nn;

252:       PetscCall(PetscMPIIntCast(neigh[i], &nn));
253:       PetscCallMPI(MPIU_Isend(DZ_OUT[i], n_shared[i], MPIU_SCALAR, nn, tag, PetscObjectComm((PetscObject)pc), &send_request[i]));
254:       PetscCallMPI(MPIU_Irecv(DZ_IN[i], n_shared[i], MPIU_SCALAR, nn, tag, PetscObjectComm((PetscObject)pc), &recv_request[i]));
255:     }
256:   }

258:   /* Set DZ_IN[0][] (recall that neigh[0]==rank, always) */
259:   for (j = 0; j < n_shared[0]; j++) DZ_IN[0][j] = pcis->work_N[shared[0][j]];

261:   /* Start computing with the local D*Z while communication goes on. */
262:   /* Apply the Schur complement. The result is stored in pcis->vec1_B */
263:   /* (the interface part of S applied to the local D*Z chunk)         */
264:   /* and is also scattered to pcis->work_N.                           */
265:   PetscCall(PCNNApplySchurToChunk(pc, n_shared[0], shared[0], DZ_IN[0], pcis->work_N, pcis->vec1_B, pcis->vec2_B, pcis->vec1_D, pcis->vec2_D));

267:   /* Compute the first column, while completing the receives. */
268:   for (i = 0; i < n_neigh; i++) {
269:     MPI_Status  stat;
270:     PetscMPIInt ind = 0;
271:     if (i > 0) {
272:       PetscCallMPI(MPI_Waitany(n_neigh - 1, recv_request + 1, &ind, &stat));
273:       ind++;
274:     }
275:     mat[ind * n_neigh + 0] = 0.0;
276:     for (k = 0; k < n_shared[ind]; k++) mat[ind * n_neigh + 0] += DZ_IN[ind][k] * pcis->work_N[shared[ind][k]];
277:   }

279:   /* Compute the remaining columns */
280:   for (j = 1; j < n_neigh; j++) {
281:     PetscCall(PCNNApplySchurToChunk(pc, n_shared[j], shared[j], DZ_IN[j], pcis->work_N, pcis->vec1_B, pcis->vec2_B, pcis->vec1_D, pcis->vec2_D));
282:     for (i = 0; i < n_neigh; i++) {
283:       mat[i * n_neigh + j] = 0.0;
284:       for (k = 0; k < n_shared[i]; k++) mat[i * n_neigh + j] += DZ_IN[i][k] * pcis->work_N[shared[i][k]];
285:     }
286:   }
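
  /*
     In summary (a restatement of the two loops above): DZ_IN[i] holds the weighted null
     space D*z of subdomain neigh[i], restricted to the part of the interface shared with
     this subdomain, and the local contribution to the coarse matrix is

       mat[i][j] = DZ_IN[i] . S ( zero-extension of DZ_IN[j] )

     where S is the local Schur complement applied through PCNNApplySchurToChunk(). The
     contributions of all the subdomains are summed with MatSetValues(..., ADD_VALUES) below.
  */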

288:   /* Complete the sending. */
289:   if (n_neigh > 1) {
290:     MPI_Status *stat;
291:     PetscCall(PetscMalloc1(n_neigh - 1, &stat));
292:     if (n_neigh - 1) PetscCallMPI(MPI_Waitall(n_neigh - 1, &send_request[1], stat));
293:     PetscCall(PetscFree(stat));
294:   }

296:   /* Free the memory for the MPI requests */
297:   PetscCall(PetscFree2(send_request, recv_request));

299:   /* Free the memory for DZ_OUT */
300:   if (DZ_OUT) {
301:     PetscCall(PetscFree(DZ_OUT[0]));
302:     PetscCall(PetscFree(DZ_OUT));
303:   }

305:   {
306:     PetscMPIInt size;
307:     PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)pc), &size));
308:     /* Create the global coarse vectors (rhs and solution). */
309:     PetscCall(VecCreateMPI(PetscObjectComm((PetscObject)pc), 1, size, &pcnn->coarse_b));
310:     PetscCall(VecDuplicate(pcnn->coarse_b, &pcnn->coarse_x));
311:     /* Create and set the global coarse AIJ matrix. */
312:     PetscCall(MatCreate(PetscObjectComm((PetscObject)pc), &pcnn->coarse_mat));
313:     PetscCall(MatSetSizes(pcnn->coarse_mat, 1, 1, size, size));
314:     PetscCall(MatSetType(pcnn->coarse_mat, MATAIJ));
315:     PetscCall(MatSeqAIJSetPreallocation(pcnn->coarse_mat, 1, NULL));
316:     PetscCall(MatMPIAIJSetPreallocation(pcnn->coarse_mat, 1, NULL, n_neigh, NULL));
317:     PetscCall(MatSetOption(pcnn->coarse_mat, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE));
318:     PetscCall(MatSetOption(pcnn->coarse_mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));
319:     PetscCall(MatSetValues(pcnn->coarse_mat, n_neigh, neigh, n_neigh, neigh, mat, ADD_VALUES));
320:     PetscCall(MatAssemblyBegin(pcnn->coarse_mat, MAT_FINAL_ASSEMBLY));
321:     PetscCall(MatAssemblyEnd(pcnn->coarse_mat, MAT_FINAL_ASSEMBLY));
322:   }
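
  /* Note: the coarse problem has exactly one unknown per MPI rank (one coarse basis function,
     the weighted null space D*z, per subdomain), which is why coarse_b, coarse_x, and
     coarse_mat all have local size 1 and global size equal to the communicator size. */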

324:   {
325:     PetscMPIInt rank;
326:     PetscScalar one = 1.0;
327:     PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank));
328:     /* "Zero out" the rows of the subdomains that are not purely Neumann */
329:     if (pcis->pure_neumann) { /* does NOT zero any row; passes an empty row list, since MatZeroRows() is collective. */
330:       PetscCall(MatZeroRows(pcnn->coarse_mat, 0, NULL, one, NULL, NULL));
331:     } else { /* here it DOES zero the row, since it's not a floating subdomain. */
332:       PetscInt row = rank;
333:       PetscCall(MatZeroRows(pcnn->coarse_mat, 1, &row, one, NULL, NULL));
334:     }
335:   }

337:   /* Create the coarse linear solver context */
338:   {
339:     PC  pc_ctx, inner_pc;
340:     KSP inner_ksp;

342:     PetscCall(KSPCreate(PetscObjectComm((PetscObject)pc), &pcnn->ksp_coarse));
343:     PetscCall(KSPSetNestLevel(pcnn->ksp_coarse, pc->kspnestlevel));
344:     PetscCall(PetscObjectIncrementTabLevel((PetscObject)pcnn->ksp_coarse, (PetscObject)pc, 2));
345:     PetscCall(KSPSetOperators(pcnn->ksp_coarse, pcnn->coarse_mat, pcnn->coarse_mat));
346:     PetscCall(KSPGetPC(pcnn->ksp_coarse, &pc_ctx));
347:     PetscCall(PCSetType(pc_ctx, PCREDUNDANT));
348:     PetscCall(KSPSetType(pcnn->ksp_coarse, KSPPREONLY));
349:     PetscCall(PCRedundantGetKSP(pc_ctx, &inner_ksp));
350:     PetscCall(KSPGetPC(inner_ksp, &inner_pc));
351:     PetscCall(PCSetType(inner_pc, PCLU));
352:     PetscCall(KSPSetOptionsPrefix(pcnn->ksp_coarse, "nn_coarse_"));
353:     PetscCall(KSPSetFromOptions(pcnn->ksp_coarse));
354:     /* set up the coarse solver now, rather than lazily during the first coarse solve */
355:     PetscCall(KSPSetUp(pcnn->ksp_coarse));
356:   }
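
  /* Note: the defaults chosen above (KSPPREONLY with PCREDUNDANT wrapping an inner LU
     factorization) make each coarse solve a single redundant direct solve of the small,
     communicator-sized coarse matrix rather than an iteration; they can still be overridden
     through the nn_coarse_ options prefix, since KSPSetFromOptions() is called after they
     are set. */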

358:   /* Free the memory for mat */
359:   PetscCall(PetscFree(mat));

361:   /* for DEBUGGING, save the coarse matrix to a file. */
362:   {
363:     PetscBool flg = PETSC_FALSE;
364:     PetscCall(PetscOptionsGetBool(NULL, NULL, "-pc_nn_save_coarse_matrix", &flg, NULL));
365:     if (flg) {
366:       PetscViewer viewer;
367:       PetscCall(PetscViewerASCIIOpen(PETSC_COMM_WORLD, "coarse.m", &viewer));
368:       PetscCall(PetscViewerPushFormat(viewer, PETSC_VIEWER_ASCII_MATLAB));
369:       PetscCall(MatView(pcnn->coarse_mat, viewer));
370:       PetscCall(PetscViewerPopFormat(viewer));
371:       PetscCall(PetscViewerDestroy(&viewer));
372:     }
373:   }

375:   /*  Set the variable pcnn->factor_coarse_rhs. */
376:   pcnn->factor_coarse_rhs = (pcis->pure_neumann) ? 1.0 : 0.0;

378:   /* See historical note 02, at the bottom of this file. */
379:   PetscFunctionReturn(PETSC_SUCCESS);
380: }

382: /*
383:    PCNNApplySchurToChunk - Applies the local Schur complement to a vector that is nonzero only at the given interface indices.

385:    Input parameters:
386: .  pc - the preconditioner context
387: .  n - size of chunk
388: .  idx - indices of chunk
389: .  chunk - values

391:    Output parameters:
392: .  array_N - result of Schur complement applied to chunk, scattered to big array
393: .  vec1_B  - result of Schur complement applied to chunk
394: .  vec2_B  - garbage (used as work space)
395: .  vec1_D  - garbage (used as work space)
396: .  vec2_D  - garbage (used as work space)

398: */
399: PetscErrorCode PCNNApplySchurToChunk(PC pc, PetscInt n, PetscInt *idx, PetscScalar *chunk, PetscScalar *array_N, Vec vec1_B, Vec vec2_B, Vec vec1_D, Vec vec2_D)
400: {
401:   PetscInt i;
402:   PC_IS   *pcis = (PC_IS *)pc->data;

404:   PetscFunctionBegin;
405:   PetscCall(PetscArrayzero(array_N, pcis->n));
406:   for (i = 0; i < n; i++) array_N[idx[i]] = chunk[i];
407:   PetscCall(PCISScatterArrayNToVecB(pc, array_N, vec2_B, INSERT_VALUES, SCATTER_FORWARD));
408:   PetscCall(PCISApplySchur(pc, vec2_B, vec1_B, (Vec)0, vec1_D, vec2_D));
409:   PetscCall(PCISScatterArrayNToVecB(pc, array_N, vec1_B, INSERT_VALUES, SCATTER_REVERSE));
410:   PetscFunctionReturn(PETSC_SUCCESS);
411: }
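
/*
   In equation form (a restatement of the routine above): given the values chunk[] placed at
   the interface indices idx[] and zero elsewhere, the routine returns in vec1_B (and, scattered,
   in array_N) the action of the local Schur complement

     $ S = A_{BB} - A_{BI} A_{II}^{-1} A_{IB} $

   on that zero-extended vector (in the block notation used in PCApply_NN() above), with
   PCISApplySchur() performing the actual Schur complement application.
*/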

413: /*
414:    PCNNApplyInterfacePreconditioner - Apply the interface preconditioner, i.e.,
415:                                       the preconditioner for the Schur complement.

417:    Input parameter:
418: .  r - global vector of interior and interface nodes. The values on the interior nodes are NOT used.

420:    Output parameters:
421: .  z - global vector of interior and interface nodes. The values on the interface are the result of
422:        the application of the interface preconditioner to the interface part of r. The values on the
423:        interior nodes are garbage.
424: .  work_N - array of local nodes (interior and interface, including ghosts); returns garbage (used as work space)
425: .  vec1_B - vector of local interface nodes (including ghosts); returns garbage (used as work space)
426: .  vec2_B - vector of local interface nodes (including ghosts); returns garbage (used as work space)
427: .  vec3_B - vector of local interface nodes (including ghosts); returns garbage (used as work space)
428: .  vec1_D - vector of local interior nodes; returns garbage (used as work space)
429: .  vec2_D - vector of local interior nodes; returns garbage (used as work space)
430: .  vec1_N - vector of local nodes (interior and interface, including ghosts); returns garbage (used as work space)
431: .  vec2_N - vector of local nodes (interior and interface, including ghosts); returns garbage (used as work space)

433: */
434: PetscErrorCode PCNNApplyInterfacePreconditioner(PC pc, Vec r, Vec z, PetscScalar *work_N, Vec vec1_B, Vec vec2_B, Vec vec3_B, Vec vec1_D, Vec vec2_D, Vec vec1_N, Vec vec2_N)
435: {
436:   PC_IS *pcis = (PC_IS *)pc->data;

438:   PetscFunctionBegin;
439:   /*
440:     First balancing step.
441:   */
442:   {
443:     PetscBool flg = PETSC_FALSE;
444:     PetscCall(PetscOptionsGetBool(NULL, NULL, "-pc_nn_turn_off_first_balancing", &flg, NULL));
445:     if (!flg) {
446:       PetscCall(PCNNBalancing(pc, r, (Vec)0, z, vec1_B, vec2_B, (Vec)0, vec1_D, vec2_D, work_N));
447:     } else {
448:       PetscCall(VecCopy(r, z));
449:     }
450:   }

452:   /*
453:     Extract the local interface part of z and scale it by D
454:   */
455:   PetscCall(VecScatterBegin(pcis->global_to_B, z, vec1_B, INSERT_VALUES, SCATTER_FORWARD));
456:   PetscCall(VecScatterEnd(pcis->global_to_B, z, vec1_B, INSERT_VALUES, SCATTER_FORWARD));
457:   PetscCall(VecPointwiseMult(vec2_B, pcis->D, vec1_B));

459:   /* Neumann Solver */
460:   PetscCall(PCISApplyInvSchur(pc, vec2_B, vec1_B, vec1_N, vec2_N));

462:   /*
463:     Second balancing step.
464:   */
465:   {
466:     PetscBool flg = PETSC_FALSE;
467:     PetscCall(PetscOptionsGetBool(NULL, NULL, "-pc_turn_off_second_balancing", &flg, NULL));
468:     if (!flg) {
469:       PetscCall(PCNNBalancing(pc, r, vec1_B, z, vec2_B, vec3_B, (Vec)0, vec1_D, vec2_D, work_N));
470:     } else {
471:       PetscCall(VecPointwiseMult(vec2_B, pcis->D, vec1_B));
472:       PetscCall(VecSet(z, 0.0));
473:       PetscCall(VecScatterBegin(pcis->global_to_B, vec2_B, z, ADD_VALUES, SCATTER_REVERSE));
474:       PetscCall(VecScatterEnd(pcis->global_to_B, vec2_B, z, ADD_VALUES, SCATTER_REVERSE));
475:     }
476:   }
477:   PetscFunctionReturn(PETSC_SUCCESS);
478: }
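
/*
   Remark: if both balancing steps are turned off (via the -pc_nn_turn_off_first_balancing and
   -pc_nn_turn_off_second_balancing options), the routine above reduces to the classical
   one-level Neumann-Neumann operator

     $ z_B = \sum_i \tilde R_i^T D_i S_i^{-1} D_i \tilde R_i r_B $

   where $ S_i^{-1} $ is the (possibly approximate) inverse of the local Schur complement applied
   by PCISApplyInvSchur() and $ D_i $ is the interface scaling pcis->D (the pseudo-inverse of the
   counting function; see historical note 01). The two PCNNBalancing() calls add the coarse grid
   correction that makes this a *balancing* Neumann-Neumann preconditioner.
*/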

480: /*
481:    PCNNBalancing - Computes z, as given in equations (15) and (16) (if the
482:                    input argument u is provided), or s, as given in equations
483:                    (12) and (13), if the input argument u is a null vector.
484:                    Notice that the input argument u plays the role of u_i in
485:                    equation (14). The equation numbers refer to [Man93].

487:    Input Parameters:
488: +  pc - the NN preconditioner context.
489: .  r - MPI vector of all nodes (interior and interface). It's preserved.
490: -  u - (Optional) sequential vector of local interface nodes. It's preserved UNLESS vec3_B is null.

492:    Output Parameters:
493: +  z - MPI vector of interior and interface nodes. Returns s or z (see description above).
494: .  vec1_B - Sequential vector of local interface nodes. Workspace.
495: .  vec2_B - Sequential vector of local interface nodes. Workspace.
496: .  vec3_B - (Optional) sequential vector of local interface nodes. Workspace.
497: .  vec1_D - Sequential vector of local interior nodes. Workspace.
498: .  vec2_D - Sequential vector of local interior nodes. Workspace.
499: -  work_N - Array of all local nodes (interior and interface). Workspace.

501: */
502: PetscErrorCode PCNNBalancing(PC pc, Vec r, Vec u, Vec z, Vec vec1_B, Vec vec2_B, Vec vec3_B, Vec vec1_D, Vec vec2_D, PetscScalar *work_N)
503: {
504:   PetscInt     k;
505:   PetscScalar  value;
506:   PetscScalar *lambda;
507:   PC_NN       *pcnn = (PC_NN *)pc->data;
508:   PC_IS       *pcis = (PC_IS *)pc->data;

510:   PetscFunctionBegin;
511:   PetscCall(PetscLogEventBegin(PC_ApplyCoarse, pc, 0, 0, 0));
512:   if (u) {
513:     if (!vec3_B) vec3_B = u;
514:     PetscCall(VecPointwiseMult(vec1_B, pcis->D, u));
515:     PetscCall(VecSet(z, 0.0));
516:     PetscCall(VecScatterBegin(pcis->global_to_B, vec1_B, z, ADD_VALUES, SCATTER_REVERSE));
517:     PetscCall(VecScatterEnd(pcis->global_to_B, vec1_B, z, ADD_VALUES, SCATTER_REVERSE));
518:     PetscCall(VecScatterBegin(pcis->global_to_B, z, vec2_B, INSERT_VALUES, SCATTER_FORWARD));
519:     PetscCall(VecScatterEnd(pcis->global_to_B, z, vec2_B, INSERT_VALUES, SCATTER_FORWARD));
520:     PetscCall(PCISApplySchur(pc, vec2_B, vec3_B, (Vec)0, vec1_D, vec2_D));
521:     PetscCall(VecScale(vec3_B, -1.0));
522:     PetscCall(VecCopy(r, z));
523:     PetscCall(VecScatterBegin(pcis->global_to_B, vec3_B, z, ADD_VALUES, SCATTER_REVERSE));
524:     PetscCall(VecScatterEnd(pcis->global_to_B, vec3_B, z, ADD_VALUES, SCATTER_REVERSE));
525:   } else {
526:     PetscCall(VecCopy(r, z));
527:   }
528:   PetscCall(VecScatterBegin(pcis->global_to_B, z, vec2_B, INSERT_VALUES, SCATTER_FORWARD));
529:   PetscCall(VecScatterEnd(pcis->global_to_B, z, vec2_B, INSERT_VALUES, SCATTER_FORWARD));
530:   PetscCall(PCISScatterArrayNToVecB(pc, work_N, vec2_B, INSERT_VALUES, SCATTER_REVERSE));
531:   for (k = 0, value = 0.0; k < pcis->n_shared[0]; k++) value += pcnn->DZ_IN[0][k] * work_N[pcis->shared[0][k]];
532:   value *= pcnn->factor_coarse_rhs; /* This factor is set in PCNNCreateCoarseMatrix(). */
533:   {
534:     PetscMPIInt rank;
535:     PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank));
536:     PetscCall(VecSetValue(pcnn->coarse_b, rank, value, INSERT_VALUES));
537:     /*
538:        Since we are only inserting local values (one value, actually), we do not need the assembly
539:        reduction that would only discover there is no data to be moved, so these calls are omitted:
540:        PetscCall(VecAssemblyBegin(pcnn->coarse_b));
541:        PetscCall(VecAssemblyEnd  (pcnn->coarse_b));
542:     */
543:   }
544:   PetscCall(KSPSolve(pcnn->ksp_coarse, pcnn->coarse_b, pcnn->coarse_x));
545:   if (!u) PetscCall(VecScale(pcnn->coarse_x, -1.0));
546:   PetscCall(VecGetArray(pcnn->coarse_x, &lambda));
547:   for (k = 0; k < pcis->n_shared[0]; k++) work_N[pcis->shared[0][k]] = *lambda * pcnn->DZ_IN[0][k];
548:   PetscCall(VecRestoreArray(pcnn->coarse_x, &lambda));
549:   PetscCall(PCISScatterArrayNToVecB(pc, work_N, vec2_B, INSERT_VALUES, SCATTER_FORWARD));
550:   PetscCall(VecSet(z, 0.0));
551:   PetscCall(VecScatterBegin(pcis->global_to_B, vec2_B, z, ADD_VALUES, SCATTER_REVERSE));
552:   PetscCall(VecScatterEnd(pcis->global_to_B, vec2_B, z, ADD_VALUES, SCATTER_REVERSE));
553:   if (!u) {
554:     PetscCall(VecScatterBegin(pcis->global_to_B, z, vec2_B, INSERT_VALUES, SCATTER_FORWARD));
555:     PetscCall(VecScatterEnd(pcis->global_to_B, z, vec2_B, INSERT_VALUES, SCATTER_FORWARD));
556:     PetscCall(PCISApplySchur(pc, vec2_B, vec1_B, (Vec)0, vec1_D, vec2_D));
557:     PetscCall(VecCopy(r, z));
558:   }
559:   PetscCall(VecScatterBegin(pcis->global_to_B, vec1_B, z, ADD_VALUES, SCATTER_REVERSE));
560:   PetscCall(VecScatterEnd(pcis->global_to_B, vec1_B, z, ADD_VALUES, SCATTER_REVERSE));
561:   PetscCall(PetscLogEventEnd(PC_ApplyCoarse, pc, 0, 0, 0));
562:   PetscFunctionReturn(PETSC_SUCCESS);
563: }

565: /*  From now on, "footnotes" (or "historical notes").  */
566: /*
567:    Historical note 01

569:    We considered the possibility of an alternative D_i that would still
570:    provide a partition of unity (i.e., $ \sum_i  N_i D_i N_i^T = I $).
571:    The basic principle was still the pseudo-inverse of the counting
572:    function; the difference was that we would not count subdomains
573:    that do not contribute to the coarse space (i.e., subdomains that are
574:    not pure-Neumann).

576:    This turned out to be a bad idea: we would solve trivial Neumann
577:    problems in the subdomains that are not pure-Neumann, since we would be scaling
578:    the balanced residual by zero.
579: */

581: /*
582:    Historical note 02

584:    We tried an alternative coarse problem that would eliminate exactly a
585:    constant error. It turned out not to improve the overall convergence.
586: */