Actual source code: sortso.c
1: #include <petscsys.h>
2: #include <petsc/private/petscimpl.h>
4: static inline int Compare_PetscMPIInt_Private(const void *left, const void *right, PETSC_UNUSED void *ctx)
5: {
6: PetscMPIInt l = *(PetscMPIInt *)left, r = *(PetscMPIInt *)right;
7: return (l < r) ? -1 : (l > r);
8: }
10: static inline int Compare_PetscInt_Private(const void *left, const void *right, PETSC_UNUSED void *ctx)
11: {
12: PetscInt l = *(PetscInt *)left, r = *(PetscInt *)right;
13: return (l < r) ? -1 : (l > r);
14: }
16: static inline int Compare_PetscReal_Private(const void *left, const void *right, PETSC_UNUSED void *ctx)
17: {
18: PetscReal l = *(PetscReal *)left, r = *(PetscReal *)right;
19: return (l < r) ? -1 : (l > r);
20: }
22: #define MIN_GALLOP_CONST_GLOBAL 8
23: static PetscInt MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL;
24: typedef int (*CompFunc)(const void *, const void *, void *);
26: /* Mostly to force clang uses the builtins, but can't hurt to have gcc compilers also do the same */
27: #if !defined(PETSC_USE_DEBUG) && defined(__GNUC__)
28: static inline void COPYSWAPPY(char *a, char *b, char *t, size_t size)
29: {
30: __builtin_memcpy(t, b, size);
31: __builtin_memmove(b, a, size);
32: __builtin_memcpy(a, t, size);
33: return;
34: }
36: static inline void COPYSWAPPY2(char *al, char *ar, size_t asize, char *bl, char *br, size_t bsize, char *t)
37: {
38: __builtin_memcpy(t, ar, asize);
39: __builtin_memmove(ar, al, asize);
40: __builtin_memcpy(al, t, asize);
41: __builtin_memcpy(t, br, bsize);
42: __builtin_memmove(br, bl, bsize);
43: __builtin_memcpy(bl, t, bsize);
44: return;
45: }
47: static inline void Petsc_memcpy(char *dest, const char *src, size_t size)
48: {
49: __builtin_memcpy(dest, src, size);
50: return;
51: }
53: static inline void Petsc_memcpy2(char *adest, const char *asrc, size_t asize, char *bdest, const char *bsrc, size_t bsize)
54: {
55: __builtin_memcpy(adest, asrc, asize);
56: __builtin_memcpy(bdest, bsrc, bsize);
57: return;
58: }
60: static inline void Petsc_memmove(char *dest, const char *src, size_t size)
61: {
62: __builtin_memmove(dest, src, size);
63: return;
64: }
66: static inline void Petsc_memmove2(char *adest, const char *asrc, size_t asize, char *bdest, const char *bsrc, size_t bsize)
67: {
68: __builtin_memmove(adest, asrc, asize);
69: __builtin_memmove(bdest, bsrc, bsize);
70: return;
71: }
72: #else
73: static inline void COPYSWAPPY(char *a, char *b, char *t, size_t size)
74: {
75: PetscFunctionBegin;
76: PetscCallAbort(PETSC_COMM_SELF, PetscMemcpy(t, b, size));
77: PetscCallAbort(PETSC_COMM_SELF, PetscMemmove(b, a, size));
78: PetscCallAbort(PETSC_COMM_SELF, PetscMemcpy(a, t, size));
79: PetscFunctionReturnVoid();
80: }
82: static inline void COPYSWAPPY2(char *al, char *ar, size_t asize, char *bl, char *br, size_t bsize, char *t)
83: {
84: PetscFunctionBegin;
85: PetscCallAbort(PETSC_COMM_SELF, PetscMemcpy(t, ar, asize));
86: PetscCallAbort(PETSC_COMM_SELF, PetscMemmove(ar, al, asize));
87: PetscCallAbort(PETSC_COMM_SELF, PetscMemcpy(al, t, asize));
88: PetscCallAbort(PETSC_COMM_SELF, PetscMemcpy(t, br, bsize));
89: PetscCallAbort(PETSC_COMM_SELF, PetscMemmove(br, bl, bsize));
90: PetscCallAbort(PETSC_COMM_SELF, PetscMemcpy(bl, t, bsize));
91: PetscFunctionReturnVoid();
92: }
94: static inline void Petsc_memcpy(char *dest, const char *src, size_t size)
95: {
96: PetscFunctionBegin;
97: PetscCallAbort(PETSC_COMM_SELF, PetscMemcpy(dest, src, size));
98: PetscFunctionReturnVoid();
99: }
101: static inline void Petsc_memcpy2(char *adest, const char *asrc, size_t asize, char *bdest, const char *bsrc, size_t bsize)
102: {
103: PetscFunctionBegin;
104: PetscCallAbort(PETSC_COMM_SELF, PetscMemcpy(adest, asrc, asize));
105: PetscCallAbort(PETSC_COMM_SELF, PetscMemcpy(bdest, bsrc, bsize));
106: PetscFunctionReturnVoid();
107: }
109: static inline void Petsc_memmove(char *dest, const char *src, size_t size)
110: {
111: PetscFunctionBegin;
112: PetscCallAbort(PETSC_COMM_SELF, PetscMemmove(dest, src, size));
113: PetscFunctionReturnVoid();
114: }
116: static inline void Petsc_memmove2(char *adest, const char *asrc, size_t asize, char *bdest, const char *bsrc, size_t bsize)
117: {
118: PetscFunctionBegin;
119: PetscCallAbort(PETSC_COMM_SELF, PetscMemmove(adest, asrc, asize));
120: PetscCallAbort(PETSC_COMM_SELF, PetscMemmove(bdest, bsrc, bsize));
121: PetscFunctionReturnVoid();
122: }
123: #endif
125: /* Start left look right. Looking for e.g. B[0] in A or mergelo. l inclusive, r inclusive. Returns first m such that arr[m] >
126: x. Output also inclusive.
128: NOTE: Both gallopsearch functions CANNOT distinguish between inserting AFTER the entire array vs inserting at entry n!! For example for an array:
130: {0,1,2,3,4,5}
132: when looking to insert "5" this routine will return *m = 6, but when looking to insert "6" it will ALSO return *m = 6.
133: */
134: static inline PetscErrorCode PetscGallopSearchLeft_Private(const char *arr, size_t size, CompFunc cmp, void *ctx, PetscInt l, PetscInt r, const char *x, PetscInt *m)
135: {
136: PetscInt last = l, k = 1, mid, cur = l + 1;
138: PetscFunctionBegin;
139: *m = l;
140: PetscAssert(r >= l, PETSC_COMM_SELF, PETSC_ERR_PLIB, "r %" PetscInt_FMT " < l %" PetscInt_FMT " in PetscGallopSearchLeft", r, l);
141: if ((*cmp)(x, arr + r * size, ctx) >= 0) {
142: *m = r;
143: PetscFunctionReturn(PETSC_SUCCESS);
144: }
145: if ((*cmp)(x, (arr) + l * size, ctx) < 0 || PetscUnlikely(!(r - l))) PetscFunctionReturn(PETSC_SUCCESS);
146: while (PETSC_TRUE) {
147: if (PetscUnlikely(cur > r)) {
148: cur = r;
149: break;
150: }
151: if ((*cmp)(x, (arr) + cur * size, ctx) < 0) break;
152: last = cur;
153: cur += (k <<= 1) + 1;
154: ++k;
155: }
156: /* standard binary search but take last 0 mid 0 cur 1 into account*/
157: while (cur > last + 1) {
158: mid = last + ((cur - last) >> 1);
159: if ((*cmp)(x, (arr) + mid * size, ctx) < 0) {
160: cur = mid;
161: } else {
162: last = mid;
163: }
164: }
165: *m = cur;
166: PetscFunctionReturn(PETSC_SUCCESS);
167: }
169: /* Start right look left. Looking for e.g. A[-1] in B or mergehi. l inclusive, r inclusive. Returns last m such that arr[m]
170: < x. Output also inclusive */
171: static inline PetscErrorCode PetscGallopSearchRight_Private(const char *arr, size_t size, CompFunc cmp, void *ctx, PetscInt l, PetscInt r, const char *x, PetscInt *m)
172: {
173: PetscInt last = r, k = 1, mid, cur = r - 1;
175: PetscFunctionBegin;
176: *m = r;
177: PetscAssert(r >= l, PETSC_COMM_SELF, PETSC_ERR_PLIB, "r %" PetscInt_FMT " < l %" PetscInt_FMT " in PetscGallopSearchRight", r, l);
178: if ((*cmp)(x, arr + l * size, ctx) <= 0) {
179: *m = l;
180: PetscFunctionReturn(PETSC_SUCCESS);
181: }
182: if ((*cmp)(x, (arr) + r * size, ctx) > 0 || PetscUnlikely(!(r - l))) PetscFunctionReturn(PETSC_SUCCESS);
183: while (PETSC_TRUE) {
184: if (PetscUnlikely(cur < l)) {
185: cur = l;
186: break;
187: }
188: if ((*cmp)(x, (arr) + cur * size, ctx) > 0) break;
189: last = cur;
190: cur -= (k <<= 1) + 1;
191: ++k;
192: }
193: /* standard binary search but take last r-1 mid r-1 cur r-2 into account*/
194: while (last > cur + 1) {
195: mid = last - ((last - cur) >> 1);
196: if ((*cmp)(x, (arr) + mid * size, ctx) > 0) {
197: cur = mid;
198: } else {
199: last = mid;
200: }
201: }
202: *m = cur;
203: PetscFunctionReturn(PETSC_SUCCESS);
204: }
206: /* Mergesort where size of left half <= size of right half, so mergesort is done left to right. Arr should be pointer to
207: complete array, left is first index of left array, mid is first index of right array, right is last index of right
208: array */
209: static inline PetscErrorCode PetscTimSortMergeLo_Private(char *arr, char *tarr, size_t size, CompFunc cmp, void *ctx, PetscInt left, PetscInt mid, PetscInt right)
210: {
211: PetscInt i = 0, j = mid, k = left, gallopleft = 0, gallopright = 0;
212: const PetscInt llen = mid - left;
214: PetscFunctionBegin;
215: Petsc_memcpy(tarr, arr + (left * size), llen * size);
216: while ((i < llen) && (j <= right)) {
217: if ((*cmp)(tarr + (i * size), arr + (j * size), ctx) < 0) {
218: Petsc_memcpy(arr + (k * size), tarr + (i * size), size);
219: ++k;
220: gallopright = 0;
221: if (++i < llen && ++gallopleft >= MIN_GALLOP_GLOBAL) {
222: PetscInt l1, l2, diff1, diff2;
223: do {
224: if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
225: /* search temp for right[j], can move up to that of temp into arr immediately */
226: PetscCall(PetscGallopSearchLeft_Private(tarr, size, cmp, ctx, i, llen - 1, arr + (j * size), &l1));
227: diff1 = l1 - i;
228: Petsc_memcpy(arr + (k * size), tarr + (i * size), diff1 * size);
229: k += diff1;
230: i = l1;
231: /* search right for temp[i], can move up to that many of right into arr */
232: PetscCall(PetscGallopSearchLeft_Private(arr, size, cmp, ctx, j, right, tarr + (i * size), &l2));
233: diff2 = l2 - j;
234: Petsc_memmove((arr) + k * size, (arr) + j * size, diff2 * size);
235: k += diff2;
236: j = l2;
237: if (i >= llen || j > right) break;
238: } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
239: ++MIN_GALLOP_GLOBAL;
240: }
241: } else {
242: Petsc_memmove(arr + (k * size), arr + (j * size), size);
243: ++k;
244: gallopleft = 0;
245: if (++j <= right && ++gallopright >= MIN_GALLOP_GLOBAL) {
246: PetscInt l1, l2, diff1, diff2;
247: do {
248: if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
249: /* search right for temp[i], can move up to that many of right into arr */
250: PetscCall(PetscGallopSearchLeft_Private(arr, size, cmp, ctx, j, right, tarr + (i * size), &l2));
251: diff2 = l2 - j;
252: Petsc_memmove(arr + (k * size), arr + (j * size), diff2 * size);
253: k += diff2;
254: j = l2;
255: /* search temp for right[j], can copy up to that of temp into arr immediately */
256: PetscCall(PetscGallopSearchLeft_Private(tarr, size, cmp, ctx, i, llen - 1, arr + (j * size), &l1));
257: diff1 = l1 - i;
258: Petsc_memcpy(arr + (k * size), tarr + (i * size), diff1 * size);
259: k += diff1;
260: i = l1;
261: if (i >= llen || j > right) break;
262: } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
263: ++MIN_GALLOP_GLOBAL;
264: }
265: }
266: }
267: if (i < llen) Petsc_memcpy(arr + (k * size), tarr + (i * size), (llen - i) * size);
268: PetscFunctionReturn(PETSC_SUCCESS);
269: }
271: static inline PetscErrorCode PetscTimSortMergeLoWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, void *ctx, PetscInt left, PetscInt mid, PetscInt right)
272: {
273: PetscInt i = 0, j = mid, k = left, gallopleft = 0, gallopright = 0;
274: const PetscInt llen = mid - left;
276: PetscFunctionBegin;
277: Petsc_memcpy2(atarr, arr + (left * asize), llen * asize, btarr, barr + (left * bsize), llen * bsize);
278: while ((i < llen) && (j <= right)) {
279: if ((*cmp)(atarr + (i * asize), arr + (j * asize), ctx) < 0) {
280: Petsc_memcpy2(arr + (k * asize), atarr + (i * asize), asize, barr + (k * bsize), btarr + (i * bsize), bsize);
281: ++k;
282: gallopright = 0;
283: if (++i < llen && ++gallopleft >= MIN_GALLOP_GLOBAL) {
284: PetscInt l1, l2, diff1, diff2;
285: do {
286: if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
287: /* search temp for right[j], can move up to that of temp into arr immediately */
288: PetscCall(PetscGallopSearchLeft_Private(atarr, asize, cmp, ctx, i, llen - 1, arr + (j * asize), &l1));
289: diff1 = l1 - i;
290: Petsc_memcpy2(arr + (k * asize), atarr + (i * asize), diff1 * asize, barr + (k * bsize), btarr + (i * bsize), diff1 * bsize);
291: k += diff1;
292: i = l1;
293: /* search right for temp[i], can move up to that many of right into arr */
294: PetscCall(PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, j, right, atarr + (i * asize), &l2));
295: diff2 = l2 - j;
296: Petsc_memmove2(arr + (k * asize), arr + (j * asize), diff2 * asize, barr + (k * bsize), barr + (j * bsize), diff2 * bsize);
297: k += diff2;
298: j = l2;
299: if (i >= llen || j > right) break;
300: } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
301: ++MIN_GALLOP_GLOBAL;
302: }
303: } else {
304: Petsc_memmove2(arr + (k * asize), arr + (j * asize), asize, barr + (k * bsize), barr + (j * bsize), bsize);
305: ++k;
306: gallopleft = 0;
307: if (++j <= right && ++gallopright >= MIN_GALLOP_GLOBAL) {
308: PetscInt l1, l2, diff1, diff2;
309: do {
310: if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
311: /* search right for temp[i], can move up to that many of right into arr */
312: PetscCall(PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, j, right, atarr + (i * asize), &l2));
313: diff2 = l2 - j;
314: Petsc_memmove2(arr + (k * asize), arr + (j * asize), diff2 * asize, barr + (k * bsize), barr + (j * bsize), diff2 * bsize);
315: k += diff2;
316: j = l2;
317: /* search temp for right[j], can copy up to that of temp into arr immediately */
318: PetscCall(PetscGallopSearchLeft_Private(atarr, asize, cmp, ctx, i, llen - 1, arr + (j * asize), &l1));
319: diff1 = l1 - i;
320: Petsc_memcpy2(arr + (k * asize), atarr + (i * asize), diff1 * asize, barr + (k * bsize), btarr + (i * bsize), diff1 * bsize);
321: k += diff1;
322: i = l1;
323: if (i >= llen || j > right) break;
324: } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
325: ++MIN_GALLOP_GLOBAL;
326: }
327: }
328: }
329: if (i < llen) Petsc_memcpy2(arr + (k * asize), atarr + (i * asize), (llen - i) * asize, barr + (k * bsize), btarr + (i * bsize), (llen - i) * bsize);
330: PetscFunctionReturn(PETSC_SUCCESS);
331: }
333: /* Mergesort where size of right half < size of left half, so mergesort is done right to left. Arr should be pointer to
334: complete array, left is first index of left array, mid is first index of right array, right is last index of right
335: array */
336: static inline PetscErrorCode PetscTimSortMergeHi_Private(char *arr, char *tarr, size_t size, CompFunc cmp, void *ctx, PetscInt left, PetscInt mid, PetscInt right)
337: {
338: PetscInt i = right - mid, j = mid - 1, k = right, gallopleft = 0, gallopright = 0;
339: const PetscInt rlen = right - mid + 1;
341: PetscFunctionBegin;
342: Petsc_memcpy(tarr, (arr) + mid * size, rlen * size);
343: while ((i >= 0) && (j >= left)) {
344: if ((*cmp)((tarr) + i * size, (arr) + j * size, ctx) > 0) {
345: Petsc_memcpy((arr) + k * size, (tarr) + i * size, size);
346: --k;
347: gallopleft = 0;
348: if (--i >= 0 && ++gallopright >= MIN_GALLOP_GLOBAL) {
349: PetscInt l1, l2, diff1, diff2;
350: do {
351: if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
352: /* search temp for left[j], can copy up to that many of temp into arr */
353: PetscCall(PetscGallopSearchRight_Private(tarr, size, cmp, ctx, 0, i, (arr) + j * size, &l1));
354: diff1 = i - l1;
355: Petsc_memcpy((arr) + (k - diff1 + 1) * size, (tarr) + (l1 + 1) * size, diff1 * size);
356: k -= diff1;
357: i = l1;
358: /* search left for temp[i], can move up to that many of left up arr */
359: PetscCall(PetscGallopSearchRight_Private(arr, size, cmp, ctx, left, j, (tarr) + i * size, &l2));
360: diff2 = j - l2;
361: Petsc_memmove((arr) + (k - diff2 + 1) * size, (arr) + (l2 + 1) * size, diff2 * size);
362: k -= diff2;
363: j = l2;
364: if (i < 0 || j < left) break;
365: } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
366: ++MIN_GALLOP_GLOBAL;
367: }
368: } else {
369: Petsc_memmove((arr) + k * size, (arr) + j * size, size);
370: --k;
371: gallopright = 0;
372: if (--j >= left && ++gallopleft >= MIN_GALLOP_GLOBAL) {
373: PetscInt l1, l2, diff1, diff2;
374: do {
375: if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
376: /* search left for temp[i], can move up to that many of left up arr */
377: PetscCall(PetscGallopSearchRight_Private(arr, size, cmp, ctx, left, j, (tarr) + i * size, &l2));
378: diff2 = j - l2;
379: Petsc_memmove((arr) + (k - diff2 + 1) * size, (arr) + (l2 + 1) * size, diff2 * size);
380: k -= diff2;
381: j = l2;
382: /* search temp for left[j], can copy up to that many of temp into arr */
383: PetscCall(PetscGallopSearchRight_Private(tarr, size, cmp, ctx, 0, i, (arr) + j * size, &l1));
384: diff1 = i - l1;
385: Petsc_memcpy((arr) + (k - diff1 + 1) * size, (tarr) + (l1 + 1) * size, diff1 * size);
386: k -= diff1;
387: i = l1;
388: if (i < 0 || j < left) break;
389: } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
390: ++MIN_GALLOP_GLOBAL;
391: }
392: }
393: }
394: if (i >= 0) Petsc_memcpy((arr) + left * size, tarr, (i + 1) * size);
395: PetscFunctionReturn(PETSC_SUCCESS);
396: }
398: static inline PetscErrorCode PetscTimSortMergeHiWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, void *ctx, PetscInt left, PetscInt mid, PetscInt right)
399: {
400: PetscInt i = right - mid, j = mid - 1, k = right, gallopleft = 0, gallopright = 0;
401: const PetscInt rlen = right - mid + 1;
403: PetscFunctionBegin;
404: Petsc_memcpy2(atarr, (arr) + mid * asize, rlen * asize, btarr, (barr) + mid * bsize, rlen * bsize);
405: while ((i >= 0) && (j >= left)) {
406: if ((*cmp)((atarr) + i * asize, (arr) + j * asize, ctx) > 0) {
407: Petsc_memcpy2((arr) + k * asize, (atarr) + i * asize, asize, (barr) + k * bsize, (btarr) + i * bsize, bsize);
408: --k;
409: gallopleft = 0;
410: if (--i >= 0 && ++gallopright >= MIN_GALLOP_GLOBAL) {
411: PetscInt l1, l2, diff1, diff2;
412: do {
413: if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
414: /* search temp for left[j], can copy up to that many of temp into arr */
415: PetscCall(PetscGallopSearchRight_Private(atarr, asize, cmp, ctx, 0, i, (arr) + j * asize, &l1));
416: diff1 = i - l1;
417: Petsc_memcpy2((arr) + (k - diff1 + 1) * asize, (atarr) + (l1 + 1) * asize, diff1 * asize, (barr) + (k - diff1 + 1) * bsize, (btarr) + (l1 + 1) * bsize, diff1 * bsize);
418: k -= diff1;
419: i = l1;
420: /* search left for temp[i], can move up to that many of left up arr */
421: PetscCall(PetscGallopSearchRight_Private(arr, asize, cmp, ctx, left, j, (atarr) + i * asize, &l2));
422: diff2 = j - l2;
423: Petsc_memmove2((arr) + (k - diff2 + 1) * asize, (arr) + (l2 + 1) * asize, diff2 * asize, (barr) + (k - diff2 + 1) * bsize, (barr) + (l2 + 1) * bsize, diff2 * bsize);
424: k -= diff2;
425: j = l2;
426: if (i < 0 || j < left) break;
427: } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
428: ++MIN_GALLOP_GLOBAL;
429: }
430: } else {
431: Petsc_memmove2((arr) + k * asize, (arr) + j * asize, asize, (barr) + k * bsize, (barr) + j * bsize, bsize);
432: --k;
433: gallopright = 0;
434: if (--j >= left && ++gallopleft >= MIN_GALLOP_GLOBAL) {
435: PetscInt l1, l2, diff1, diff2;
436: do {
437: if (MIN_GALLOP_GLOBAL > 1) --MIN_GALLOP_GLOBAL;
438: /* search left for temp[i], can move up to that many of left up arr */
439: PetscCall(PetscGallopSearchRight_Private(arr, asize, cmp, ctx, left, j, (atarr) + i * asize, &l2));
440: diff2 = j - l2;
441: Petsc_memmove2((arr) + (k - diff2 + 1) * asize, (arr) + (l2 + 1) * asize, diff2 * asize, (barr) + (k - diff2 + 1) * bsize, (barr) + (l2 + 1) * bsize, diff2 * bsize);
442: k -= diff2;
443: j = l2;
444: /* search temp for left[j], can copy up to that many of temp into arr */
445: PetscCall(PetscGallopSearchRight_Private(atarr, asize, cmp, ctx, 0, i, (arr) + j * asize, &l1));
446: diff1 = i - l1;
447: Petsc_memcpy2((arr) + (k - diff1 + 1) * asize, (atarr) + (l1 + 1) * asize, diff1 * asize, (barr) + (k - diff1 + 1) * bsize, (btarr) + (l1 + 1) * bsize, diff1 * bsize);
448: k -= diff1;
449: i = l1;
450: if (i < 0 || j < left) break;
451: } while (diff1 > MIN_GALLOP_GLOBAL || diff2 > MIN_GALLOP_GLOBAL);
452: ++MIN_GALLOP_GLOBAL;
453: }
454: }
455: }
456: if (i >= 0) Petsc_memcpy2((arr) + left * asize, atarr, (i + 1) * asize, (barr) + left * bsize, btarr, (i + 1) * bsize);
457: PetscFunctionReturn(PETSC_SUCCESS);
458: }
460: /* Left is inclusive lower bound of array slice, start is start location of unsorted section, right is inclusive upper
461: bound of array slice. If unsure of where unsorted section starts or if entire length is unsorted pass start = left */
462: static inline PetscErrorCode PetscInsertionSort_Private(char *arr, char *tarr, size_t size, CompFunc cmp, void *ctx, PetscInt left, PetscInt start, PetscInt right)
463: {
464: PetscInt i = start == left ? start + 1 : start;
466: PetscFunctionBegin;
467: for (; i <= right; ++i) {
468: PetscInt j = i - 1;
469: Petsc_memcpy(tarr, arr + (i * size), size);
470: while ((j >= left) && ((*cmp)(tarr, (arr) + j * size, ctx) < 0)) {
471: COPYSWAPPY(arr + (j + 1) * size, arr + j * size, tarr + size, size);
472: --j;
473: }
474: Petsc_memcpy((arr) + (j + 1) * size, tarr, size);
475: }
476: PetscFunctionReturn(PETSC_SUCCESS);
477: }
479: static inline PetscErrorCode PetscInsertionSortWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, void *ctx, PetscInt left, PetscInt start, PetscInt right)
480: {
481: PetscInt i = start == left ? start + 1 : start;
483: PetscFunctionBegin;
484: for (; i <= right; ++i) {
485: PetscInt j = i - 1;
486: Petsc_memcpy2(atarr, arr + (i * asize), asize, btarr, barr + (i * bsize), bsize);
487: while ((j >= left) && ((*cmp)(atarr, arr + (j * asize), ctx) < 0)) {
488: COPYSWAPPY2(arr + (j + 1) * asize, arr + j * asize, asize, barr + (j + 1) * bsize, barr + j * bsize, bsize, atarr + asize);
489: --j;
490: }
491: Petsc_memcpy2(arr + (j + 1) * asize, atarr, asize, barr + (j + 1) * bsize, btarr, bsize);
492: }
493: PetscFunctionReturn(PETSC_SUCCESS);
494: }
496: /* See PetscInsertionSort_Private */
497: static inline PetscErrorCode PetscBinaryInsertionSort_Private(char *arr, char *tarr, size_t size, CompFunc cmp, void *ctx, PetscInt left, PetscInt start, PetscInt right)
498: {
499: PetscInt i = start == left ? start + 1 : start;
501: PetscFunctionBegin;
502: for (; i <= right; ++i) {
503: PetscInt l = left, r = i;
504: Petsc_memcpy(tarr, arr + (i * size), size);
505: do {
506: const PetscInt m = l + ((r - l) >> 1);
507: if ((*cmp)(tarr, arr + (m * size), ctx) < 0) {
508: r = m;
509: } else {
510: l = m + 1;
511: }
512: } while (l < r);
513: Petsc_memmove(arr + ((l + 1) * size), arr + (l * size), (i - l) * size);
514: Petsc_memcpy(arr + (l * size), tarr, size);
515: }
516: PetscFunctionReturn(PETSC_SUCCESS);
517: }
519: static inline PetscErrorCode PetscBinaryInsertionSortWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, void *ctx, PetscInt left, PetscInt start, PetscInt right)
520: {
521: PetscInt i = start == left ? start + 1 : start;
523: PetscFunctionBegin;
524: for (; i <= right; ++i) {
525: PetscInt l = left, r = i;
526: Petsc_memcpy2(atarr, arr + (i * asize), asize, btarr, barr + (i * bsize), bsize);
527: do {
528: const PetscInt m = l + ((r - l) >> 1);
529: if ((*cmp)(atarr, arr + (m * asize), ctx) < 0) {
530: r = m;
531: } else {
532: l = m + 1;
533: }
534: } while (l < r);
535: Petsc_memmove2(arr + ((l + 1) * asize), arr + (l * asize), (i - l) * asize, barr + ((l + 1) * bsize), barr + (l * bsize), (i - l) * bsize);
536: Petsc_memcpy2(arr + (l * asize), atarr, asize, barr + (l * bsize), btarr, bsize);
537: }
538: PetscFunctionReturn(PETSC_SUCCESS);
539: }
541: typedef struct {
542: PetscInt size;
543: PetscInt start;
544: #if defined(__CRAY_AARCH64) /* segfaults with Cray compilers for aarch64 on a64FX */
545: } PetscTimSortStack;
546: #else
547: } PetscTimSortStack PETSC_ATTRIBUTEALIGNED(2 * sizeof(PetscInt));
548: #endif
550: typedef struct {
551: char *ptr PETSC_ATTRIBUTEALIGNED(PETSC_MEMALIGN);
552: size_t size;
553: size_t maxsize;
554: } PetscTimSortBuffer;
556: static inline PetscErrorCode PetscTimSortResizeBuffer_Private(PetscTimSortBuffer *buff, size_t newSize)
557: {
558: PetscFunctionBegin;
559: if (PetscLikely(newSize <= buff->size)) PetscFunctionReturn(PETSC_SUCCESS);
560: {
561: /* Can't be larger than n, there is merit to simply allocating buff to n to begin with */
562: size_t newMax = PetscMin(newSize * newSize, buff->maxsize);
563: PetscCall(PetscFree(buff->ptr));
564: PetscCall(PetscMalloc1(newMax, &buff->ptr));
565: buff->size = newMax;
566: }
567: PetscFunctionReturn(PETSC_SUCCESS);
568: }
570: static inline PetscErrorCode PetscTimSortForceCollapse_Private(char *arr, size_t size, CompFunc cmp, void *ctx, PetscTimSortBuffer *buff, PetscTimSortStack *stack, PetscInt stacksize)
571: {
572: PetscFunctionBegin;
573: for (; stacksize; --stacksize) {
574: /* A = stack[i-1], B = stack[i], if A[-1] <= B[0] means sorted */
575: if ((*cmp)(arr + (stack[stacksize].start - 1) * size, arr + (stack[stacksize].start) * size, ctx) > 0) {
576: PetscInt l, m = stack[stacksize].start, r;
577: /* Search A for B[0] insertion */
578: PetscCall(PetscGallopSearchLeft_Private(arr, size, cmp, ctx, stack[stacksize - 1].start, stack[stacksize].start - 1, (arr) + (stack[stacksize].start) * size, &l));
579: /* Search B for A[-1] insertion */
580: PetscCall(PetscGallopSearchRight_Private(arr, size, cmp, ctx, stack[stacksize].start, stack[stacksize].start + stack[stacksize].size - 1, (arr) + (stack[stacksize].start - 1) * size, &r));
581: if (m - l <= r - m) {
582: PetscCall(PetscTimSortResizeBuffer_Private(buff, (m - l + 1) * size));
583: PetscCall(PetscTimSortMergeLo_Private(arr, buff->ptr, size, cmp, ctx, l, m, r));
584: } else {
585: PetscCall(PetscTimSortResizeBuffer_Private(buff, (r - m + 1) * size));
586: PetscCall(PetscTimSortMergeHi_Private(arr, buff->ptr, size, cmp, ctx, l, m, r));
587: }
588: }
589: /* Update A with merge */
590: stack[stacksize - 1].size += stack[stacksize].size;
591: }
592: PetscFunctionReturn(PETSC_SUCCESS);
593: }
595: static inline PetscErrorCode PetscTimSortForceCollapseWithArray_Private(char *arr, size_t asize, char *barr, size_t bsize, CompFunc cmp, void *ctx, PetscTimSortBuffer *abuff, PetscTimSortBuffer *bbuff, PetscTimSortStack *stack, PetscInt stacksize)
596: {
597: PetscFunctionBegin;
598: for (; stacksize; --stacksize) {
599: /* A = stack[i-1], B = stack[i], if A[-1] <= B[0] means sorted */
600: if ((*cmp)(arr + (stack[stacksize].start - 1) * asize, arr + (stack[stacksize].start) * asize, ctx) > 0) {
601: PetscInt l, m = stack[stacksize].start, r;
602: /* Search A for B[0] insertion */
603: PetscCall(PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, stack[stacksize - 1].start, stack[stacksize].start - 1, (arr) + (stack[stacksize].start) * asize, &l));
604: /* Search B for A[-1] insertion */
605: PetscCall(PetscGallopSearchRight_Private(arr, asize, cmp, ctx, stack[stacksize].start, stack[stacksize].start + stack[stacksize].size - 1, (arr) + (stack[stacksize].start - 1) * asize, &r));
606: if (m - l <= r - m) {
607: PetscCall(PetscTimSortResizeBuffer_Private(abuff, (m - l + 1) * asize));
608: PetscCall(PetscTimSortResizeBuffer_Private(bbuff, (m - l + 1) * bsize));
609: PetscCall(PetscTimSortMergeLoWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r));
610: } else {
611: PetscCall(PetscTimSortResizeBuffer_Private(abuff, (r - m + 1) * asize));
612: PetscCall(PetscTimSortResizeBuffer_Private(bbuff, (r - m + 1) * bsize));
613: PetscCall(PetscTimSortMergeHiWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r));
614: }
615: }
616: /* Update A with merge */
617: stack[stacksize - 1].size += stack[stacksize].size;
618: }
619: PetscFunctionReturn(PETSC_SUCCESS);
620: }
622: static inline PetscErrorCode PetscTimSortMergeCollapse_Private(char *arr, size_t size, CompFunc cmp, void *ctx, PetscTimSortBuffer *buff, PetscTimSortStack *stack, PetscInt *stacksize)
623: {
624: PetscInt i = *stacksize;
626: PetscFunctionBegin;
627: while (i) {
628: PetscInt l, m, r, itemp = i;
630: if (i == 1) {
631: /* A = stack[i-1], B = stack[i] */
632: if (stack[i - 1].size < stack[i].size) {
633: /* if A[-1] <= B[0] then sorted */
634: if ((*cmp)(arr + (stack[i].start - 1) * size, arr + (stack[i].start) * size, ctx) > 0) {
635: m = stack[i].start;
636: /* Search A for B[0] insertion */
637: PetscCall(PetscGallopSearchLeft_Private(arr, size, cmp, ctx, stack[i - 1].start, stack[i].start - 1, (arr) + stack[i].start * size, &l));
638: /* Search B for A[-1] insertion */
639: PetscCall(PetscGallopSearchRight_Private(arr, size, cmp, ctx, stack[i].start, stack[i].start + stack[i].size - 1, arr + (stack[i].start - 1) * size, &r));
640: if (m - l <= r - m) {
641: PetscCall(PetscTimSortResizeBuffer_Private(buff, (m - l + 1) * size));
642: PetscCall(PetscTimSortMergeLo_Private(arr, buff->ptr, size, cmp, ctx, l, m, r));
643: } else {
644: PetscCall(PetscTimSortResizeBuffer_Private(buff, (r - m + 1) * size));
645: PetscCall(PetscTimSortMergeHi_Private(arr, buff->ptr, size, cmp, ctx, l, m, r));
646: }
647: }
648: /* Update A with merge */
649: stack[i - 1].size += stack[i].size;
650: --i;
651: }
652: } else {
653: /* i > 2, i.e. C exists
654: A = stack[i-2], B = stack[i-1], C = stack[i]; */
655: if (stack[i - 2].size <= stack[i - 1].size + stack[i].size) {
656: if (stack[i - 2].size < stack[i].size) {
657: /* merge B into A, but if A[-1] <= B[0] then already sorted */
658: if ((*cmp)(arr + (stack[i - 1].start - 1) * size, arr + (stack[i - 1].start) * size, ctx) > 0) {
659: m = stack[i - 1].start;
660: /* Search A for B[0] insertion */
661: PetscCall(PetscGallopSearchLeft_Private(arr, size, cmp, ctx, stack[i - 2].start, stack[i - 1].start - 1, (arr) + (stack[i - 1].start) * size, &l));
662: /* Search B for A[-1] insertion */
663: PetscCall(PetscGallopSearchRight_Private(arr, size, cmp, ctx, stack[i - 1].start, stack[i - 1].start + stack[i - 1].size - 1, (arr) + (stack[i - 1].start - 1) * size, &r));
664: if (m - l <= r - m) {
665: PetscCall(PetscTimSortResizeBuffer_Private(buff, (m - l + 1) * size));
666: PetscCall(PetscTimSortMergeLo_Private(arr, buff->ptr, size, cmp, ctx, l, m, r));
667: } else {
668: PetscCall(PetscTimSortResizeBuffer_Private(buff, (r - m + 1) * size));
669: PetscCall(PetscTimSortMergeHi_Private(arr, buff->ptr, size, cmp, ctx, l, m, r));
670: }
671: }
672: /* Update A with merge */
673: stack[i - 2].size += stack[i - 1].size;
674: /* Push C up the stack */
675: stack[i - 1].start = stack[i].start;
676: stack[i - 1].size = stack[i].size;
677: } else {
678: /* merge C into B */
679: mergeBC:
680: /* If B[-1] <= C[0] then... you know the drill */
681: if ((*cmp)(arr + (stack[i].start - 1) * size, arr + (stack[i].start) * size, ctx) > 0) {
682: m = stack[i].start;
683: /* Search B for C[0] insertion */
684: PetscCall(PetscGallopSearchLeft_Private(arr, size, cmp, ctx, stack[i - 1].start, stack[i].start - 1, arr + stack[i].start * size, &l));
685: /* Search C for B[-1] insertion */
686: PetscCall(PetscGallopSearchRight_Private(arr, size, cmp, ctx, stack[i].start, stack[i].start + stack[i].size - 1, (arr) + (stack[i].start - 1) * size, &r));
687: if (m - l <= r - m) {
688: PetscCall(PetscTimSortResizeBuffer_Private(buff, (m - l + 1) * size));
689: PetscCall(PetscTimSortMergeLo_Private(arr, buff->ptr, size, cmp, ctx, l, m, r));
690: } else {
691: PetscCall(PetscTimSortResizeBuffer_Private(buff, (r - m + 1) * size));
692: PetscCall(PetscTimSortMergeHi_Private(arr, buff->ptr, size, cmp, ctx, l, m, r));
693: }
694: }
695: /* Update B with merge */
696: stack[i - 1].size += stack[i].size;
697: }
698: --i;
699: } else if (stack[i - 1].size <= stack[i].size) {
700: /* merge C into B */
701: goto mergeBC;
702: }
703: }
704: if (itemp == i) break;
705: }
706: *stacksize = i;
707: PetscFunctionReturn(PETSC_SUCCESS);
708: }
710: static inline PetscErrorCode PetscTimSortMergeCollapseWithArray_Private(char *arr, size_t asize, char *barr, size_t bsize, CompFunc cmp, void *ctx, PetscTimSortBuffer *abuff, PetscTimSortBuffer *bbuff, PetscTimSortStack *stack, PetscInt *stacksize)
711: {
712: PetscInt i = *stacksize;
714: PetscFunctionBegin;
715: while (i) {
716: PetscInt l, m, r, itemp = i;
718: if (i == 1) {
719: /* A = stack[i-1], B = stack[i] */
720: if (stack[i - 1].size < stack[i].size) {
721: /* if A[-1] <= B[0] then sorted */
722: if ((*cmp)(arr + (stack[i].start - 1) * asize, arr + (stack[i].start) * asize, ctx) > 0) {
723: m = stack[i].start;
724: /* Search A for B[0] insertion */
725: PetscCall(PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, stack[i - 1].start, stack[i].start - 1, (arr) + stack[i].start * asize, &l));
726: /* Search B for A[-1] insertion */
727: PetscCall(PetscGallopSearchRight_Private(arr, asize, cmp, ctx, stack[i].start, stack[i].start + stack[i].size - 1, arr + (stack[i].start - 1) * asize, &r));
728: if (m - l <= r - m) {
729: PetscCall(PetscTimSortResizeBuffer_Private(abuff, (m - l + 1) * asize));
730: PetscCall(PetscTimSortResizeBuffer_Private(bbuff, (m - l + 1) * bsize));
731: PetscCall(PetscTimSortMergeLoWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r));
732: } else {
733: PetscCall(PetscTimSortResizeBuffer_Private(abuff, (r - m + 1) * asize));
734: PetscCall(PetscTimSortResizeBuffer_Private(bbuff, (r - m + 1) * bsize));
735: PetscCall(PetscTimSortMergeHiWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r));
736: }
737: }
738: /* Update A with merge */
739: stack[i - 1].size += stack[i].size;
740: --i;
741: }
742: } else {
743: /* i > 2, i.e. C exists
744: A = stack[i-2], B = stack[i-1], C = stack[i]; */
745: if (stack[i - 2].size <= stack[i - 1].size + stack[i].size) {
746: if (stack[i - 2].size < stack[i].size) {
747: /* merge B into A, but if A[-1] <= B[0] then already sorted */
748: if ((*cmp)(arr + (stack[i - 1].start - 1) * asize, arr + (stack[i - 1].start) * asize, ctx) > 0) {
749: m = stack[i - 1].start;
750: /* Search A for B[0] insertion */
751: PetscCall(PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, stack[i - 2].start, stack[i - 1].start - 1, (arr) + (stack[i - 1].start) * asize, &l));
752: /* Search B for A[-1] insertion */
753: PetscCall(PetscGallopSearchRight_Private(arr, asize, cmp, ctx, stack[i - 1].start, stack[i - 1].start + stack[i - 1].size - 1, (arr) + (stack[i - 1].start - 1) * asize, &r));
754: if (m - l <= r - m) {
755: PetscCall(PetscTimSortResizeBuffer_Private(abuff, (m - l + 1) * asize));
756: PetscCall(PetscTimSortResizeBuffer_Private(bbuff, (m - l + 1) * bsize));
757: PetscCall(PetscTimSortMergeLoWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r));
758: } else {
759: PetscCall(PetscTimSortResizeBuffer_Private(abuff, (r - m + 1) * asize));
760: PetscCall(PetscTimSortResizeBuffer_Private(bbuff, (r - m + 1) * bsize));
761: PetscCall(PetscTimSortMergeHiWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r));
762: }
763: }
764: /* Update A with merge */
765: stack[i - 2].size += stack[i - 1].size;
766: /* Push C up the stack */
767: stack[i - 1].start = stack[i].start;
768: stack[i - 1].size = stack[i].size;
769: } else {
770: /* merge C into B */
771: mergeBC:
772: /* If B[-1] <= C[0] then... you know the drill */
773: if ((*cmp)(arr + (stack[i].start - 1) * asize, arr + (stack[i].start) * asize, ctx) > 0) {
774: m = stack[i].start;
775: /* Search B for C[0] insertion */
776: PetscCall(PetscGallopSearchLeft_Private(arr, asize, cmp, ctx, stack[i - 1].start, stack[i].start - 1, arr + stack[i].start * asize, &l));
777: /* Search C for B[-1] insertion */
778: PetscCall(PetscGallopSearchRight_Private(arr, asize, cmp, ctx, stack[i].start, stack[i].start + stack[i].size - 1, (arr) + (stack[i].start - 1) * asize, &r));
779: if (m - l <= r - m) {
780: PetscCall(PetscTimSortResizeBuffer_Private(abuff, (m - l + 1) * asize));
781: PetscCall(PetscTimSortResizeBuffer_Private(bbuff, (m - l + 1) * bsize));
782: PetscCall(PetscTimSortMergeLoWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r));
783: } else {
784: PetscCall(PetscTimSortResizeBuffer_Private(abuff, (r - m + 1) * asize));
785: PetscCall(PetscTimSortResizeBuffer_Private(bbuff, (r - m + 1) * bsize));
786: PetscCall(PetscTimSortMergeHiWithArray_Private(arr, abuff->ptr, asize, barr, bbuff->ptr, bsize, cmp, ctx, l, m, r));
787: }
788: }
789: /* Update B with merge */
790: stack[i - 1].size += stack[i].size;
791: }
792: --i;
793: } else if (stack[i - 1].size <= stack[i].size) {
794: /* merge C into B */
795: goto mergeBC;
796: }
797: }
798: if (itemp == i) break;
799: }
800: *stacksize = i;
801: PetscFunctionReturn(PETSC_SUCCESS);
802: }
804: /* March sequentially through the array building up a "run" of weakly increasing or strictly decreasing contiguous
805: elements. Decreasing runs are reversed by swapping. If the run is less than minrun, artificially extend it via either
806: binary insertion sort or regulat insertion sort */
807: static inline PetscErrorCode PetscTimSortBuildRun_Private(char *arr, char *tarr, size_t size, CompFunc cmp, void *ctx, PetscInt n, PetscInt minrun, PetscInt runstart, PetscInt *runend)
808: {
809: const PetscInt re = PetscMin(runstart + minrun, n - 1);
810: PetscInt ri = runstart;
812: PetscFunctionBegin;
813: if (PetscUnlikely(runstart == n - 1)) {
814: *runend = runstart;
815: PetscFunctionReturn(PETSC_SUCCESS);
816: }
817: /* guess whether run is ascending or descending and tally up the longest consecutive run. essentially a coinflip for random data */
818: if ((*cmp)((arr) + (ri + 1) * size, (arr) + ri * size, ctx) < 0) {
819: ++ri;
820: while (ri < n - 1) {
821: if ((*cmp)((arr) + (ri + 1) * size, (arr) + ri * size, ctx) >= 0) break;
822: ++ri;
823: }
824: {
825: PetscInt lo = runstart, hi = ri;
826: for (; lo < hi; ++lo, --hi) COPYSWAPPY(arr + lo * size, arr + hi * size, tarr, size);
827: }
828: } else {
829: ++ri;
830: while (ri < n - 1) {
831: if ((*cmp)((arr) + (ri + 1) * size, (arr) + ri * size, ctx) < 0) break;
832: ++ri;
833: }
834: }
835: if (ri < re) {
836: /* the attempt failed, this section likely contains random data. If ri got close to minrun (within 50%) then we try
837: binary search */
838: if (ri - runstart <= minrun >> 1) {
839: ++MIN_GALLOP_GLOBAL; /* didn't get close hedge our bets against random data */
840: PetscCall(PetscInsertionSort_Private(arr, tarr, size, cmp, ctx, runstart, ri, re));
841: } else {
842: PetscCall(PetscBinaryInsertionSort_Private(arr, tarr, size, cmp, ctx, runstart, ri, re));
843: }
844: *runend = re;
845: } else *runend = ri;
846: PetscFunctionReturn(PETSC_SUCCESS);
847: }
849: static inline PetscErrorCode PetscTimSortBuildRunWithArray_Private(char *arr, char *atarr, size_t asize, char *barr, char *btarr, size_t bsize, CompFunc cmp, void *ctx, PetscInt n, PetscInt minrun, PetscInt runstart, PetscInt *runend)
850: {
851: const PetscInt re = PetscMin(runstart + minrun, n - 1);
852: PetscInt ri = runstart;
854: PetscFunctionBegin;
855: if (PetscUnlikely(runstart == n - 1)) {
856: *runend = runstart;
857: PetscFunctionReturn(PETSC_SUCCESS);
858: }
859: /* guess whether run is ascending or descending and tally up the longest consecutive run. essentially a coinflip for random data */
860: if ((*cmp)((arr) + (ri + 1) * asize, arr + (ri * asize), ctx) < 0) {
861: ++ri;
862: while (ri < n - 1) {
863: if ((*cmp)((arr) + (ri + 1) * asize, (arr) + ri * asize, ctx) >= 0) break;
864: ++ri;
865: }
866: {
867: PetscInt lo = runstart, hi = ri;
868: for (; lo < hi; ++lo, --hi) COPYSWAPPY2(arr + lo * asize, arr + hi * asize, asize, barr + lo * bsize, barr + hi * bsize, bsize, atarr);
869: }
870: } else {
871: ++ri;
872: while (ri < n - 1) {
873: if ((*cmp)((arr) + (ri + 1) * asize, (arr) + ri * asize, ctx) < 0) break;
874: ++ri;
875: }
876: }
877: if (ri < re) {
878: /* the attempt failed, this section likely contains random data. If ri got close to minrun (within 50%) then we try
879: binary search */
880: if (ri - runstart <= minrun >> 1) {
881: ++MIN_GALLOP_GLOBAL; /* didn't get close hedge our bets against random data */
882: PetscCall(PetscInsertionSortWithArray_Private(arr, atarr, asize, barr, btarr, bsize, cmp, ctx, runstart, ri, re));
883: } else {
884: PetscCall(PetscBinaryInsertionSortWithArray_Private(arr, atarr, asize, barr, btarr, bsize, cmp, ctx, runstart, ri, re));
885: }
886: *runend = re;
887: } else *runend = ri;
888: PetscFunctionReturn(PETSC_SUCCESS);
889: }
891: /*@C
892: PetscTimSort - Sorts an array in place in increasing order using Tim Peters <https://bugs.python.org/file4451/timsort.txt> adaptive sorting algorithm.
894: Not Collective, No Fortran Support
896: Input Parameters:
897: + n - number of values
898: . arr - array to be sorted
899: . size - size in bytes of the datatype held in arr
900: . cmp - function pointer to comparison function
901: - ctx - optional context to be passed to comparison function, NULL if not needed
903: Output Parameter:
904: . arr - sorted array
906: Level: developer
908: Notes:
909: Timsort makes the assumption that input data is already likely partially ordered, or that it contains contiguous
910: sections (termed 'runs') where the data is locally ordered (but not necessarily globally ordered). It therefore aims to
911: select slices of the array in such a way that resulting mergesorts operate on near perfectly length-balanced arrays. To
912: do so it repeatedly triggers attempts throughout to merge adjacent runs.
914: Should one run continuously "win" a comparison the algorithm begins the "gallop" phase. It will aggressively search
915: the "winner" for the location of the "losers" next entry (and vice versa) to copy all preceding elements into place in
916: bulk. However if the data is truly unordered (as is the case with random data) the immense gains possible from these
917: searches are expected __not__ to repay their costs. While adjacent arrays are almost all nearly the same size, they
918: likely all contain similar data.
920: Example Usage:
921: The comparison function must follow the `qsort()` comparison function paradigm, returning the sign of the difference
922: between its arguments. If left < right : return -1, if left == right : return 0, if left > right : return 1. The user
923: may also
924: change or reverse the order of the sort by flipping the above. Note that stability of the sort is only guaranteed if
925: the comparison function forms a valid trigraph. For example when sorting an array of type "my_type" in increasing
926: order
928: .vb
929: int increasing_comparison_function(const void *left, const void *right, void *ctx) {
930: my_type l = *(my_type *) left, r = *(my_type *) right;
931: return (l < r) ? -1 : (l > r);
932: }
933: .ve
935: Note the context is unused here but you may use it to pass and subsequently access whatever information required
936: inside the comparison function. The context pointer will unaltered except for any changes made inside the comparison function.
937: Then pass the function
938: .vb
939: PetscTimSort(n, arr, sizeof(arr[0]), increasing_comparison_function, ctx)
940: .ve
942: Fortran Notes:
943: To use this from Fortran you must write a comparison subroutine with 4 arguments which accepts left, right, ctx and
944: returns result. For example
945: .vb
946: subroutine CompareIntegers(left,right,ctx,result)
947: implicit none
949: PetscInt,intent(in) :: left, right
950: type(UserCtx) :: ctx
951: integer,intent(out) :: result
953: if (left < right) then
954: result = -1
955: else if (left == right) then
956: result = 0
957: else
958: result = 1
959: end if
960: return
961: end subroutine CompareIntegers
962: .ve
964: .seealso: `PetscTimSortWithArray()`, `PetscIntSortSemiOrdered()`, `PetscRealSortSemiOrdered()`, `PetscMPIIntSortSemiOrdered()`
965: @*/
966: PetscErrorCode PetscTimSort(PetscInt n, void *arr, size_t size, int (*cmp)(const void *, const void *, void *), void *ctx)
967: {
968: PetscInt stacksize = 0, minrun, runstart = 0, runend = 0;
969: PetscTimSortStack runstack[128];
970: PetscTimSortBuffer buff;
971: /* stacksize = log_phi(n) = log_2(n)/log_2(phi), so 128 is enough for ~5.614e26 elements.
972: It is so unlikely that this limit is reached that this is __never__ checked for */
974: PetscFunctionBegin;
975: /* Compute minrun. Minrun should be (32, 65) such that N/minrun
976: is a power of 2 or one plus a power of 2 */
977: {
978: PetscInt t = n, r = 0;
979: /* r becomes 1 if the least significant bits contain at least one off bit */
980: while (t >= 64) {
981: r |= t & 1;
982: t >>= 1;
983: }
984: minrun = t + r;
985: }
986: if (PetscDefined(USE_DEBUG)) {
987: PetscCall(PetscInfo(NULL, "minrun = %" PetscInt_FMT "\n", minrun));
988: if (n < 64) {
989: PetscCall(PetscInfo(NULL, "n %" PetscInt_FMT " < 64, consider using PetscSortInt() instead\n", n));
990: } else PetscCheck(!(minrun < 32) && !(minrun > 65), PETSC_COMM_SELF, PETSC_ERR_PLIB, "Calculated minrun %" PetscInt_FMT " not in range (32,65)", minrun);
991: }
992: PetscCall(PetscMalloc1((size_t)minrun * size, &buff.ptr));
993: buff.size = (size_t)minrun * size;
994: buff.maxsize = (size_t)n * size;
995: MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL;
996: while (runstart < n) {
997: /* Check if additional entries are at least partially ordered and build natural run */
998: PetscCall(PetscTimSortBuildRun_Private((char *)arr, buff.ptr, size, cmp, ctx, n, minrun, runstart, &runend));
999: runstack[stacksize].start = runstart;
1000: runstack[stacksize].size = runend - runstart + 1;
1001: PetscCall(PetscTimSortMergeCollapse_Private((char *)arr, size, cmp, ctx, &buff, runstack, &stacksize));
1002: ++stacksize;
1003: runstart = runend + 1;
1004: }
1005: /* Have been inside while, so discard last stacksize++ */
1006: --stacksize;
1007: PetscCall(PetscTimSortForceCollapse_Private((char *)arr, size, cmp, ctx, &buff, runstack, stacksize));
1008: PetscCall(PetscFree(buff.ptr));
1009: MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL;
1010: PetscFunctionReturn(PETSC_SUCCESS);
1011: }
1013: /*@C
1014: PetscTimSortWithArray - Sorts an array in place in increasing order using Tim Peters <https://bugs.python.org/file4451/timsort.txt> adaptive sorting algorithm and
1015: reorders a second array to match the first. The arrays need not be the same type.
1017: Not Collective, No Fortran Support
1019: Input Parameters:
1020: + n - number of values
1021: . asize - size in bytes of the datatype held in arr
1022: . bsize - size in bytes of the datatype held in barr
1023: . cmp - function pointer to comparison function
1024: - ctx - optional context to be passed to comparison function, NULL if not needed
1026: Input/Output Parameters:
1027: + arr - array to be sorted, on output it is sorted
1028: - barr - array to be reordered, on output it is reordered
1030: Level: developer
1032: Notes:
1033: The arrays need not be of the same type, however barr MUST contain at least as many elements as arr and the two CANNOT
1034: overlap.
1036: Timsort makes the assumption that input data is already likely partially ordered, or that it contains contiguous
1037: sections (termed 'runs') where the data is locally ordered (but not necessarily globally ordered). It therefore aims
1038: to select slices of the array in such a way that resulting mergesorts operate on near perfectly length-balanced
1039: arrays. To do so it repeatedly triggers attempts throughout to merge adjacent runs.
1041: Should one run continuously "win" a comparison the algorithm begins the "gallop" phase. It will aggressively search
1042: the "winner" for the location of the "losers" next entry (and vice versa) to copy all preceding elements into place in
1043: bulk. However if the data is truly unordered (as is the case with random data) the immense gains possible from these
1044: searches are expected __not__ to repay their costs. While adjacent arrays are almost all nearly the same size, they
1045: likely all contain similar data.
1047: Example Usage:
1048: The comparison function must follow the `qsort()` comparison function paradigm, returning the sign of the difference
1049: between its arguments. If left < right \: return -1, if left == right \: return 0, if left > right \: return 1. The user
1050: may also change or reverse the order of the sort by flipping the above. Note that stability of the sort is only
1051: guaranteed if the comparison function forms a valid trigraph. For example when sorting an array of type "my_type" in
1052: increasing order
1054: .vb
1055: int increasing_comparison_function(const void *left, const void *right, void *ctx) {
1056: my_type l = *(my_type *) left, r = *(my_type *) right;
1057: return (l < r) ? -1 : (l > r);
1058: }
1059: .ve
1061: Note the context is unused here but you may use it to pass and subsequently access whatever information required
1062: inside the comparison function. The context pointer will unaltered except for any changes made inside the comparison function.
1063: Then pass the function
1064: .vb
1065: PetscTimSortWithArray(n, arr, sizeof(arr[0]), barr, sizeof(barr[0]), increasing_comparison_function, ctx)
1066: .ve
1068: Fortran Notes:
1069: To use this from Fortran you must write a comparison subroutine with 4 arguments which accepts left, right, ctx and
1070: returns result. For example
1071: .vb
1072: subroutine CompareIntegers(left,right,ctx,result)
1073: implicit none
1075: PetscInt,intent(in) :: left, right
1076: type(UserCtx) :: ctx
1077: integer,intent(out) :: result
1079: if (left < right) then
1080: result = -1
1081: else if (left == right) then
1082: result = 0
1083: else
1084: result = 1
1085: end if
1086: return
1087: end subroutine CompareIntegers
1088: .ve
1090: .seealso: `PetscTimSort()`, `PetscIntSortSemiOrderedWithArray()`, `PetscRealSortSemiOrderedWithArrayInt()`, `PetscMPIIntSortSemiOrderedWithArray()`
1091: @*/
1092: PetscErrorCode PetscTimSortWithArray(PetscInt n, void *arr, size_t asize, void *barr, size_t bsize, int (*cmp)(const void *, const void *, void *), void *ctx)
1093: {
1094: PetscInt stacksize = 0, minrun, runstart = 0, runend = 0;
1095: PetscTimSortStack runstack[128];
1096: PetscTimSortBuffer abuff, bbuff;
1097: /* stacksize = log_phi(n) = log_2(n)/log_2(phi), so 128 is enough for ~5.614e26 elements.
1098: It is so unlikely that this limit is reached that this is __never__ checked for */
1100: PetscFunctionBegin;
1101: /* Compute minrun. Minrun should be (32, 65) such that N/minrun
1102: is a power of 2 or one plus a power of 2 */
1103: {
1104: PetscInt t = n, r = 0;
1105: /* r becomes 1 if the least significant bits contain at least one off bit */
1106: while (t >= 64) {
1107: r |= t & 1;
1108: t >>= 1;
1109: }
1110: minrun = t + r;
1111: }
1112: if (PetscDefined(USE_DEBUG)) {
1113: PetscCall(PetscInfo(NULL, "minrun = %" PetscInt_FMT "\n", minrun));
1114: PetscCheck(n < 64 || (minrun >= 32 && minrun <= 65), PETSC_COMM_SELF, PETSC_ERR_PLIB, "Calculated minrun %" PetscInt_FMT " not in range (32,65)", minrun);
1115: }
1116: PetscCall(PetscMalloc1((size_t)minrun * asize, &abuff.ptr));
1117: abuff.size = (size_t)minrun * asize;
1118: abuff.maxsize = (size_t)n * asize;
1119: PetscCall(PetscMalloc1((size_t)minrun * bsize, &bbuff.ptr));
1120: bbuff.size = (size_t)minrun * bsize;
1121: bbuff.maxsize = (size_t)n * bsize;
1122: MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL;
1123: while (runstart < n) {
1124: /* Check if additional entries are at least partially ordered and build natural run */
1125: PetscCall(PetscTimSortBuildRunWithArray_Private((char *)arr, abuff.ptr, asize, (char *)barr, bbuff.ptr, bsize, cmp, ctx, n, minrun, runstart, &runend));
1126: runstack[stacksize].start = runstart;
1127: runstack[stacksize].size = runend - runstart + 1;
1128: PetscCall(PetscTimSortMergeCollapseWithArray_Private((char *)arr, asize, (char *)barr, bsize, cmp, ctx, &abuff, &bbuff, runstack, &stacksize));
1129: ++stacksize;
1130: runstart = runend + 1;
1131: }
1132: /* Have been inside while, so discard last stacksize++ */
1133: --stacksize;
1134: PetscCall(PetscTimSortForceCollapseWithArray_Private((char *)arr, asize, (char *)barr, bsize, cmp, ctx, &abuff, &bbuff, runstack, stacksize));
1135: PetscCall(PetscFree(abuff.ptr));
1136: PetscCall(PetscFree(bbuff.ptr));
1137: MIN_GALLOP_GLOBAL = MIN_GALLOP_CONST_GLOBAL;
1138: PetscFunctionReturn(PETSC_SUCCESS);
1139: }
1141: /*@
1142: PetscIntSortSemiOrdered - Sorts an array of `PetscInt` in place in increasing order.
1144: Not Collective
1146: Input Parameters:
1147: + n - number of values
1148: - arr - array of integers
1150: Output Parameter:
1151: . arr - sorted array of integers
1153: Level: intermediate
1155: Notes:
1156: If the array is less than 64 entries long `PetscSortInt()` is automatically used.
1158: This function serves as an alternative to `PetscSortInt()`. While this function works for any array of integers it is
1159: significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__
1160: recommended that the user benchmark their code to see which routine is fastest.
1162: .seealso: `PetscTimSort()`, `PetscSortInt()`, `PetscSortIntWithPermutation()`
1163: @*/
1164: PetscErrorCode PetscIntSortSemiOrdered(PetscInt n, PetscInt arr[])
1165: {
1166: PetscFunctionBegin;
1167: if (n <= 1) PetscFunctionReturn(PETSC_SUCCESS);
1168: PetscAssertPointer(arr, 2);
1169: if (n < 64) {
1170: PetscCall(PetscSortInt(n, arr));
1171: } else {
1172: PetscCall(PetscTimSort(n, arr, sizeof(PetscInt), Compare_PetscInt_Private, NULL));
1173: }
1174: PetscFunctionReturn(PETSC_SUCCESS);
1175: }
1177: /*@
1178: PetscIntSortSemiOrderedWithArray - Sorts an array of `PetscInt` in place in increasing order and reorders a second
1179: `PetscInt` array to match the first.
1181: Not Collective
1183: Input Parameter:
1184: . n - number of values
1186: Input/Output Parameters:
1187: + arr1 - array of integers to be sorted, modified on output
1188: - arr2 - array of integers to be reordered, modified on output
1190: Level: intermediate
1192: Notes:
1193: The arrays CANNOT overlap.
1195: This function serves as an alternative to `PetscSortIntWithArray()`. While this function works for any array of integers it is
1196: significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__
1197: recommended that the user benchmark their code to see which routine is fastest.
1199: .seealso: `PetscTimSortWithArray()`, `PetscSortIntWithArray()`, `PetscSortIntWithPermutation()`
1200: @*/
1201: PetscErrorCode PetscIntSortSemiOrderedWithArray(PetscInt n, PetscInt arr1[], PetscInt arr2[])
1202: {
1203: PetscFunctionBegin;
1204: if (n <= 1) PetscFunctionReturn(PETSC_SUCCESS);
1205: PetscAssertPointer(arr1, 2);
1206: PetscAssertPointer(arr2, 3);
1207: /* cannot export out to PetscIntSortWithArray here since it isn't stable */
1208: PetscCall(PetscTimSortWithArray(n, arr1, sizeof(PetscInt), arr2, sizeof(PetscInt), Compare_PetscInt_Private, NULL));
1209: PetscFunctionReturn(PETSC_SUCCESS);
1210: }
1212: /*@
1213: PetscMPIIntSortSemiOrdered - Sorts an array of `PetscMPIInt` in place in increasing order.
1215: Not Collective
1217: Input Parameters:
1218: + n - number of values
1219: - arr - array of `PetscMPIInt`
1221: Output Parameter:
1222: . arr - sorted array of integers
1224: Level: intermediate
1226: Notes:
1227: If the array is less than 64 entries long `PetscSortMPIInt()` is automatically used.
1229: This function serves as an alternative to `PetscSortMPIInt()`. While this function works for any array of `PetscMPIInt` it is
1230: significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__
1231: recommended that the user benchmark their code to see which routine is fastest.
1233: .seealso: `PetscTimSort()`, `PetscSortMPIInt()`
1234: @*/
1235: PetscErrorCode PetscMPIIntSortSemiOrdered(PetscInt n, PetscMPIInt arr[])
1236: {
1237: PetscFunctionBegin;
1238: if (n <= 1) PetscFunctionReturn(PETSC_SUCCESS);
1239: PetscAssertPointer(arr, 2);
1240: if (n < 64) {
1241: PetscCall(PetscSortMPIInt(n, arr));
1242: } else {
1243: PetscCall(PetscTimSort(n, arr, sizeof(PetscMPIInt), Compare_PetscMPIInt_Private, NULL));
1244: }
1245: PetscFunctionReturn(PETSC_SUCCESS);
1246: }
1248: /*@
1249: PetscMPIIntSortSemiOrderedWithArray - Sorts an array of `PetscMPIInt` in place in increasing order and reorders a second `PetscMPIInt`
1250: array to match the first.
1252: Not Collective
1254: Input Parameter:
1255: . n - number of values
1257: Input/Output Parameters:
1258: + arr1 - array of integers to be sorted, modified on output
1259: - arr2 - array of integers to be reordered, modified on output
1261: Level: intermediate
1263: Notes:
1264: The arrays CANNOT overlap.
1266: This function serves as an alternative to `PetscSortMPIIntWithArray()`. While this function works for any array of integers it is
1267: significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__
1268: recommended that the user benchmark their code to see which routine is fastest.
1270: .seealso: `PetscTimSortWithArray()`, `PetscSortMPIIntWithArray()`, `PetscSortMPIIntWithPermutation()`
1271: @*/
1272: PetscErrorCode PetscMPIIntSortSemiOrderedWithArray(PetscInt n, PetscMPIInt arr1[], PetscMPIInt arr2[])
1273: {
1274: PetscFunctionBegin;
1275: if (n <= 1) PetscFunctionReturn(PETSC_SUCCESS);
1276: PetscAssertPointer(arr1, 2);
1277: PetscAssertPointer(arr2, 3);
1278: /* cannot export out to PetscMPIIntSortWithArray here since it isn't stable */
1279: PetscCall(PetscTimSortWithArray(n, arr1, sizeof(PetscMPIInt), arr2, sizeof(PetscMPIInt), Compare_PetscMPIInt_Private, NULL));
1280: PetscFunctionReturn(PETSC_SUCCESS);
1281: }
1283: /*@
1284: PetscRealSortSemiOrdered - Sorts an array of `PetscReal` in place in increasing order.
1286: Not Collective
1288: Input Parameters:
1289: + n - number of values
1290: - arr - array of `PetscReal`
1292: Output Parameter:
1293: . arr - sorted array of integers
1295: Level: intermediate
1297: Notes:
1298: If the array is less than 64 entries long `PetscSortReal()` is automatically used.
1300: This function serves as an alternative to `PetscSortReal()`. While this function works for any array of `PetscReal` it is
1301: significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__
1302: recommended that the user benchmark their code to see which routine is fastest.
1304: .seealso: `PetscTimSort()`, `PetscSortReal()`, `PetscSortRealWithPermutation()`
1305: @*/
1306: PetscErrorCode PetscRealSortSemiOrdered(PetscInt n, PetscReal arr[])
1307: {
1308: PetscFunctionBegin;
1309: if (n <= 1) PetscFunctionReturn(PETSC_SUCCESS);
1310: PetscAssertPointer(arr, 2);
1311: if (n < 64) {
1312: PetscCall(PetscSortReal(n, arr));
1313: } else {
1314: PetscCall(PetscTimSort(n, arr, sizeof(PetscReal), Compare_PetscReal_Private, NULL));
1315: }
1316: PetscFunctionReturn(PETSC_SUCCESS);
1317: }
1319: /*@
1320: PetscRealSortSemiOrderedWithArrayInt - Sorts an array of `PetscReal` in place in increasing order and reorders a second
1321: array of `PetscInt` to match the first.
1323: Not Collective
1325: Input Parameter:
1326: . n - number of values
1328: Input/Output Parameters:
1329: + arr1 - array of `PetscReal` to be sorted, modified on output
1330: - arr2 - array of `PetscInt` to be reordered, modified on output
1332: Level: intermediate
1334: Notes:
1335: This function serves as an alternative to `PetscSortRealWithArray()`. While this function works for any array of `PetscReal` it is
1336: significantly faster if the array is not totally random. There are exceptions to this and so it is __highly__
1337: recommended that the user benchmark their code to see which routine is fastest.
1339: .seealso: `PetscTimSortWithArray()`, `PetscSortRealWithArrayInt()`, `PetscSortRealWithPermutation()`
1340: @*/
1341: PetscErrorCode PetscRealSortSemiOrderedWithArrayInt(PetscInt n, PetscReal arr1[], PetscInt arr2[])
1342: {
1343: PetscFunctionBegin;
1344: if (n <= 1) PetscFunctionReturn(PETSC_SUCCESS);
1345: PetscAssertPointer(arr1, 2);
1346: PetscAssertPointer(arr2, 3);
1347: /* cannot export out to PetscRealSortWithArrayInt here since it isn't stable */
1348: PetscCall(PetscTimSortWithArray(n, arr1, sizeof(PetscReal), arr2, sizeof(PetscInt), Compare_PetscReal_Private, NULL));
1349: PetscFunctionReturn(PETSC_SUCCESS);
1350: }