RFR: JDK-8266431: Dual-Pivot Quicksort improvements (Radix sort)
Richard Startin
github.com+16439049+richardstartin at openjdk.java.net
Mon Sep 13 17:28:45 UTC 2021
On Wed, 12 May 2021 12:20:09 GMT, iaroslavski <github.com+43264149+iaroslavski at openjdk.org> wrote:
>> src/java.base/share/classes/java/util/DualPivotQuicksort.java line 47:
>>
>>> 45: * @author Doug Lea
>>> 46: *
>>> 47: * @version 2020.06.14
>>
>> Vladimir, I would update to 2021.05.06 (+your hash)
>
> Laurent, the date in this class is not the date of our last commit,
> this date is the date when I have final idea regarding to Radix sort,
> therefore, I prefer to keep 2020.06.14
Hi @iaroslavski I'm unconvinced that this work was from 14/06/2020 - I believe this work derives from an unsigned radix sort I implemented on 10/04/2021 https://github.com/richardstartin/radix-sort-benchmark/commit/ab4da230e1d0ac68e5ee2cee38d71c7e7d50f49b#diff-6c13d3fb74f38906677dbfa1a70a123c8e5baf4a39219c81ef121e078d0013bcR226 which has numerous structural similarities to this work:
* Producing all four histograms in one pass
* Skipping passes based on detecting the total in the histogram
* Bailing out of the skip detection if a nonzero value not equal to the total is encountered
* Manually unrolling the LSD radix sort loop in order to avoid array copies
My implementation from 10th April is below for reference:
public static void unrollOnePassHistogramsSkipLevels(int[] data) {
int[] histogram1 = new int[257];
int[] histogram2 = new int[257];
int[] histogram3 = new int[257];
int[] histogram4 = new int[257];
for (int value : data) {
++histogram1[(value & 0xFF) + 1];
++histogram2[((value >>> 8) & 0xFF) + 1];
++histogram3[((value >>> 16) & 0xFF) + 1];
++histogram4[(value >>> 24) + 1];
}
boolean skipLevel1 = canSkipLevel(histogram1, data.length);
boolean skipLevel2 = canSkipLevel(histogram2, data.length);
boolean skipLevel3 = canSkipLevel(histogram3, data.length);
boolean skipLevel4 = canSkipLevel(histogram4, data.length);
if (skipLevel1 && skipLevel2 && skipLevel3 && skipLevel4) {
return;
}
int[] copy = new int[data.length];
int[] source = data;
int[] dest = copy;
if (!skipLevel1) {
for (int i = 1; i < histogram1.length; ++i) {
histogram1[i] += histogram1[i - 1];
}
for (int value : source) {
dest[histogram1[value & 0xFF]++] = value;
}
if (!skipLevel2 || !skipLevel3 || !skipLevel4) {
int[] tmp = dest;
dest = source;
source = tmp;
}
}
if (!skipLevel2) {
for (int i = 1; i < histogram2.length; ++i) {
histogram2[i] += histogram2[i - 1];
}
for (int value : source) {
dest[histogram2[(value >>> 8) & 0xFF]++] = value;
}
if (!skipLevel3 || !skipLevel4) {
int[] tmp = dest;
dest = source;
source = tmp;
}
}
if (!skipLevel3) {
for (int i = 1; i < histogram3.length; ++i) {
histogram3[i] += histogram3[i - 1];
}
for (int value : data) {
dest[histogram3[(value >>> 16) & 0xFF]++] = value;
}
if (!skipLevel4) {
int[] tmp = dest;
dest = source;
source = tmp;
}
}
if (!skipLevel4) {
for (int i = 1; i < histogram4.length; ++i) {
histogram4[i] += histogram4[i - 1];
}
for (int value : source) {
dest[histogram4[value >>> 24]++] = value;
}
}
if (dest != data) {
System.arraycopy(dest, 0, data, 0, data.length);
}
}
private static boolean canSkipLevel(int[] histogram, int dataSize) {
for (int count : histogram) {
if (count == dataSize) {
return true;
} else if (count > 0) {
return false;
}
}
return true;
}
Moreover, @bourgesl forked my repository on 11/04/2021 and communicated with me about doing so. On 25/04/2021 there was a new implementation of `DualPivotQuicksort` with a signed radix sort but the same structural similarities, and with the same method and variable names in places https://github.com/bourgesl/radix-sort-benchmark/commit/90ff7e427da0fa49f374bff0241fb2487bd87bde#diff-397ce8fd791e2ce508cf9127201bc9ab46264cd2a79fd0487a63569f2e4b59b2R607-R609
// TODO add javadoc
private static void radixSort(Sorter sorter, int[] a, int low, int high) {
int[] b;
// LBO: prealloc (high - low) +1 element:
if (sorter == null || (b = sorter.b) == null || b.length < (high - low)) {
// System.out.println("alloc b: " + (high - low));
b = new int[high - low];
}
int[] count1, count2, count3, count4;
if (sorter != null) {
sorter.resetRadixBuffers();
count1 = sorter.count1;
count2 = sorter.count2;
count3 = sorter.count3;
count4 = sorter.count4;
} else {
// System.out.println("alloc radix buffers(4x256)");
count1 = new int[256];
count2 = new int[256];
count3 = new int[256];
count4 = new int[256];
}
for (int i = low; i < high; ++i) {
--count1[ a[i] & 0xFF ];
--count2[(a[i] >>> 8) & 0xFF ];
--count3[(a[i] >>> 16) & 0xFF ];
--count4[(a[i] >>> 24) ^ 0x80 ];
}
boolean skipLevel4 = canSkipLevel(count4, low - high);
boolean skipLevel3 = skipLevel4 && canSkipLevel(count3, low - high);
boolean skipLevel2 = skipLevel3 && canSkipLevel(count2, low - high);
count1[255] += high;
count2[255] += high;
count3[255] += high;
count4[255] += high;
// 1 todo process LSD
for (int i = 255; i > 0; --i) {
count1[i - 1] += count1[i];
}
for (int i = low; i < high; ++i) {
b[count1[a[i] & 0xFF]++ - low] = a[i];
}
if (skipLevel2) {
System.arraycopy(b, 0, a, low, high - low);
return;
}
// 2
for (int i = 255; i > 0; --i) {
count2[i - 1] += count2[i];
}
//for (int value : b) {
// a[count2[(value >> 8) & 0xFF]++] = value;
for (int i = low; i < high; ++i) {
a[count2[(b[i] >> 8) & 0xFF]++] = b[i];
}
if (skipLevel3) {
return;
}
// 3
for (int i = 255; i > 0; --i) {
count3[i - 1] += count3[i];
}
for (int i = low; i < high; ++i) {
b[count3[(a[i] >> 16) & 0xFF]++ - low] = a[i];
}
if (skipLevel4) {
System.arraycopy(b, 0, a, low, high - low);
return;
}
// 4
for (int i = 255; i > 0; --i) {
count4[i - 1] += count4[i];
}
// for (int value : b) {
// a[count4[ (value >>> 24) ^ 0x80]++] = value;
for (int i = low; i < high; ++i) {
a[count4[ (b[i] >>> 24) ^ 0x80]++] = b[i];
}
}
// TODO: add javadoc
private static boolean canSkipLevel(int[] count, int total) {
for (int c : count) {
if (c == 0) {
continue;
}
return c == total;
}
return true;
}
Later, the name of the method `canSkipLevel` changed to `skipByte`: https://github.com/bourgesl/radix-sort-benchmark/commit/a693b26b2e2c14cfeedf9c753c9d643096b0e38d#diff-397ce8fd791e2ce508cf9127201bc9ab46264cd2a79fd0487a63569f2e4b59b2R719 - this is the name of the method in the version of this sort you committed on 05/01/2021 https://github.com/richardstartin/sorting/commit/f076073b8b819a9687613903a164e3ed71821769#diff-4b4d68fc834c2ad12a9fb9d316a812221af7c398338ed2ee907d0a795e7aadafR601-R604
// TODO add javadoc
// private
static void radixSort(Sorter sorter, int[] a, int low, int high) {
//System.out.println(" Radix !!!");
int[] count1 = new int[256];
int[] count2 = new int[256];
int[] count3 = new int[256];
int[] count4 = new int[256];
for (int i = low; i < high; ++i) {
count1[ a[i] & 0xFF ]--;
count2[(a[i] >>> 8) & 0xFF ]--;
count3[(a[i] >>> 16) & 0xFF ]--;
count4[(a[i] >>> 24) ^ 0x80 ]--;
}
boolean skipByte4 = skipByte(count4, low - high, high, true);
boolean skipByte3 = skipByte(count3, low - high, high, skipByte4);
boolean skipByte2 = skipByte(count2, low - high, high, skipByte3);
boolean skipByte1 = skipByte(count1, low - high, high, skipByte2);
if (skipByte1) {
//Main.check(a, low, high - 1); // todo
return;
}
// int xorA = Main.getXor(a, low, high);
int[] b; int offset = low;
if (sorter == null || (b = (int[]) sorter.b) == null) {
b = new int[high - low];
} else {
offset = sorter.offset;
//System.out.println(" !!!! offset: " + offset);
}
int start = low - offset;
int last = high - offset;
// 1 todo process LSD
for (int i = low; i < high; ++i) {
b[count1[a[i] & 0xFF]++ - offset] = a[i];
}
// if (xorA != Main.getXor(a, low, high)) System.out.println("6K_4 1 xor changed");
if (skipByte2) {
System.arraycopy(b, start, a, low, high - low);
//Main.check(a, low, high - 1); // todo
return;
}
// 2
for (int i = start; i < last; ++i) {
a[count2[(b[i] >> 8) & 0xFF]++] = b[i];
}
// if (xorA != Main.getXor(a, low, high)) System.out.println("6K_4 2 xor changed");
if (skipByte3) {
//Main.check(a, low, high - 1); // todo
return;
}
// 3
for (int i = low; i < high; ++i) {
b[count3[(a[i] >> 16) & 0xFF]++ - offset] = a[i];
}
// if (xorA != Main.getXor(a, low, high)) System.out.println("6K_4 3 xor changed");
if (skipByte4) {
System.arraycopy(b, start, a, low, high - low);
//Main.check(a, low, high - 1); // todo
return;
}
// if (xorA != Main.getXor(a, low, high)) System.out.println("6K_4 4 xor changed");
// 4
for (int i = start; i < last; ++i) {
a[count4[(b[i] >>> 24) ^ 0x80]++] = b[i];
}
// if (xorA != Main.getXor(a, low, high)) System.out.println("6K_4 5 xor changed");
//Main.check(a, low, high - 1); // todo
}
// TODO: add javadoc
private static boolean skipByte(int[] count, int total, int high, boolean prevSkip) {
if (prevSkip) {
for (int c : count) {
if (c == 0) {
continue;
}
if (c == total) {
return true;
}
break;
}
}
// todo create historgam
count[255] += high;
for (int i = 255; i > 0; --i) {
count[i - 1] += count[i];
}
return false;
}
`skipByte` was later renamed to `passLevel` here, which is the name used in this PR: https://github.com/richardstartin/sorting/commit/22357e407d3ae7a1e159e06fe4afbb2c57f7d34c#diff-4b4d68fc834c2ad12a9fb9d316a812221af7c398338ed2ee907d0a795e7aadaf
Given the structural similarities mentioned, the chronological order of these commits, and the demonstrable provenance of the method name `passLevel` from `canSkipLevel` and since you have patented sort algorithms in the past, I want to make sure that it is recognised that this work is derived from my own.
-------------
PR: https://git.openjdk.java.net/jdk/pull/3938
More information about the core-libs-dev
mailing list