[vectorIntrinsics] C2 is fragile
Eugene Kluchnikov
eustas.ru at gmail.com
Tue Mar 16 20:36:23 UTC 2021
Hello, Vladimir.
Sorry for the long delay. I've reduced the reproduction cases to small
single-file programs.
So, just
- javac Demo2a.java && java -XX:+UnlockDiagnosticVMOptions
-XX:CompileCommand="print,*.innocent" Demo2a
- javac Demo2b.java && java -XX:+UnlockDiagnosticVMOptions
-XX:CompileCommand="print,*.innocent" Demo2b
Demo2a generates "bad" assembly for the "innocent" method. Demo2b generates
fairly good assembly for the "innocent" method.
Meanwhile, the "innocent" method has the same source code in both programs. Demo2a runs
16.4s on my MacBook, Demo2b 2.7s (turbo-boost disabled for CPU-time stability).
File Demo2a.java begin >>>
import jdk.incubator.vector.FloatVector;
import jdk.incubator.vector.IntVector;
import jdk.incubator.vector.VectorSpecies;
/**
 * Reproducer, "bad" variant: main() alternates calls to {@link #bad} and
 * {@link #innocent} in a hot loop. Per the accompanying report, C2 emits poor
 * assembly for innocent() in this program even though innocent() has the same
 * source as in Demo2b.
 *
 * NOTE(review): this is a JIT reproducer; the exact statement shape is
 * presumably significant, so avoid refactoring the code itself.
 */
public class Demo2a {
// 256-bit float and int species used by bad(); 128-bit int species used by innocent().
private static final VectorSpecies<Float> VFP = FloatVector.SPECIES_256;
private static final VectorSpecies<Integer> VIP = IntVector.SPECIES_256;
private static final VectorSpecies<Integer> VI4 = IntVector.SPECIES_128;
// Loop stride of bad(): the lane count of the 256-bit float species.
static final int STEP = VFP.length();
/**
 * Sums 128-bit int vectors loaded from {@code sum} at offsets
 * {@code regionX[i] * 4} for i in [0, count) and stores the result into
 * {@code dst} starting at index 0.
 *
 * Returns without touching {@code dst} when {@code count > regionX.length}.
 * regionX[0] is read unconditionally after that guard, including when
 * count is less than 1.
 */
static void innocent(int[] sum, int count, int[] regionX, int[] dst) {
if (count > regionX.length)
return;
IntVector acc = IntVector.fromArray(VI4, sum, regionX[0] * 4);
for (int i = 1; i < count; i++) {
acc = acc.add(IntVector.fromArray(VI4, sum, regionX[i] * 4));
}
acc.intoArray(dst, 0);
}
// Low 23 bits set (0x7FFFFF); used below as a per-lane bit mask.
private static final int MAX_INT = (1 << 23) - 1;
private static final IntVector INTEGER_MASK = IntVector.broadcast(VIP,
MAX_INT);
// 2^23 in every float lane. Presumably the usual "add 2^23, then read the
// low bits" float-to-int trick -- TODO confirm intent with the author.
private static final FloatVector IMPLICIT_ONE = FloatVector.broadcast(VFP,
MAX_INT + 1);
/**
 * For each STEP-wide slice starting at i: computes
 * {@code x = min(max(y * 42 + 43, x0), x1)}, adds IMPLICIT_ONE, reinterprets
 * the lanes as ints, masks with INTEGER_MASK, adds the int vector loaded from
 * {@code rowOffset}, and stores the result into {@code regionX} at i.
 *
 * There is no tail handling: every iteration loads and stores a full vector
 * at index i, so the arrays must hold a full vector from each i visited
 * (main() uses length 200 for all of them).
 */
static void bad(float[] regionY, float[] regionX0f, float[] regionX1f, int[]
rowOffset, int[] regionX, int count) {
FloatVector mNyNx = FloatVector.broadcast(VFP, 42);
FloatVector dNx = FloatVector.broadcast(VFP, 43);
for (int i = 0; i < count; i += STEP) {
FloatVector y = FloatVector.fromArray(VFP, regionY, i);
FloatVector x0 = FloatVector.fromArray(VFP, regionX0f, i);
FloatVector x1 = FloatVector.fromArray(VFP, regionX1f, i);
// Int vector load from rowOffset: this is the statement that good() in
// Demo2b does not have.
IntVector off = IntVector.fromArray(VIP, rowOffset, i);
FloatVector x = y.fma(mNyNx, dNx).max(x0).min(x1);
IntVector xi = x.add(IMPLICIT_ONE).viewAsIntegralLanes().and(INTEGER_MASK);
IntVector xOff = xi.add(off);
xOff.intoArray(regionX, i);
}
}
/**
 * Allocates zero-filled inputs and calls bad() then innocent()
 * 16 * 1024 * 1024 times so that both methods get JIT-compiled.
 */
public static void main(String[] args) {
int w = 300;
int h = 200;
int stride = 4 * (w + 1);
int[] sum = new int[h * stride];
float[] rY = new float[h];
float[] rX0f = new float[h];
float[] rX1f = new float[h];
int[] rowOffset = new int[h];
int[] rX = new int[h];
// Destination of innocent(): receives one 128-bit int vector.
int[] total = new int[4];
for (int i = 0; i < 16 * 1024 * 1024; ++i) {
bad(rY, rX0f, rX1f, rowOffset, rX, h);
innocent(sum, h, rX, total);
}
}
}
<<< File Demo2a.java end
File Demo2b.java begin >>>
import jdk.incubator.vector.FloatVector;
import jdk.incubator.vector.IntVector;
import jdk.incubator.vector.VectorSpecies;
/**
 * Reproducer, "good" control variant: main() alternates calls to
 * {@link #good} and {@link #innocent} in a hot loop. Per the accompanying
 * report, C2 emits fairly good assembly for innocent() in this program;
 * innocent() has the same source as in Demo2a.
 *
 * NOTE(review): this is a JIT reproducer; the exact statement shape is
 * presumably significant, so avoid refactoring the code itself.
 */
public class Demo2b {
// 256-bit float and int species used by good(); 128-bit int species used by innocent().
private static final VectorSpecies<Float> VFP = FloatVector.SPECIES_256;
private static final VectorSpecies<Integer> VIP = IntVector.SPECIES_256;
private static final VectorSpecies<Integer> VI4 = IntVector.SPECIES_128;
// Loop stride of good(): the lane count of the 256-bit float species.
static final int STEP = VFP.length();
/**
 * Sums 128-bit int vectors loaded from {@code sum} at offsets
 * {@code regionX[i] * 4} for i in [0, count) and stores the result into
 * {@code dst} starting at index 0.
 *
 * Returns without touching {@code dst} when {@code count > regionX.length}.
 * regionX[0] is read unconditionally after that guard, including when
 * count is less than 1.
 */
static void innocent(int[] sum, int count, int[] regionX, int[] dst) {
if (count > regionX.length)
return;
IntVector acc = IntVector.fromArray(VI4, sum, regionX[0] * 4);
for (int i = 1; i < count; i++) {
acc = acc.add(IntVector.fromArray(VI4, sum, regionX[i] * 4));
}
acc.intoArray(dst, 0);
}
// Low 23 bits set (0x7FFFFF); used below as a per-lane bit mask.
private static final int MAX_INT = (1 << 23) - 1;
private static final IntVector INTEGER_MASK = IntVector.broadcast(VIP,
MAX_INT);
// 2^23 in every float lane. Presumably the usual "add 2^23, then read the
// low bits" float-to-int trick -- TODO confirm intent with the author.
private static final FloatVector IMPLICIT_ONE = FloatVector.broadcast(VFP,
MAX_INT + 1);
/**
 * For each STEP-wide slice starting at i: computes
 * {@code x = min(max(y * 42 + 43, x0), x1)}, then {@code xOff = y * kappa + x}
 * in float lanes, adds IMPLICIT_ONE, reinterprets the lanes as ints, masks
 * with INTEGER_MASK, and stores the result into {@code regionX} at i.
 *
 * Unlike Demo2a.bad(), no IntVector is loaded from an array here; the offset
 * term is applied with a float FMA before the reinterpretation.
 *
 * There is no tail handling: every iteration loads and stores a full vector
 * at index i, so the arrays must hold a full vector from each i visited
 * (main() uses length 200 for all of them).
 */
static void good(float[] regionY, float[] regionX0f, float[] regionX1f, int
[] regionX, int count, int kappa) {
FloatVector mNyNx = FloatVector.broadcast(VFP, 42);
FloatVector dNx = FloatVector.broadcast(VFP, 43);
FloatVector k = FloatVector.broadcast(VFP, kappa);
for (int i = 0; i < count; i += STEP) {
FloatVector y = FloatVector.fromArray(VFP, regionY, i);
FloatVector x0 = FloatVector.fromArray(VFP, regionX0f, i);
FloatVector x1 = FloatVector.fromArray(VFP, regionX1f, i);
FloatVector x = y.fma(mNyNx, dNx).max(x0).min(x1);
FloatVector xOff = y.fma(k, x);
xOff.add(IMPLICIT_ONE).viewAsIntegralLanes().and(INTEGER_MASK).intoArray(
regionX, i);
}
}
/**
 * Allocates zero-filled inputs and calls good() then innocent()
 * 16 * 1024 * 1024 times so that both methods get JIT-compiled.
 */
public static void main(String[] args) {
int w = 300;
int h = 200;
int stride = 4 * (w + 1);
int[] sum = new int[h * stride];
float[] rY = new float[h];
float[] rX0f = new float[h];
float[] rX1f = new float[h];
int[] rX = new int[h];
// Destination of innocent(): receives one 128-bit int vector.
int[] total = new int[4];
for (int i = 0; i < 16 * 1024 * 1024; ++i) {
// kappa is stride / 4, i.e. w + 1.
good(rY, rX0f, rX1f, rX, h, stride / 4);
innocent(sum, h, rX, total);
}
}
}
<<< File Demo2b.java end
On Mon, 15 Mar 2021 at 10:33, Vladimir Ivanov <vladimir.x.ivanov at oracle.com>
wrote:
> Hi Eugene,
>
> Do you have a test case available so I can try to reproduce the problem
> myself?
>
> The only idea I have right now is that profile pollution is in play here:
>
> private static final VectorSpecies<Integer> VIP =
> IntVector.SPECIES_256;
> private static final VectorSpecies<Integer> VI4 =
> IntVector.SPECIES_128;
>
> static void sumAbs(int[] sum, int count, int[] regionX, int[] dst) {
> ...
> IntVector acc = IntVector.fromArray(VI4, sum, regionX[0] * 4);
>
> vs
>
> // BAD
> IntVector off = IntVector.fromArray(VIP, rowOffset, i);
>
>
> But considering VIP and VI4 are constants (static final), it's hard to
> see how it can be the case.
>
> Best regards,
> Vladimir Ivanov
>
> On 15.03.2021 01:58, Eugene Kluchnikov wrote:
> > private static final VectorSpecies<Float> VFP = FloatVector.SPECIES_256;
> > private static final VectorSpecies<Integer> VIP = IntVector.SPECIES_256;
> > private static final VectorSpecies<Integer> VI4 = IntVector.SPECIES_128;
> >
> > static final int STEP = VFP.length();
> >
> > static void sumAbs(int[] sum, int count, int[] regionX, int[] dst) {
> > if (count > regionX.length) return;
> > IntVector acc = IntVector.fromArray(VI4, sum, regionX[0] * 4);
> > for (int i = 1; i < count; i++) {
> > acc = acc.add(IntVector.fromArray(VI4, sum, regionX[i] * 4));
> > }
> > acc.intoArray(dst, 0);
> > }
> >
> > private static int MAX_INT = (1 << 23) - 1;
> > private static IntVector INTEGER_MASK = IntVector.broadcast(VIP,
> MAX_INT);
> > private static FloatVector IMPLICIT_ONE = FloatVector.broadcast(VFP,
> > MAX_INT + 1);
> >
> > // x >= (d - y * ny) / nx
> > static void updateGeGeneric(int angle, int d, float[] regionY, float[]
> > regionX0f,
> > float[] regionX1f, int[] rowOffset, int[] regionX, int count, int kappa)
> {
> > FloatVector mNyNx = FloatVector.broadcast(VFP, SinCos.MINUS_COT[angle]);
> > FloatVector dNx = FloatVector.broadcast(VFP, (float)(d * SinCos.INV_SIN[
> > angle]));
> > FloatVector k = FloatVector.broadcast(VFP, kappa);
> > for (int i = 0; i < count; i += STEP) {
> > FloatVector y = FloatVector.fromArray(VFP, regionY, i);
> > FloatVector x0 = FloatVector.fromArray(VFP, regionX0f, i);
> > FloatVector x1 = FloatVector.fromArray(VFP, regionX1f, i);
> >
> > // BAD
> > IntVector off = IntVector.fromArray(VIP, rowOffset, i);
> > FloatVector x = y.fma(mNyNx, dNx).max(x0).min(x1);
> > IntVector xi =
> x.add(IMPLICIT_ONE).viewAsIntegralLanes().and(INTEGER_MASK);
> > IntVector xOff = xi.add(off);
> > xOff.intoArray(regionX, i);
> >
> > // GOOD
> > //FloatVector x = y.fma(mNyNx, dNx).max(x0).min(x1);
> > //FloatVector xOff = y.fma(k, x);
> >
> //xOff.add(IMPLICIT_ONE).viewAsIntegralLanes().and(INTEGER_MASK).intoArray(regionX,
> > i);
> > }
> > }
>
--
С наилучшими пожеланиями, Евгений Ключников
WBR, Eugene Kluchnikov
More information about the panama-dev
mailing list