[VectorAPI]anyTrue/allTrue got wrong results when using UseAVX=3
Vladimir Ivanov
vladimir.x.ivanov at oracle.com
Fri Jan 4 18:10:56 UTC 2019
Thanks, pushed [1].
Best regards,
Vladimir Ivanov
[1] http://hg.openjdk.java.net/panama/dev/rev/dfe1c1d8a316
On 04/01/2019 01:24, Wang Zhuo(Zhuoren) wrote:
> Thank you Ivanov,
>
> I used legVec* and it fixed this issue.
> Below is the new patch. Please review.
>
> diff -r f4f4ef0120ba src/hotspot/cpu/x86/x86.ad
> --- a/src/hotspot/cpu/x86/x86.ad Wed Dec 26 22:09:29 2018 +0100
> +++ b/src/hotspot/cpu/x86/x86.ad Fri Jan 04 17:11:08 2019 +0800
> @@ -21919,7 +21919,7 @@
> ins_pipe( pipe_slow );
> %}
>
> -instruct vptest4inae(rRegI dst, vecX src1, vecX src2) %{
> +instruct vptest4inae(rRegI dst, legVecX src1, legVecX src2) %{
> predicate(UseAVX > 0 && static_cast<const VectorTestNode*>(n)->get_predicate() == Assembler::carrySet);
> match(Set dst (VectorTest src1 src2 ));
> format %{ "vptest $src1,$src2\n\t"
> @@ -21933,7 +21933,7 @@
> ins_pipe( pipe_slow );
> %}
>
> -instruct vptest4ieq(rRegI dst, vecX src1, vecX src2) %{
> +instruct vptest4ieq(rRegI dst, legVecX src1, legVecX src2) %{
> predicate(UseAVX > 0 && static_cast<const VectorTestNode*>(n)->get_predicate() == Assembler::notZero);
> match(Set dst (VectorTest src1 src2 ));
> format %{ "vptest $src1,$src2\n\t"
> @@ -21947,7 +21947,7 @@
> ins_pipe( pipe_slow );
> %}
>
> -instruct vptest8inae(rRegI dst, vecY src1, vecY src2) %{
> +instruct vptest8inae(rRegI dst, legVecY src1, legVecY src2) %{
> predicate(UseAVX > 0 && static_cast<const VectorTestNode*>(n)->get_predicate() == Assembler::carrySet);
> match(Set dst (VectorTest src1 src2 ));
> format %{ "vptest $src1,$src2\n\t"
> @@ -21961,7 +21961,7 @@
> ins_pipe( pipe_slow );
> %}
>
> -instruct vptest8ieq(rRegI dst, vecY src1, vecY src2) %{
> +instruct vptest8ieq(rRegI dst, legVecY src1, legVecY src2) %{
> predicate(UseAVX > 0 && static_cast<const VectorTestNode*>(n)->get_predicate() == Assembler::notZero);
> match(Set dst (VectorTest src1 src2 ));
> format %{ "vptest $src1,$src2\n\t"
>
> Regards,
> Zhuoren
>
> ------------------------------------------------------------------
> Sender:Vladimir Ivanov <vladimir.x.ivanov at oracle.com>
> Sent At:2019 Jan. 4 (Fri.) 05:24
> Recipient:Sandler <zhuoren.wz at alibaba-inc.com>; panama-dev
> <panama-dev at openjdk.java.net>
> Cc:li sanhong <sanhong.lsh at alibaba-inc.com>; Kingsum Chow
> <kingsum.kc at alibaba-inc.com>
> Subject:Re: [VectorAPI]anyTrue/allTrue got wrong results when using
> UseAVX=3
>
> Good catch, Zhuoren!
>
> The problem looks very similar to the one fixed by JDK-8210764 [1].
>
> Switching to restricted register classes (legVec*) introduced as part of
>
> JDK-8210764 looks like the optimal fix in this case.
>
> Best regards,
> Vladimir Ivanov
>
> [1] https://bugs.openjdk.java.net/browse/JDK-8210764
>
> On 02/01/2019 01:47, 王卓(卓仁) wrote:
> >
> > Hello, I found anyTrue/allTrue got wrong results when using UseAVX=3.
> >
> > Let me describe this bug. Under UseAVX=3, following assembly code may be generated for anyTrue/allTrue.
> >
> > 0x00002ad5dca584f6: vpxord %xmm22,%xmm22,%xmm22
> > 0x00002ad5dca584fc: vpsubb %xmm0,%xmm22,%xmm22
> > 0x00002ad5dca58502: vpmovsxbd %xmm22,%ymm22
> > 0x00002ad5dca58508: vptest %ymm1,%ymm6
> > 0x00002ad5dca5850d: setb %r10b
> >
> > The second operand of vptest should be ymm22. But since there is no EVEX version for vptest, it is encoded as ymm6 and gives us wrong results.
> >
> > Currently I am using the following fix for this bug. Move source operands into ymm14/ymm15/xmm14/xmm15, and then they are used as operands of vptest. Please give advise on this fix.
> > diff -r 636478e1ee75 src/hotspot/cpu/x86/x86.ad
> > --- a/src/hotspot/cpu/x86/x86.ad Thu Dec 13 16:40:28 2018 -0800
> > +++ b/src/hotspot/cpu/x86/x86.ad Sat Dec 29 11:16:55 2018 +0800
> > @@ -21930,7 +21930,7 @@
> > %}
> >
> > instruct vptest4ieq(rRegI dst, vecX src1, vecX src2) %{
> > - predicate(UseAVX > 0 && static_cast<const VectorTestNode*>(n)->get_predicate() == Assembler::notZero);
> > + predicate(UseAVX < 3 && UseAVX > 0 && static_cast<const VectorTestNode*>(n)->get_predicate() == Assembler::notZero);
> > match(Set dst (VectorTest src1 src2 ));
> > format %{ "vptest $src1,$src2\n\t"
> > "setb $dst\t!" %}
> > @@ -21943,8 +21943,54 @@
> > ins_pipe( pipe_slow );
> > %}
> >
> > +instruct vptest4inaeavx3(rRegI dst, vecX src1, vecX src2, rxmm14 tmp14, rxmm15 tmp15) %{
> > + predicate(UseAVX > 2 && static_cast<const VectorTestNode*>(n)->get_predicate() == Assembler::carrySet);
> > + match(Set dst (VectorTest src1 src2 ));
> > + effect(TEMP tmp14, TEMP tmp15);
> > + format %{ "movdqu $tmp14,$src1\n\t"
> > + "movdqu $tmp15,$src2\n\t"
> > + "vptest $tmp14,$tmp15\n\t"
> > + "setb $dst\t!" %}
> > + ins_encode %{
> > + int vector_len = 0;
> > + if ($tmp14$$XMMRegister != $src1$$XMMRegister) {
> > + __ movdqu($tmp14$$XMMRegister, $src1$$XMMRegister);
> > + }
> > + if ($tmp15$$XMMRegister != $src2$$XMMRegister) {
> > + __ movdqu($tmp15$$XMMRegister, $src2$$XMMRegister);
> > + }
> > + __ vptest($tmp14$$XMMRegister, $tmp15$$XMMRegister, vector_len);
> > + __ setb(Assembler::carrySet, $dst$$Register);
> > + __ movzbl($dst$$Register, $dst$$Register);
> > + %}
> > + ins_pipe( pipe_slow );
> > +%}
> > +
> > +instruct vptest4ieqavx3(rRegI dst, vecX src1, vecX src2, rxmm14 tmp14, rxmm15 tmp15) %{
> > + predicate(UseAVX > 0 && static_cast<const VectorTestNode*>(n)->get_predicate() == Assembler::notZero);
> > + match(Set dst (VectorTest src1 src2 ));
> > + effect(TEMP tmp14, TEMP tmp15);
> > + format %{ "movdqu $tmp14,$src1\n\t"
> > + "movdqu $tmp15,$src2\n\t"
> > + "vptest $tmp14,$tmp15\n\t"
> > + "setb $dst\t!" %}
> > + ins_encode %{
> > + int vector_len = 0;
> > + if ($tmp14$$XMMRegister != $src1$$XMMRegister) {
> > + __ movdqu($tmp14$$XMMRegister, $src1$$XMMRegister);
> > + }
> > + if ($tmp15$$XMMRegister != $src2$$XMMRegister) {
> > + __ movdqu($tmp15$$XMMRegister, $src2$$XMMRegister);
> > + }
> > + __ vptest($tmp14$$XMMRegister, $tmp15$$XMMRegister, vector_len);
> > + __ setb(Assembler::notZero, $dst$$Register);
> > + __ movzbl($dst$$Register, $dst$$Register);
> > + %}
> > + ins_pipe( pipe_slow );
> > +%}
> > +
> > instruct vptest8inae(rRegI dst, vecY src1, vecY src2) %{
> > - predicate(UseAVX > 0 && static_cast<const VectorTestNode*>(n)->get_predicate() == Assembler::carrySet);
> > + predicate(UseAVX < 3 && UseAVX > 0 && static_cast<const VectorTestNode*>(n)->get_predicate() == Assembler::carrySet);
> > match(Set dst (VectorTest src1 src2 ));
> > format %{ "vptest $src1,$src2\n\t"
> > "setb $dst\t!" %}
> > @@ -21958,7 +22004,7 @@
> > %}
> >
> > instruct vptest8ieq(rRegI dst, vecY src1, vecY src2) %{
> > - predicate(UseAVX > 0 && static_cast<const VectorTestNode*>(n)->get_predicate() == Assembler::notZero);
> > + predicate(UseAVX < 3 && UseAVX > 0 && static_cast<const VectorTestNode*>(n)->get_predicate() == Assembler::notZero);
> > match(Set dst (VectorTest src1 src2 ));
> > format %{ "vptest $src1,$src2\n\t"
> > "setb $dst\t!" %}
> > @@ -21971,6 +22017,52 @@
> > ins_pipe( pipe_slow );
> > %}
> > +instruct vptest8inaeavx3(rRegI dst, vecY src1, vecY src2, rymm14 tmp14, rymm15 tmp15) %{
> > + predicate(UseAVX > 2 && static_cast<const VectorTestNode*>(n)->get_predicate() == Assembler::carrySet);
> > + match(Set dst (VectorTest src1 src2 ));
> > + effect(TEMP tmp14, TEMP tmp15);
> > + format %{ "movdqu $tmp14,$src1\n\t"
> > + "movdqu $tmp15,$src2\n\t"
> > + "vptest $tmp14,$tmp15\n\t"
> > + "setb $dst\t!" %}
> > + ins_encode %{
> > + int vector_len = 1;
> > + if ($tmp14$$XMMRegister != $src1$$XMMRegister) {
> > + __ vmovdqu($tmp14$$XMMRegister, $src1$$XMMRegister);
> > + }
> > + if ($tmp15$$XMMRegister != $src2$$XMMRegister) {
> > + __ vmovdqu($tmp15$$XMMRegister, $src2$$XMMRegister);
> > + }
> > + __ vptest($tmp14$$XMMRegister, $tmp15$$XMMRegister, vector_len);
> > + __ setb(Assembler::carrySet, $dst$$Register);
> > + __ movzbl($dst$$Register, $dst$$Register);
> > + %}
> > + ins_pipe( pipe_slow );
> > +%}
> > +
> > +instruct vptest8ieqavx3(rRegI dst, vecY src1, vecY src2, rymm14 tmp14, rymm15 tmp15) %{
> > + predicate(UseAVX > 2 && static_cast<const VectorTestNode*>(n)->get_predicate() == Assembler::notZero);
> > + match(Set dst (VectorTest src1 src2 ));
> > + effect(TEMP tmp14, TEMP tmp15);
> > + format %{ "movdqu $tmp14,$src1\n\t"
> > + "movdqu $tmp15,$src2\n\t"
> > + "vptest $tmp14,$tmp15\n\t"
> > + "setb $dst\t!" %}
> > + ins_encode %{
> > + int vector_len = 1;
> > + if ($tmp14$$XMMRegister != $src1$$XMMRegister) {
> > + __ vmovdqu($tmp14$$XMMRegister, $src1$$XMMRegister);
> > + }
> > + if ($tmp15$$XMMRegister != $src2$$XMMRegister) {
> > + __ vmovdqu($tmp15$$XMMRegister, $src2$$XMMRegister);
> > + }
> > + __ vptest($tmp14$$XMMRegister, $tmp15$$XMMRegister, vector_len);
> > + __ setb(Assembler::notZero, $dst$$Register);
> > + __ movzbl($dst$$Register, $dst$$Register);
> > + %}
> > + ins_pipe( pipe_slow );
> > +%}
> > +
> > instruct loadmask8b(vecD dst, vecD src) %{
> > predicate(UseSSE >= 2 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
> > match(Set dst (VectorLoadMask src));
> > diff -r 636478e1ee75 src/hotspot/cpu/x86/x86_64.ad
> > --- a/src/hotspot/cpu/x86/x86_64.ad Thu Dec 13 16:40:28 2018 -0800
> > +++ b/src/hotspot/cpu/x86/x86_64.ad Sat Dec 29 11:16:55 2018 +0800
> > @@ -4426,6 +4426,17 @@
> > predicate(UseAVX == 3); format%{%} interface(REG_INTER);
> > %}
> >
> > +
> > +operand rymm14() %{
> > + constraint(ALLOC_IN_RC(ymm14_reg)); match(VecY);
> > + predicate((UseSSE > 0) && (UseAVX <= 3)); format%{%} interface(REG_INTER);
> > +%}
> > +operand rymm15() %{
> > + constraint(ALLOC_IN_RC(ymm15_reg)); match(VecY);
> > + predicate((UseSSE > 0) && (UseAVX <= 3)); format%{%} interface(REG_INTER);
> > +%}
> > +
> > +
> > //----------OPERAND CLASSES----------------------------------------------------
> > // Operand Classes are groups of operands that are used as to simplify
> > // instruction definitions by not requiring the AD writer to specify separate
> >
> >
> > The test to reproduce this bug. Please DO set -XX:UseAVX=3
> >
> > import jdk.incubator.vector.*;
> > import java.util.Arrays;
> > import java.util.Random;
> > import java.lang.reflect.Field;
> > import java.io.IOException;
> > import jdk.incubator.vector.Vector.Mask;
> > import jdk.incubator.vector.Vector.Shape;
> > public class VectorTrueTest
> > {
> >
> > static Random random = new Random();
> > static final IntVector.IntSpecies Species = IntVector.species(Vector.Shape.S_256_BIT);
> > public static int size = 1024 * 16;
> > public static int length = Species.length();
> > public static int resultSize = size / length;
> > static boolean[] anyResultV = new boolean[resultSize];
> > static boolean[] allResultV = new boolean[resultSize];
> > static boolean[] anyInput = new boolean[size];
> > static boolean[] allInput = new boolean[size];
> > public static void main(String[] args) throws NoSuchFieldException, SecurityException, IllegalArgumentException, IllegalAccessException, InstantiationException {
> > long start0 = System.currentTimeMillis();
> > long startv = System.currentTimeMillis();
> > long normalTime = 0;
> > long vecTime = 0;
> > int i = 0;
> > for (i = 0; i < size; i++) {
> > anyInput[i] = false;
> > allInput[i] = false;
> > }
> > for (i = 0; i < 20000; i++) {
> > vecTest(Species);
> > }
> > for (i = 0; i < resultSize; i++) {
> > anyResultV[i] = true;
> > allResultV[i] = true;
> > }
> > vecTest(Species);
> > for (i = 0; i < (resultSize - 1); i++) {
> > if (anyResultV[i] != false) throw new RuntimeException("Wrong anyTrue result! Should be all false, index " + i);
> > if (allResultV[i] != false) throw new RuntimeException("Wrong allTrue result! Should be all false, index " + i);
> > }
> > }
> > static void vecTest(IntVector.IntSpecies Speciesint ) {
> > IntVector v0;
> > int i = 0;
> > int j = 0;
> > Mask maskAny = Speciesint.maskFromArray(anyInput, i);
> > Mask maskAll = Speciesint.maskFromArray(allInput, i);
> > for (i = 0; i + (Speciesint.length()) <= size; i += Speciesint.length()) {
> > allResultV[j] = maskAll.allTrue();
> > anyResultV[j] = maskAny.anyTrue();
> > j++;
> > }
> > return;
> > }
> > }
> >
> > Regards,
> > Zhuoren
> >
> >
> >
> >
>
More information about the panama-dev
mailing list