Re: [VectorAPI]anyTrue/allTrue got wrong results when using UseAVX=3
Wang Zhuo(Zhuoren)
zhuoren.wz at alibaba-inc.com
Fri Jan 4 09:24:00 UTC 2019
Thank you Ivanov,
I used legVec* and it fixed this issue.
Below is the new patch. Please review.
diff -r f4f4ef0120ba src/hotspot/cpu/x86/x86.ad
--- a/src/hotspot/cpu/x86/x86.ad Wed Dec 26 22:09:29 2018 +0100
+++ b/src/hotspot/cpu/x86/x86.ad Fri Jan 04 17:11:08 2019 +0800
@@ -21919,7 +21919,7 @@
ins_pipe( pipe_slow );
%}
-instruct vptest4inae(rRegI dst, vecX src1, vecX src2) %{
+instruct vptest4inae(rRegI dst, legVecX src1, legVecX src2) %{
predicate(UseAVX > 0 && static_cast<const VectorTestNode*>(n)->get_predicate() == Assembler::carrySet);
match(Set dst (VectorTest src1 src2 ));
format %{ "vptest $src1,$src2\n\t"
@@ -21933,7 +21933,7 @@
ins_pipe( pipe_slow );
%}
-instruct vptest4ieq(rRegI dst, vecX src1, vecX src2) %{
+instruct vptest4ieq(rRegI dst, legVecX src1, legVecX src2) %{
predicate(UseAVX > 0 && static_cast<const VectorTestNode*>(n)->get_predicate() == Assembler::notZero);
match(Set dst (VectorTest src1 src2 ));
format %{ "vptest $src1,$src2\n\t"
@@ -21947,7 +21947,7 @@
ins_pipe( pipe_slow );
%}
-instruct vptest8inae(rRegI dst, vecY src1, vecY src2) %{
+instruct vptest8inae(rRegI dst, legVecY src1, legVecY src2) %{
predicate(UseAVX > 0 && static_cast<const VectorTestNode*>(n)->get_predicate() == Assembler::carrySet);
match(Set dst (VectorTest src1 src2 ));
format %{ "vptest $src1,$src2\n\t"
@@ -21961,7 +21961,7 @@
ins_pipe( pipe_slow );
%}
-instruct vptest8ieq(rRegI dst, vecY src1, vecY src2) %{
+instruct vptest8ieq(rRegI dst, legVecY src1, legVecY src2) %{
predicate(UseAVX > 0 && static_cast<const VectorTestNode*>(n)->get_predicate() == Assembler::notZero);
match(Set dst (VectorTest src1 src2 ));
format %{ "vptest $src1,$src2\n\t"
Regards,
Zhuoren
------------------------------------------------------------------
Sender:Vladimir Ivanov <vladimir.x.ivanov at oracle.com>
Sent At:2019 Jan. 4 (Fri.) 05:24
Recipient:Sandler <zhuoren.wz at alibaba-inc.com>; panama-dev <panama-dev at openjdk.java.net>
Cc:li sanhong <sanhong.lsh at alibaba-inc.com>; Kingsum Chow <kingsum.kc at alibaba-inc.com>
Subject:Re: [VectorAPI]anyTrue/allTrue got wrong results when using UseAVX=3
Good catch, Zhuoren!
The problem looks very similar to the one fixed by JDK-8210764 [1].
Switching to restricted register classes (legVec*) introduced as part of
JDK-8210764 looks like the optimal fix in this case.
Best regards,
Vladimir Ivanov
[1] https://bugs.openjdk.java.net/browse/JDK-8210764
On 02/01/2019 01:47, 王卓(卓仁) wrote:
>
> Hello, I found anyTrue/allTrue got wrong results when using UseAVX=3.
>
> Let me describe this bug. Under UseAVX=3, following assembly code may be generated for anyTrue/allTrue.
>
> 0x00002ad5dca584f6: vpxord %xmm22,%xmm22,%xmm22
> 0x00002ad5dca584fc: vpsubb %xmm0,%xmm22,%xmm22
> 0x00002ad5dca58502: vpmovsxbd %xmm22,%ymm22
> 0x00002ad5dca58508: vptest %ymm1,%ymm6
> 0x00002ad5dca5850d: setb %r10b
>
> The second operand of vptest should be ymm22. But since there is no EVEX version for vptest, it is encoded as ymm6 and gives us wrong results.
>
> Currently I am using the following fix for this bug. Move source operands into ymm14/ymm15/xmm14/xmm15, and then they are used as operands of vptest. Please give advise on this fix.
> diff -r 636478e1ee75 src/hotspot/cpu/x86/x86.ad
> --- a/src/hotspot/cpu/x86/x86.ad Thu Dec 13 16:40:28 2018 -0800
> +++ b/src/hotspot/cpu/x86/x86.ad Sat Dec 29 11:16:55 2018 +0800
> @@ -21930,7 +21930,7 @@
> %}
>
> instruct vptest4ieq(rRegI dst, vecX src1, vecX src2) %{
> - predicate(UseAVX > 0 && static_cast<const VectorTestNode*>(n)->get_predicate() == Assembler::notZero);
> + predicate(UseAVX < 3 && UseAVX > 0 && static_cast<const VectorTestNode*>(n)->get_predicate() == Assembler::notZero);
> match(Set dst (VectorTest src1 src2 ));
> format %{ "vptest $src1,$src2\n\t"
> "setb $dst\t!" %}
> @@ -21943,8 +21943,54 @@
> ins_pipe( pipe_slow );
> %}
>
> +instruct vptest4inaeavx3(rRegI dst, vecX src1, vecX src2, rxmm14 tmp14, rxmm15 tmp15) %{
> + predicate(UseAVX > 2 && static_cast<const VectorTestNode*>(n)->get_predicate() == Assembler::carrySet);
> + match(Set dst (VectorTest src1 src2 ));
> + effect(TEMP tmp14, TEMP tmp15);
> + format %{ "movdqu $tmp14,$src1\n\t"
> + "movdqu $tmp15,$src2\n\t"
> + "vptest $tmp14,$tmp15\n\t"
> + "setb $dst\t!" %}
> + ins_encode %{
> + int vector_len = 0;
> + if ($tmp14$$XMMRegister != $src1$$XMMRegister) {
> + __ movdqu($tmp14$$XMMRegister, $src1$$XMMRegister);
> + }
> + if ($tmp15$$XMMRegister != $src2$$XMMRegister) {
> + __ movdqu($tmp15$$XMMRegister, $src2$$XMMRegister);
> + }
> + __ vptest($tmp14$$XMMRegister, $tmp15$$XMMRegister, vector_len);
> + __ setb(Assembler::carrySet, $dst$$Register);
> + __ movzbl($dst$$Register, $dst$$Register);
> + %}
> + ins_pipe( pipe_slow );
> +%}
> +
> +instruct vptest4ieqavx3(rRegI dst, vecX src1, vecX src2, rxmm14 tmp14, rxmm15 tmp15) %{
> + predicate(UseAVX > 0 && static_cast<const VectorTestNode*>(n)->get_predicate() == Assembler::notZero);
> + match(Set dst (VectorTest src1 src2 ));
> + effect(TEMP tmp14, TEMP tmp15);
> + format %{ "movdqu $tmp14,$src1\n\t"
> + "movdqu $tmp15,$src2\n\t"
> + "vptest $tmp14,$tmp15\n\t"
> + "setb $dst\t!" %}
> + ins_encode %{
> + int vector_len = 0;
> + if ($tmp14$$XMMRegister != $src1$$XMMRegister) {
> + __ movdqu($tmp14$$XMMRegister, $src1$$XMMRegister);
> + }
> + if ($tmp15$$XMMRegister != $src2$$XMMRegister) {
> + __ movdqu($tmp15$$XMMRegister, $src2$$XMMRegister);
> + }
> + __ vptest($tmp14$$XMMRegister, $tmp15$$XMMRegister, vector_len);
> + __ setb(Assembler::notZero, $dst$$Register);
> + __ movzbl($dst$$Register, $dst$$Register);
> + %}
> + ins_pipe( pipe_slow );
> +%}
> +
> instruct vptest8inae(rRegI dst, vecY src1, vecY src2) %{
> - predicate(UseAVX > 0 && static_cast<const VectorTestNode*>(n)->get_predicate() == Assembler::carrySet);
> + predicate(UseAVX < 3 && UseAVX > 0 && static_cast<const VectorTestNode*>(n)->get_predicate() == Assembler::carrySet);
> match(Set dst (VectorTest src1 src2 ));
> format %{ "vptest $src1,$src2\n\t"
> "setb $dst\t!" %}
> @@ -21958,7 +22004,7 @@
> %}
>
> instruct vptest8ieq(rRegI dst, vecY src1, vecY src2) %{
> - predicate(UseAVX > 0 && static_cast<const VectorTestNode*>(n)->get_predicate() == Assembler::notZero);
> + predicate(UseAVX < 3 && UseAVX > 0 && static_cast<const VectorTestNode*>(n)->get_predicate() == Assembler::notZero);
> match(Set dst (VectorTest src1 src2 ));
> format %{ "vptest $src1,$src2\n\t"
> "setb $dst\t!" %}
> @@ -21971,6 +22017,52 @@
> ins_pipe( pipe_slow );
> %}
> +instruct vptest8inaeavx3(rRegI dst, vecY src1, vecY src2, rymm14 tmp14, rymm15 tmp15) %{
> + predicate(UseAVX > 2 && static_cast<const VectorTestNode*>(n)->get_predicate() == Assembler::carrySet);
> + match(Set dst (VectorTest src1 src2 ));
> + effect(TEMP tmp14, TEMP tmp15);
> + format %{ "movdqu $tmp14,$src1\n\t"
> + "movdqu $tmp15,$src2\n\t"
> + "vptest $tmp14,$tmp15\n\t"
> + "setb $dst\t!" %}
> + ins_encode %{
> + int vector_len = 1;
> + if ($tmp14$$XMMRegister != $src1$$XMMRegister) {
> + __ vmovdqu($tmp14$$XMMRegister, $src1$$XMMRegister);
> + }
> + if ($tmp15$$XMMRegister != $src2$$XMMRegister) {
> + __ vmovdqu($tmp15$$XMMRegister, $src2$$XMMRegister);
> + }
> + __ vptest($tmp14$$XMMRegister, $tmp15$$XMMRegister, vector_len);
> + __ setb(Assembler::carrySet, $dst$$Register);
> + __ movzbl($dst$$Register, $dst$$Register);
> + %}
> + ins_pipe( pipe_slow );
> +%}
> +
> +instruct vptest8ieqavx3(rRegI dst, vecY src1, vecY src2, rymm14 tmp14, rymm15 tmp15) %{
> + predicate(UseAVX > 2 && static_cast<const VectorTestNode*>(n)->get_predicate() == Assembler::notZero);
> + match(Set dst (VectorTest src1 src2 ));
> + effect(TEMP tmp14, TEMP tmp15);
> + format %{ "movdqu $tmp14,$src1\n\t"
> + "movdqu $tmp15,$src2\n\t"
> + "vptest $tmp14,$tmp15\n\t"
> + "setb $dst\t!" %}
> + ins_encode %{
> + int vector_len = 1;
> + if ($tmp14$$XMMRegister != $src1$$XMMRegister) {
> + __ vmovdqu($tmp14$$XMMRegister, $src1$$XMMRegister);
> + }
> + if ($tmp15$$XMMRegister != $src2$$XMMRegister) {
> + __ vmovdqu($tmp15$$XMMRegister, $src2$$XMMRegister);
> + }
> + __ vptest($tmp14$$XMMRegister, $tmp15$$XMMRegister, vector_len);
> + __ setb(Assembler::notZero, $dst$$Register);
> + __ movzbl($dst$$Register, $dst$$Register);
> + %}
> + ins_pipe( pipe_slow );
> +%}
> +
> instruct loadmask8b(vecD dst, vecD src) %{
> predicate(UseSSE >= 2 && n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
> match(Set dst (VectorLoadMask src));
> diff -r 636478e1ee75 src/hotspot/cpu/x86/x86_64.ad
> --- a/src/hotspot/cpu/x86/x86_64.ad Thu Dec 13 16:40:28 2018 -0800
> +++ b/src/hotspot/cpu/x86/x86_64.ad Sat Dec 29 11:16:55 2018 +0800
> @@ -4426,6 +4426,17 @@
> predicate(UseAVX == 3); format%{%} interface(REG_INTER);
> %}
>
> +
> +operand rymm14() %{
> + constraint(ALLOC_IN_RC(ymm14_reg)); match(VecY);
> + predicate((UseSSE > 0) && (UseAVX <= 3)); format%{%} interface(REG_INTER);
> +%}
> +operand rymm15() %{
> + constraint(ALLOC_IN_RC(ymm15_reg)); match(VecY);
> + predicate((UseSSE > 0) && (UseAVX <= 3)); format%{%} interface(REG_INTER);
> +%}
> +
> +
> //----------OPERAND CLASSES----------------------------------------------------
> // Operand Classes are groups of operands that are used as to simplify
> // instruction definitions by not requiring the AD writer to specify separate
>
>
> The test to reproduce this bug. Please DO set -XX:UseAVX=3
>
> import jdk.incubator.vector.*;
> import java.util.Arrays;
> import java.util.Random;
> import java.lang.reflect.Field;
> import java.io.IOException;
> import jdk.incubator.vector.Vector.Mask;
> import jdk.incubator.vector.Vector.Shape;
> public class VectorTrueTest
> {
>
> static Random random = new Random();
> static final IntVector.IntSpecies Species = IntVector.species(Vector.Shape.S_256_BIT);
> public static int size = 1024 * 16;
> public static int length = Species.length();
> public static int resultSize = size / length;
> static boolean[] anyResultV = new boolean[resultSize];
> static boolean[] allResultV = new boolean[resultSize];
> static boolean[] anyInput = new boolean[size];
> static boolean[] allInput = new boolean[size];
> public static void main(String[] args) throws NoSuchFieldException, SecurityException, IllegalArgumentException, IllegalAccessException, InstantiationException {
> long start0 = System.currentTimeMillis();
> long startv = System.currentTimeMillis();
> long normalTime = 0;
> long vecTime = 0;
> int i = 0;
> for (i = 0; i < size; i++) {
> anyInput[i] = false;
> allInput[i] = false;
> }
> for (i = 0; i < 20000; i++) {
> vecTest(Species);
> }
> for (i = 0; i < resultSize; i++) {
> anyResultV[i] = true;
> allResultV[i] = true;
> }
> vecTest(Species);
> for (i = 0; i < (resultSize - 1); i++) {
> if (anyResultV[i] != false) throw new RuntimeException("Wrong anyTrue result! Should be all false, index " + i);
> if (allResultV[i] != false) throw new RuntimeException("Wrong allTrue result! Should be all false, index " + i);
> }
> }
> static void vecTest(IntVector.IntSpecies Speciesint ) {
> IntVector v0;
> int i = 0;
> int j = 0;
> Mask maskAny = Speciesint.maskFromArray(anyInput, i);
> Mask maskAll = Speciesint.maskFromArray(allInput, i);
> for (i = 0; i + (Speciesint.length()) <= size; i += Speciesint.length()) {
> allResultV[j] = maskAll.allTrue();
> anyResultV[j] = maskAny.anyTrue();
> j++;
> }
> return;
> }
> }
>
> Regards,
> Zhuoren
>
>
>
>
More information about the panama-dev
mailing list