RFR: 8267370: [Vector API] Fix several crashes after JDK-8256973

Jatin Bhateja jbhateja at openjdk.java.net
Wed May 19 09:48:46 UTC 2021


On Wed, 19 May 2021 08:26:24 GMT, Jie Fu <jiefu at openjdk.org> wrote:

> Hi all,
> 
> Several vector tests fail with UseAVX=1 after JDK-8256973.
> The reason is that `vpmovmskb` [1] can be only used with UseAVX > 1 [2].
> The fix just disables the intrinsics when UseAVX < 2.
> 
> Testing:
>  - jdk/incubator/vector with UseAVX={0/1/2/3} on Linux/x64
> 
> Thanks.
> Best regards,
> Jie
> 
> [1] https://github.com/openjdk/jdk/blob/master/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp#L3785
> [2] https://github.com/openjdk/jdk/blob/master/src/hotspot/cpu/x86/assembler_x86.cpp#L4127

Hi @DamonFool , 

Thanks much for providing a quick fix. Problem here seems to be related to assert in following assembler routines which made an assumption that the instruction is only supported over AVX2 platforms.

>void Assembler::vpmovmskb(Register dst, XMMRegister src) {
> assert(VM_Version::supports_avx2(), "");


VEX.128.66.0F.WIG D7 /r VPMOVMSKB reg, xmm1 | RM | V/V | AVX | Move a byte mask of xmm1 to reg. The upper bits of r32 or r64 are filled with zeros.
-- | -- | -- | -- | --




But, its ok to limit the patch to AVX2 considering optimization is majorly aimed at modern server targets (which should supported AVX2). 

An alternate fix is proposed below which does not restrict the optimization for AVX2 and still keep the changes minimal, since movmaskb is already used at several places in context of 256 bit vector argument.


diff --git a/src/hotspot/cpu/x86/assembler_x86.cpp b/src/hotspot/cpu/x86/assembler_x86.cpp
index d915c846d09..c695da12d42 100644
--- a/src/hotspot/cpu/x86/assembler_x86.cpp
+++ b/src/hotspot/cpu/x86/assembler_x86.cpp
@@ -4123,9 +4123,10 @@ void Assembler::pmovmskb(Register dst, XMMRegister src) {
   emit_int16((unsigned char)0xD7, (0xC0 | encode));
 }
 
-void Assembler::vpmovmskb(Register dst, XMMRegister src) {
-  assert(VM_Version::supports_avx2(), "");
-  InstructionAttr attributes(AVX_256bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
+void Assembler::vpmovmskb(Register dst, XMMRegister src, int vec_enc) {
+  assert((VM_Version::supports_avx() && vec_enc == AVX_128bit) ||
+         (VM_Version::supports_avx2() && vec_enc  == AVX_256bit), "");
+  InstructionAttr attributes(vec_enc, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
   emit_int16((unsigned char)0xD7, (0xC0 | encode));
 }
diff --git a/src/hotspot/cpu/x86/assembler_x86.hpp b/src/hotspot/cpu/x86/assembler_x86.hpp
index 8526785eea1..a02fdf27582 100644
--- a/src/hotspot/cpu/x86/assembler_x86.hpp
+++ b/src/hotspot/cpu/x86/assembler_x86.hpp
@@ -1746,7 +1746,7 @@ private:
   void vpcmpgtq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
 
   void pmovmskb(Register dst, XMMRegister src);
-  void vpmovmskb(Register dst, XMMRegister src);
+  void vpmovmskb(Register dst, XMMRegister src, int vec_enc);
 
   // SSE 4.1 extract
   void pextrd(Register dst, XMMRegister src, int imm8);
diff --git a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp
index 01212d790fb..310bbbfa150 100644
--- a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp
+++ b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp
@@ -3782,7 +3782,7 @@ void C2_MacroAssembler::vector_mask_operation(int opc, Register dst, XMMRegister
   assert(VM_Version::supports_avx(), "");
   vpxor(xtmp, xtmp, xtmp, vec_enc);
   vpsubb(xtmp, xtmp, mask, vec_enc);
-  vpmovmskb(tmp, xtmp);
+  vpmovmskb(tmp, xtmp, vec_enc);
   switch(opc) {
     case Op_VectorMaskTrueCount:
       popcntq(dst, tmp);
diff --git a/src/hotspot/cpu/x86/macroAssembler_x86.cpp b/src/hotspot/cpu/x86/macroAssembler_x86.cpp
index 7c37899b456..3e95aa64a41 100644
--- a/src/hotspot/cpu/x86/macroAssembler_x86.cpp
+++ b/src/hotspot/cpu/x86/macroAssembler_x86.cpp
@@ -3216,9 +3216,9 @@ void MacroAssembler::vpmovzxbw(XMMRegister dst, Address src, int vector_len) {
   Assembler::vpmovzxbw(dst, src, vector_len);
 }
 
-void MacroAssembler::vpmovmskb(Register dst, XMMRegister src) {
+void MacroAssembler::vpmovmskb(Register dst, XMMRegister src, int vector_len) {
   assert((src->encoding() < 16),"XMM register should be 0-15");
-  Assembler::vpmovmskb(dst, src);
+  Assembler::vpmovmskb(dst, src, vector_len);
 }
 
 void MacroAssembler::vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
diff --git a/src/hotspot/cpu/x86/macroAssembler_x86.hpp b/src/hotspot/cpu/x86/macroAssembler_x86.hpp
index 074d4a61601..64f4d6e157b 100644
--- a/src/hotspot/cpu/x86/macroAssembler_x86.hpp
+++ b/src/hotspot/cpu/x86/macroAssembler_x86.hpp
@@ -1303,7 +1303,7 @@ public:
   void vpmovzxbw(XMMRegister dst, Address src, int vector_len);
   void vpmovzxbw(XMMRegister dst, XMMRegister src, int vector_len) { Assembler::vpmovzxbw(dst, src, vector_len); }
 
-  void vpmovmskb(Register dst, XMMRegister src);
+  void vpmovmskb(Register dst, XMMRegister src, int vector_len = Assembler::AVX_256bit);
 
   void vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
   void vpmullw(XMMRegister dst, XMMRegister nds, Address src, int vector_len);

-------------

PR: https://git.openjdk.java.net/jdk/pull/4109


More information about the hotspot-compiler-dev mailing list