RFR: 8321001: RISC-V: C2 SignumVF [v3]

Wed Dec 6 11:37:01 UTC 2023

On Wed, 6 Dec 2023 02:32:57 GMT, Fei Yang <fyang at openjdk.org> wrote:

>> Hamlin Li has updated the pull request incrementally with one additional commit since the last revision:
>> 
>>   enable TestSignumVector.java on riscv
>
> src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp line 1687:
> 
>> 1685:   mv(t0, fclass_mask::zero | fclass_mask::nan);
>> 1686:   vand_vx(v0, v0, t0);
>> 1687:   vmseq_vv(v0, v0, zero);
> 
> I don't think that the input `zero` (a vector of floating-point 0.0) is appropriate here for `vmseq_vv` which does vector integer comparison. Why not do `vmseq_vi(v0, v0, 0)` instead? This will also help remove the `zero` parameter of this function.

Do you mean some patch like below?

diff --git a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp
index ed421c9e287..aa82447b943 100644
--- a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp
+++ b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp
@@ -1676,15 +1676,14 @@ void C2_MacroAssembler::signum_fp(FloatRegister dst, FloatRegister one, bool is_
   bind(done);
 }
 
-void C2_MacroAssembler::signum_fp_v(VectorRegister dst, BasicType bt, int vlen,
-                                    VectorRegister zero, VectorRegister one) {
+void C2_MacroAssembler::signum_fp_v(VectorRegister dst, BasicType bt, int vlen, VectorRegister one) {
   vsetvli_helper(bt, vlen);
 
   // check if input is -0, +0, signaling NaN or quiet NaN
   vfclass_v(v0, dst);
   mv(t0, fclass_mask::zero | fclass_mask::nan);
   vand_vx(v0, v0, t0);
-  vmseq_vv(v0, v0, zero);
+  vmseq_vi(v0, v0, 0);
 
   // use floating-point 1.0 with a sign of input
   vfsgnj_vv(dst, one, dst, v0_t);
diff --git a/src/hotspot/cpu/riscv/riscv_v.ad b/src/hotspot/cpu/riscv/riscv_v.ad
index 9b3c9125b3b..2940655f44e 100644
--- a/src/hotspot/cpu/riscv/riscv_v.ad
+++ b/src/hotspot/cpu/riscv/riscv_v.ad
@@ -3664,15 +3664,15 @@ instruct vexpand(vReg dst, vReg src, vRegMask_V0 v0, vReg tmp) %{
 
 // Vector Math.signum
 
-instruct vsignum_reg(vReg dst, vReg zero, vReg one, vRegMask_V0 v0) %{
-  match(Set dst (SignumVF dst (Binary zero one)));
-  match(Set dst (SignumVD dst (Binary zero one)));
+instruct vsignum_reg(vReg dst, vReg one, vRegMask_V0 v0) %{
+  match(Set dst (SignumVF dst one));
+  match(Set dst (SignumVD dst one));
   effect(TEMP_DEF dst, TEMP v0);
   format %{ "vsignum $dst, $dst\t" %}
   ins_encode %{
     BasicType bt = Matcher::vector_element_basic_type(this);
     __ signum_fp_v(as_VectorRegister($dst$$reg), bt, Matcher::vector_length(this),
-                  as_VectorRegister($zero$$reg), as_VectorRegister($one$$reg));
+                   as_VectorRegister($one$$reg));
   %}
   ins_pipe(pipe_slow);
 %}


In fact this is also my initial patch, but at runtime it will report a `mismatch` issue.
And in x86 and aarch64, they all have `zero` too.


o749  SignumVD  === _ o746 o1276  [[ o750 ]]  #vectora[4]:{double}

--N: o749  SignumVD  === _ o746 o1276  [[ o750 ]]  #vectora[4]:{double}

   --N: o746  LoadVector  === o440 o865 o697  |o250  [[ o749 ]]  @double[int:>=0] (java/lang/Cloneable,java/io/Serializable):NotNull:exact+any *, idx=6; mismatched #vectora[4]:{double}
   VREG  300  loadV
   VREG_V1  300  loadV
   VREG_V2  300  loadV
   VREG_V3  300  loadV
   VREG_V4  300  loadV
   VREG_V5  300  loadV
   VREG_V6  300  loadV
   VREG_V7  300  loadV
   VREG_V8  300  loadV
   VREG_V9  300  loadV
   VREG_V10  300  loadV
   VREG_V11  300  loadV
   VREG_V12  300  loadV
   VREG_V13  300  loadV
   VREG_V14  300  loadV
   VREG_V15  300  loadV

      --N: o697  AddP  === _ o61 o1069 o1177  [[ o746 ]] 
      IREGP  100  addP_reg_imm
      IREGPNOSP  100  addP_reg_imm
      IREGP_R10  100  addP_reg_imm
      IREGP_R11  100  addP_reg_imm
      IREGP_R12  100  addP_reg_imm
      IREGP_R13  100  addP_reg_imm
      IREGP_R14  100  addP_reg_imm
      IREGP_R15  100  addP_reg_imm
      IREGP_R16  100  addP_reg_imm
      IREGP_R28  100  addP_reg_imm
      IREGP_R30  100  addP_reg_imm
      IREGP_R31  100  addP_reg_imm
      JAVATHREAD_REGP  100  addP_reg_imm
      INDIRECT  100  addP_reg_imm
      INDOFFL  0  INDOFFL
      INLINE_CACHE_REGP  100  addP_reg_imm
      MEMORY  0  INDOFFL
      IREGNORP  100  IREGP
      IREGILNP  100  IREGP
      IREGILNPNOSP  100  IREGPNOSP
      VMEMA  100  INDIRECT

         --N: o1069  AddP  === _ o61 o61 o1124  [[ o1056 o1046 o954 o949 o697 o869 o1051 o1041 ]] 
         IREGP  0  IREGP
         IREGPNOSP  0  IREGPNOSP
         IREGP_R10  0  IREGP_R10
         IREGP_R11  0  IREGP_R11
         IREGP_R12  0  IREGP_R12
         IREGP_R13  0  IREGP_R13
         IREGP_R14  0  IREGP_R14
         IREGP_R15  0  IREGP_R15
         IREGP_R16  0  IREGP_R16
         IREGP_R28  0  IREGP_R28
         IREGP_R30  0  IREGP_R30
         IREGP_R31  0  IREGP_R31
         JAVATHREAD_REGP  0  JAVATHREAD_REGP
         INDIRECT  0  INDIRECT
         INLINE_CACHE_REGP  0  INLINE_CACHE_REGP
         MEMORY  0  INDIRECT
         IREGNORP  0  IREGP
         IREGILNP  0  IREGP
         IREGILNPNOSP  0  IREGPNOSP
         VMEMA  0  INDIRECT

         --N: o1177  ConL  === o0  [[ o697 o698 ]]  #long:240
         IMML  0  IMML
         IMMLADD  0  IMMLADD
         IMMLSUB  0  IMMLSUB
         IMMLOFFSET  0  IMMLOFFSET
         IREGL  100  loadConL
         IREGLNOSP  100  loadConL
         IREGL_R28  100  loadConL
         IREGL_R29  100  loadConL
         IREGL_R30  100  loadConL
         IREGL_R10  100  loadConL
         IREGIORL  100  IREGL
         IREGILNP  100  IREGL
         IREGILNPNOSP  100  IREGLNOSP
         IMMIORL  0  IMML

   --N: o1276  Binary  === _ o747 o748  [[ o749 ]] 
   _Binary_vReg_vReg  0  _Binary_vReg_vReg

      --N: o747  Replicate  === _ o192  [[ o1276 o1277 o1275 o1274 o1273 o1272 o1271 o1270 o1269 ]]  #vectora[4]:{double}
      VREG  0  VREG
      VREG_V1  0  VREG_V1
      VREG_V2  0  VREG_V2
      VREG_V3  0  VREG_V3
      VREG_V4  0  VREG_V4
      VREG_V5  0  VREG_V5
      VREG_V6  0  VREG_V6
      VREG_V7  0  VREG_V7
      VREG_V8  0  VREG_V8
      VREG_V9  0  VREG_V9
      VREG_V10  0  VREG_V10
      VREG_V11  0  VREG_V11
      VREG_V12  0  VREG_V12
      VREG_V13  0  VREG_V13
      VREG_V14  0  VREG_V14
      VREG_V15  0  VREG_V15

      --N: o748  Replicate  === _ o193  [[ o1276 o1277 o1275 o1274 o1273 o1272 o1271 o1270 o1269 ]]  #vectora[4]:{double}
      VREG  0  VREG
      VREG_V1  0  VREG_V1
      VREG_V2  0  VREG_V2
      VREG_V3  0  VREG_V3
      VREG_V4  0  VREG_V4
      VREG_V5  0  VREG_V5
      VREG_V6  0  VREG_V6
      VREG_V7  0  VREG_V7
      VREG_V8  0  VREG_V8
      VREG_V9  0  VREG_V9
      VREG_V10  0  VREG_V10
      VREG_V11  0  VREG_V11
      VREG_V12  0  VREG_V12
      VREG_V13  0  VREG_V13
      VREG_V14  0  VREG_V14
      VREG_V15  0  VREG_V15

> src/hotspot/cpu/riscv/riscv_v.ad line 3675:
> 
>> 3673:     BasicType bt = Matcher::vector_element_basic_type(this);
>> 3674:     __ signum_fp_v(as_VectorRegister($dst$$reg), bt, Matcher::vector_length(this),
>> 3675:                   as_VectorRegister($zero$$reg), as_VectorRegister($one$$reg));
> 
> Nit: maybe leave one extra space here to align with parameters of the preceding line.

> Ah, I would suggest remove them for test coverity reasons. We can add them back when needed.

That's a good point! the code is removed.

-------------

PR Review Comment: https://git.openjdk.org/jdk/pull/16925#discussion_r1417144005
PR Review Comment: https://git.openjdk.org/jdk/pull/16925#discussion_r1417144210