RFR: 8329035: New Data Destination instructions support [v2]

Thu Sep 5 23:14:04 UTC 2024

On Tue, 3 Sep 2024 22:22:56 GMT, Steve Dohrmann <sdohrmann at openjdk.org> wrote:

>> Adds assembler support for APX New Data Destination (NDD) and No Flags (NF) features.  
>> 
>> The NDD feature is supported by new functions that take an additional destination-only register operand.  If the instruction also supports NF, a no_flags boolean parameter is present. To use these instructions with NF behavior, but without NDD semantics, the same register can be supplied for both the new destination and the (first) source operand.
>> 
>> Some instructions support NF but not NDD.  These instructions have a new function that just adds a boolean no_flags parameter.  Existing functions were not overloaded with a boolean here because of  signature collisions (bool / int) with functions that take immediate operands.
>> 
>> All of the new functions have a letter "e" prefix, to avoid signature collisions and to indicate they will be evex encoded.
>
> Steve Dohrmann has updated the pull request incrementally with one additional commit since the last revision:
> 
>   function name changes based on review comments

src/hotspot/cpu/x86/assembler_x86.cpp line 4583:

> 4581:   InstructionMark im(this);
> 4582:   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
> 4583:   attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_64bit);

input_size_in_bits should be EVEX_32bit.

src/hotspot/cpu/x86/assembler_x86.cpp line 4637:

> 4635:   InstructionMark im(this);
> 4636:   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
> 4637:   attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_64bit);

input_size_in_bits should be EVEX_32bit.

src/hotspot/cpu/x86/assembler_x86.cpp line 6593:

> 6591: 
> 6592: void Assembler::erolq(Register dst, Register src, bool no_flags) {
> 6593:   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);

vex_w should be true here.

src/hotspot/cpu/x86/assembler_x86.cpp line 6647:

> 6645:   assert(isShiftCount(imm8), "illegal shift count");
> 6646:   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
> 6647:   attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_64bit);

input_size_in_bits should be EVEX_32bit.

src/hotspot/cpu/x86/assembler_x86.cpp line 6670:

> 6668:   InstructionMark im(this);
> 6669:   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
> 6670:   attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_64bit);

input_size_in_bits should be EVEX_32bit.

src/hotspot/cpu/x86/assembler_x86.cpp line 6686:

> 6684: }
> 6685: 
> 6686: void Assembler::esall(Register dst, Register src, int imm8, bool no_flags) {

assert(isShiftCount(imm8), "illegal shift count") missing.

src/hotspot/cpu/x86/assembler_x86.cpp line 6694:

> 6692:     emit_int24((unsigned char)0xC1, (0xF8 | encode), imm8);
> 6693:   }
> 6694: }

Should this be (0xE0 | encode)?

src/hotspot/cpu/x86/assembler_x86.cpp line 6722:

> 6720: }
> 6721: 
> 6722: void Assembler::esarl(Register dst, Address src, int imm8, bool no_flags) {

assert(isShiftCount(imm8), "illegal shift count") missing.

src/hotspot/cpu/x86/assembler_x86.cpp line 6725:

> 6723:   InstructionMark im(this);
> 6724:   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
> 6725:   attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_64bit);

input_size_in_bits should be EVEX_32bit.

src/hotspot/cpu/x86/assembler_x86.cpp line 6748:

> 6746:   InstructionMark im(this);
> 6747:   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
> 6748:   attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_64bit);

input_size_in_bits should be EVEX_32bit.

src/hotspot/cpu/x86/assembler_x86.cpp line 6764:

> 6762: }
> 6763: 
> 6764: void Assembler::esarl(Register dst, Register src, int imm8, bool no_flags) {

assert(isShiftCount(imm8), "illegal shift count") missing.

src/hotspot/cpu/x86/assembler_x86.cpp line 6794:

> 6792:   InstructionMark im(this);
> 6793:   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
> 6794:   attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_64bit);

input_size_in_bits should be EVEX_32bit.

src/hotspot/cpu/x86/assembler_x86.cpp line 6819:

> 6817: void Assembler::esbbl(Register dst, Register src1, Address src2) {
> 6818:   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
> 6819:   attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_64bit);

input_size_in_bits should be EVEX_32bit.

src/hotspot/cpu/x86/assembler_x86.cpp line 7546:

> 7544:   InstructionMark im(this);
> 7545:   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
> 7546:   attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_64bit);

input_size_in_bits should be EVEX_32bit.

src/hotspot/cpu/x86/assembler_x86.cpp line 7572:

> 7570:   InstructionMark im(this);
> 7571:   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
> 7572:   attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_64bit);

input_size_in_bits should be EVEX_32bit.

src/hotspot/cpu/x86/assembler_x86.cpp line 7600:

> 7598:   InstructionMark im(this);
> 7599:   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
> 7600:   attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_64bit);

input_size_in_bytes should be EVEX_32bit.

src/hotspot/cpu/x86/assembler_x86.cpp line 7645:

> 7643: void Assembler::exorw(Register dst, Register src1, Register src2, bool no_flags) {
> 7644:   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
> 7645:   (void) evex_prefix_and_encode_ndd(src1->encoding(), dst->encoding(), src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);

VEX_SIMD_NONE should be VEX_SIMD_66.

src/hotspot/cpu/x86/assembler_x86.cpp line 7662:

> 7660:   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
> 7661:   attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_64bit);
> 7662:   evex_prefix_ndd(src2, dst->encoding(), src1->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);

input_size_in_bits could be EVEX_16bit.
VEX_SIMD_NONE should be VEX_SIMD_66.

src/hotspot/cpu/x86/assembler_x86.cpp line 12251:

> 12249: 
> 12250: void Assembler::edecl(Register dst, Register src, bool no_flags) {
> 12251:   // Don't use it directly. Use MacroAssembler::deccrementl() instead.

This comment can be removed.

src/hotspot/cpu/x86/assembler_x86.cpp line 13706:

> 13704: 
> 13705: void Assembler::eincl(Register dst, Register src, bool no_flags) {
> 13706:   // Don't use it directly. Use MacroAssembler::incrementl() instead.

This comment could be removed.

src/hotspot/cpu/x86/assembler_x86.cpp line 14788:

> 14786: void Assembler::edecl(Register dst, Register src, bool no_flags) {
> 14787:   // Don't use it directly. Use MacroAssembler::decrementl() instead.
> 14788:   // Use two-byte form (one-byte form is a REX prefix in 64-bit mode)

This comment could be removed.

src/hotspot/cpu/x86/assembler_x86.cpp line 14803:

> 14801: void Assembler::edecq(Register dst, Register src, bool no_flags) {
> 14802:   // Don't use it directly. Use MacroAssembler::incrementq() instead.
> 14803:   // Use two-byte form (one-byte from is a REX prefix in 64-bit mode)

This comment could be removed.

src/hotspot/cpu/x86/assembler_x86.cpp line 14817:

> 14815: 
> 14816: void Assembler::edecq(Register dst, Address src, bool no_flags) {
> 14817:   // Don't use it directly. Use MacroAssembler::increment() instead.

This comment could be removed.

src/hotspot/cpu/x86/assembler_x86.cpp line 14883:

> 14881: void Assembler::eimulq(Register dst, Register src, bool no_flags) {
> 14882:   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
> 14883:   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, /* src_is_gpr */ true, /* nds_is_ndd */ false, no_flags);

Is there a reason we are not calling the evex_prefix_and_encode_nf here?

src/hotspot/cpu/x86/assembler_x86.cpp line 14900:

> 14898: void Assembler::eimulq(Register src, bool no_flags) {
> 14899:   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
> 14900:   int encode = vex_prefix_and_encode(0, 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, /* src_is_gpr */ true, /* nds_is_ndd */ false, no_flags);

Is there a reason we are not calling the evex_prefix_and_encode_nf here?

src/hotspot/cpu/x86/assembler_x86.cpp line 14921:

> 14919:   InstructionMark im(this);
> 14920:   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
> 14921:   attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_32bit);

input_size_in_bits could be EVEX_64bit.

src/hotspot/cpu/x86/assembler_x86.cpp line 14946:

> 14944: void Assembler::eimulq(Register dst, Register src, int value, bool no_flags) {
> 14945:   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
> 14946:   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, /* src_is_gpr */ true, /* nds_is_ndd */ false, no_flags);

Is there a reason we are not calling the evex_prefix_and_encode_nf here?

src/hotspot/cpu/x86/assembler_x86.cpp line 14965:

> 14963:   InstructionMark im(this);
> 14964:   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
> 14965:   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes, /* nds_is_ndd */ false, no_flags);

Is there a reason we are not calling the evex_prefix_nf here?

src/hotspot/cpu/x86/assembler_x86.cpp line 14973:

> 14971:   InstructionMark im(this);
> 14972:   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
> 14973:   attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_32bit);

input_size_in_bits should be EVEX_64bit.

src/hotspot/cpu/x86/assembler_x86.cpp line 14988:

> 14986: void Assembler::eincl(Register dst, Register src, bool no_flags) {
> 14987:   // Don't use it directly. Use MacroAssembler::incrementl() instead.
> 14988:   // Use two-byte form (one-byte from is a REX prefix in 64-bit mode)

This comment could be removed.

src/hotspot/cpu/x86/assembler_x86.cpp line 15004:

> 15002: void Assembler::eincq(Register dst, Register src, bool no_flags) {
> 15003:   // Don't use it directly. Use MacroAssembler::incrementq() instead.
> 15004:   // Use two-byte form (one-byte from is a REX prefix in 64-bit mode)

This comment could be removed.

src/hotspot/cpu/x86/assembler_x86.cpp line 15018:

> 15016: 
> 15017: void Assembler::eincq(Register dst, Address src, bool no_flags) {
> 15018:   // Don't use it directly. Use MacroAssembler::incrementq() instead.

This comment could be removed.

src/hotspot/cpu/x86/assembler_x86.cpp line 15827:

> 15825: 
> 15826: void Assembler::esarq(Register dst, Address src, int imm8, bool no_flags) {
> 15827:   InstructionMark im(this);

assert(isShiftCount(imm8 >> 1), "illegal shift count") is missing.

src/hotspot/cpu/x86/assembler_x86.cpp line 15885:

> 15883:   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
> 15884:   int encode = evex_prefix_and_encode_ndd(0, dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags);
> 15885:   emit_int16((unsigned char)0xD1, (0xF8 | encode));

This should be:
 emit_int16((unsigned char)0xD3, (0xF8 | encode));
Shift by cl and not shift by 1.

src/hotspot/cpu/x86/assembler_x86.cpp line 15920:

> 15918: }
> 15919: 
> 15920: void Assembler::esbbq(Register dst, Register src1, Address src2) {

InstructionMark im(this) is missing.

src/hotspot/cpu/x86/assembler_x86.cpp line 15934:

> 15932: 
> 15933: void Assembler::esbbq(Register dst, Register src1, Register src2) {
> 15934:   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);

vex_w should be true here?

src/hotspot/cpu/x86/assembler_x86.cpp line 16039:

> 16037:   assert(isShiftCount(imm8 >> 1), "illegal shift count");
> 16038:   InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
> 16039:   attributes.set_address_attributes(/* tuple_type */ EVEX_NOSCALE, /* input_size_in_bits */ EVEX_32bit);

input_size_in_bits should be EVEX_64bit.

-------------

PR Review Comment: https://git.openjdk.org/jdk/pull/20698#discussion_r1745963354
PR Review Comment: https://git.openjdk.org/jdk/pull/20698#discussion_r1745966767
PR Review Comment: https://git.openjdk.org/jdk/pull/20698#discussion_r1745985658
PR Review Comment: https://git.openjdk.org/jdk/pull/20698#discussion_r1745987552
PR Review Comment: https://git.openjdk.org/jdk/pull/20698#discussion_r1745988624
PR Review Comment: https://git.openjdk.org/jdk/pull/20698#discussion_r1745990265
PR Review Comment: https://git.openjdk.org/jdk/pull/20698#discussion_r1746011620
PR Review Comment: https://git.openjdk.org/jdk/pull/20698#discussion_r1745995095
PR Review Comment: https://git.openjdk.org/jdk/pull/20698#discussion_r1745995624
PR Review Comment: https://git.openjdk.org/jdk/pull/20698#discussion_r1745996314
PR Review Comment: https://git.openjdk.org/jdk/pull/20698#discussion_r1746012273
PR Review Comment: https://git.openjdk.org/jdk/pull/20698#discussion_r1746013876
PR Review Comment: https://git.openjdk.org/jdk/pull/20698#discussion_r1746015511
PR Review Comment: https://git.openjdk.org/jdk/pull/20698#discussion_r1746059366
PR Review Comment: https://git.openjdk.org/jdk/pull/20698#discussion_r1746060252
PR Review Comment: https://git.openjdk.org/jdk/pull/20698#discussion_r1746108142
PR Review Comment: https://git.openjdk.org/jdk/pull/20698#discussion_r1746110091
PR Review Comment: https://git.openjdk.org/jdk/pull/20698#discussion_r1746113050
PR Review Comment: https://git.openjdk.org/jdk/pull/20698#discussion_r1746113669
PR Review Comment: https://git.openjdk.org/jdk/pull/20698#discussion_r1746115430
PR Review Comment: https://git.openjdk.org/jdk/pull/20698#discussion_r1746228953
PR Review Comment: https://git.openjdk.org/jdk/pull/20698#discussion_r1746229449
PR Review Comment: https://git.openjdk.org/jdk/pull/20698#discussion_r1746229826
PR Review Comment: https://git.openjdk.org/jdk/pull/20698#discussion_r1746237405
PR Review Comment: https://git.openjdk.org/jdk/pull/20698#discussion_r1746239674
PR Review Comment: https://git.openjdk.org/jdk/pull/20698#discussion_r1746243869
PR Review Comment: https://git.openjdk.org/jdk/pull/20698#discussion_r1746248995
PR Review Comment: https://git.openjdk.org/jdk/pull/20698#discussion_r1746253962
PR Review Comment: https://git.openjdk.org/jdk/pull/20698#discussion_r1746255625
PR Review Comment: https://git.openjdk.org/jdk/pull/20698#discussion_r1746257746
PR Review Comment: https://git.openjdk.org/jdk/pull/20698#discussion_r1746259847
PR Review Comment: https://git.openjdk.org/jdk/pull/20698#discussion_r1746262676
PR Review Comment: https://git.openjdk.org/jdk/pull/20698#discussion_r1746283748
PR Review Comment: https://git.openjdk.org/jdk/pull/20698#discussion_r1746287569
PR Review Comment: https://git.openjdk.org/jdk/pull/20698#discussion_r1746288704
PR Review Comment: https://git.openjdk.org/jdk/pull/20698#discussion_r1746289294
PR Review Comment: https://git.openjdk.org/jdk/pull/20698#discussion_r1746291123