RFR: Newer AMD 17h (EPYC) Processor family defaults

David Holmes david.holmes at oracle.com
Wed Sep 13 03:34:02 UTC 2017


Hi Rohit,

On 12/09/2017 2:52 PM, Rohit Arul Raj wrote:
> Hello David,
> 
>>>
>>>
>>> 1. ExtCpuid1EEx
>>>
>>> Should this be ExtCpuid1EEbx? (I see the naming here is somewhat
>>> inconsistent - and potentially confusing: I would have preferred to see
>>> things like ExtCpuid_1E_Ebx, to make it clear.)
>>
>> Yes, I can change it accordingly.

The parenthetical comment was wishful thinking - that the underscores 
had been used for all of these names in this code. The change I was 
suggesting was just use of ExtCpuid1EEbx.

I don't need to see further changes. This can be finalized when Vladimir 
pushes the change once the repo is open again.

Thanks,
David

> 
> I have attached the updated, re-tested patch as per your comments above.
> 
> diff --git a/src/cpu/x86/vm/vm_version_x86.cpp
> b/src/cpu/x86/vm/vm_version_x86.cpp
> --- a/src/cpu/x86/vm/vm_version_x86.cpp
> +++ b/src/cpu/x86/vm/vm_version_x86.cpp
> @@ -70,7 +70,7 @@
>       bool use_evex = FLAG_IS_DEFAULT(UseAVX) || (UseAVX > 2);
> 
>       Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4;
> -    Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7,
> done, wrapup;
> +    Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7,
> ext_cpuid8, done, wrapup;
>       Label legacy_setup, save_restore_except, legacy_save_restore,
> start_simd_check;
> 
>       StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub");
> @@ -272,9 +272,23 @@
>       __ jccb(Assembler::belowEqual, ext_cpuid5);
>       __ cmpl(rax, 0x80000007);     // Is cpuid(0x80000008) supported?
>       __ jccb(Assembler::belowEqual, ext_cpuid7);
> +    __ cmpl(rax, 0x80000008);     // Is cpuid(0x8000001E) supported?
> +    __ jccb(Assembler::belowEqual, ext_cpuid8);
> +    //
> +    // Extended cpuid(0x8000001E)
> +    //
> +    __ movl(rax, 0x8000001E);
> +    __ cpuid();
> +    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid_1E_offset())));
> +    __ movl(Address(rsi, 0), rax);
> +    __ movl(Address(rsi, 4), rbx);
> +    __ movl(Address(rsi, 8), rcx);
> +    __ movl(Address(rsi,12), rdx);
> +
>       //
>       // Extended cpuid(0x80000008)
>       //
> +    __ bind(ext_cpuid8);
>       __ movl(rax, 0x80000008);
>       __ cpuid();
>       __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid8_offset())));
> @@ -1109,11 +1123,27 @@
>       }
> 
>   #ifdef COMPILER2
> -    if (MaxVectorSize > 16) {
> -      // Limit vectors size to 16 bytes on current AMD cpus.
> +    if (cpu_family() < 0x17 && MaxVectorSize > 16) {
> +      // Limit vectors size to 16 bytes on AMD cpus < 17h.
>         FLAG_SET_DEFAULT(MaxVectorSize, 16);
>       }
>   #endif // COMPILER2
> +
> +    // Some defaults for AMD family 17h
> +    if ( cpu_family() == 0x17 ) {
> +      // On family 17h processors use XMM and UnalignedLoadStores for
> Array Copy
> +      if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
> +        FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
> +      }
> +      if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
> +        FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
> +      }
> +#ifdef COMPILER2
> +      if (supports_sse4_2() && FLAG_IS_DEFAULT(UseFPUForSpilling)) {
> +        FLAG_SET_DEFAULT(UseFPUForSpilling, true);
> +      }
> +#endif
> +    }
>     }
> 
>     if( is_intel() ) { // Intel cpus specific settings
> diff --git a/src/cpu/x86/vm/vm_version_x86.hpp
> b/src/cpu/x86/vm/vm_version_x86.hpp
> --- a/src/cpu/x86/vm/vm_version_x86.hpp
> +++ b/src/cpu/x86/vm/vm_version_x86.hpp
> @@ -228,6 +228,15 @@
>       } bits;
>     };
> 
> +  union ExtCpuid_1E_Ebx {
> +    uint32_t value;
> +    struct {
> +      uint32_t                  : 8,
> +               threads_per_core : 8,
> +                                : 16;
> +    } bits;
> +  };
> +
>     union XemXcr0Eax {
>       uint32_t value;
>       struct {
> @@ -398,6 +407,12 @@
>       ExtCpuid8Ecx ext_cpuid8_ecx;
>       uint32_t     ext_cpuid8_edx; // reserved
> 
> +    // cpuid function 0x8000001E // AMD 17h
> +    uint32_t        ext_cpuid_1E_eax;
> +    ExtCpuid_1E_Ebx ext_cpuid_1E_ebx; // threads per core (AMD17h)
> +    uint32_t        ext_cpuid_1E_ecx;
> +    uint32_t        ext_cpuid_1E_edx; // unused currently
> +
>       // extended control register XCR0 (the XFEATURE_ENABLED_MASK register)
>       XemXcr0Eax   xem_xcr0_eax;
>       uint32_t     xem_xcr0_edx; // reserved
> @@ -505,6 +520,14 @@
>         result |= CPU_CLMUL;
>       if (_cpuid_info.sef_cpuid7_ebx.bits.rtm != 0)
>         result |= CPU_RTM;
> +    if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
> +       result |= CPU_ADX;
> +    if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
> +      result |= CPU_BMI2;
> +    if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
> +      result |= CPU_SHA;
> +    if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
> +      result |= CPU_FMA;
> 
>       // AMD features.
>       if (is_amd()) {
> @@ -518,16 +541,8 @@
>       }
>       // Intel features.
>       if(is_intel()) {
> -      if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
> -         result |= CPU_ADX;
> -      if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
> -        result |= CPU_BMI2;
> -      if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
> -        result |= CPU_SHA;
>         if(_cpuid_info.ext_cpuid1_ecx.bits.lzcnt_intel != 0)
>           result |= CPU_LZCNT;
> -      if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
> -        result |= CPU_FMA;
>         // for Intel, ecx.bits.misalignsse bit (bit 8) indicates
> support for prefetchw
>         if (_cpuid_info.ext_cpuid1_ecx.bits.misalignsse != 0) {
>           result |= CPU_3DNOW_PREFETCH;
> @@ -590,6 +605,7 @@
>     static ByteSize ext_cpuid5_offset() { return
> byte_offset_of(CpuidInfo, ext_cpuid5_eax); }
>     static ByteSize ext_cpuid7_offset() { return
> byte_offset_of(CpuidInfo, ext_cpuid7_eax); }
>     static ByteSize ext_cpuid8_offset() { return
> byte_offset_of(CpuidInfo, ext_cpuid8_eax); }
> +  static ByteSize ext_cpuid_1E_offset() { return
> byte_offset_of(CpuidInfo, ext_cpuid_1E_eax); }
>     static ByteSize tpl_cpuidB0_offset() { return
> byte_offset_of(CpuidInfo, tpl_cpuidB0_eax); }
>     static ByteSize tpl_cpuidB1_offset() { return
> byte_offset_of(CpuidInfo, tpl_cpuidB1_eax); }
>     static ByteSize tpl_cpuidB2_offset() { return
> byte_offset_of(CpuidInfo, tpl_cpuidB2_eax); }
> @@ -673,8 +689,11 @@
>       if (is_intel() && supports_processor_topology()) {
>         result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
>       } else if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) {
> -      result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
> -               cores_per_cpu();
> +      if (cpu_family() >= 0x17)
> +        result = _cpuid_info.ext_cpuid_1E_ebx.bits.threads_per_core + 1;
> +      else
> +        result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
> +                 cores_per_cpu();
>       }
>       return (result == 0 ? 1 : result);
>     }
> 
> 
> Please let me know your comments
> 
> Thanks for your time.
> 
> Regards,
> Rohit
> 
> 
>>> Thanks,
>>> David
>>> -----
>>>
>>>
>>>> Reference:
>>>> https://support.amd.com/TechDocs/54945_PPR_Family_17h_Models_00h-0Fh.pdf
>>>> [Pg 82]
>>>>
>>>>       CPUID_Fn8000001E_EBX [Core Identifiers] (CoreId)
>>>>         15:8 ThreadsPerCore: threads per core. Read-only. Reset: XXh.
>>>> The number of threads per core is ThreadsPerCore+1.
>>>>
>>>> diff --git a/src/cpu/x86/vm/vm_version_x86.cpp
>>>> b/src/cpu/x86/vm/vm_version_x86.cpp
>>>> --- a/src/cpu/x86/vm/vm_version_x86.cpp
>>>> +++ b/src/cpu/x86/vm/vm_version_x86.cpp
>>>> @@ -70,7 +70,7 @@
>>>>        bool use_evex = FLAG_IS_DEFAULT(UseAVX) || (UseAVX > 2);
>>>>
>>>>        Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4;
>>>> -    Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7,
>>>> done, wrapup;
>>>> +    Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7,
>>>> ext_cpuid8, done, wrapup;
>>>>        Label legacy_setup, save_restore_except, legacy_save_restore,
>>>> start_simd_check;
>>>>
>>>>        StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub");
>>>> @@ -272,9 +272,23 @@
>>>>        __ jccb(Assembler::belowEqual, ext_cpuid5);
>>>>        __ cmpl(rax, 0x80000007);     // Is cpuid(0x80000008) supported?
>>>>        __ jccb(Assembler::belowEqual, ext_cpuid7);
>>>> +    __ cmpl(rax, 0x80000008);     // Is cpuid(0x8000001E) supported?
>>>> +    __ jccb(Assembler::belowEqual, ext_cpuid8);
>>>> +    //
>>>> +    // Extended cpuid(0x8000001E)
>>>> +    //
>>>> +    __ movl(rax, 0x8000001E);
>>>> +    __ cpuid();
>>>> +    __ lea(rsi, Address(rbp,
>>>> in_bytes(VM_Version::ext_cpuid1E_offset())));
>>>> +    __ movl(Address(rsi, 0), rax);
>>>> +    __ movl(Address(rsi, 4), rbx);
>>>> +    __ movl(Address(rsi, 8), rcx);
>>>> +    __ movl(Address(rsi,12), rdx);
>>>> +
>>>>        //
>>>>        // Extended cpuid(0x80000008)
>>>>        //
>>>> +    __ bind(ext_cpuid8);
>>>>        __ movl(rax, 0x80000008);
>>>>        __ cpuid();
>>>>        __ lea(rsi, Address(rbp,
>>>> in_bytes(VM_Version::ext_cpuid8_offset())));
>>>> @@ -1109,11 +1123,27 @@
>>>>        }
>>>>
>>>>    #ifdef COMPILER2
>>>> -    if (MaxVectorSize > 16) {
>>>> -      // Limit vectors size to 16 bytes on current AMD cpus.
>>>> +    if (cpu_family() < 0x17 && MaxVectorSize > 16) {
>>>> +      // Limit vectors size to 16 bytes on AMD cpus < 17h.
>>>>          FLAG_SET_DEFAULT(MaxVectorSize, 16);
>>>>        }
>>>>    #endif // COMPILER2
>>>> +
>>>> +    // Some defaults for AMD family 17h
>>>> +    if ( cpu_family() == 0x17 ) {
>>>> +      // On family 17h processors use XMM and UnalignedLoadStores for
>>>> Array Copy
>>>> +      if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
>>>> +        FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
>>>> +      }
>>>> +      if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
>>>> +        FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
>>>> +      }
>>>> +#ifdef COMPILER2
>>>> +      if (supports_sse4_2() && FLAG_IS_DEFAULT(UseFPUForSpilling)) {
>>>> +        FLAG_SET_DEFAULT(UseFPUForSpilling, true);
>>>> +      }
>>>> +#endif
>>>> +    }
>>>>      }
>>>>
>>>>      if( is_intel() ) { // Intel cpus specific settings
>>>> diff --git a/src/cpu/x86/vm/vm_version_x86.hpp
>>>> b/src/cpu/x86/vm/vm_version_x86.hpp
>>>> --- a/src/cpu/x86/vm/vm_version_x86.hpp
>>>> +++ b/src/cpu/x86/vm/vm_version_x86.hpp
>>>> @@ -228,6 +228,15 @@
>>>>        } bits;
>>>>      };
>>>>
>>>> +  union ExtCpuid1EEx {
>>>> +    uint32_t value;
>>>> +    struct {
>>>> +      uint32_t                  : 8,
>>>> +               threads_per_core : 8,
>>>> +                                : 16;
>>>> +    } bits;
>>>> +  };
>>>> +
>>>>      union XemXcr0Eax {
>>>>        uint32_t value;
>>>>        struct {
>>>> @@ -398,6 +407,12 @@
>>>>        ExtCpuid8Ecx ext_cpuid8_ecx;
>>>>        uint32_t     ext_cpuid8_edx; // reserved
>>>>
>>>> +    // cpuid function 0x8000001E // AMD 17h
>>>> +    uint32_t     ext_cpuid1E_eax;
>>>> +    ExtCpuid1EEx ext_cpuid1E_ebx; // threads per core (AMD17h)
>>>> +    uint32_t     ext_cpuid1E_ecx;
>>>> +    uint32_t     ext_cpuid1E_edx; // unused currently
>>>> +
>>>>        // extended control register XCR0 (the XFEATURE_ENABLED_MASK
>>>> register)
>>>>        XemXcr0Eax   xem_xcr0_eax;
>>>>        uint32_t     xem_xcr0_edx; // reserved
>>>> @@ -505,6 +520,14 @@
>>>>          result |= CPU_CLMUL;
>>>>        if (_cpuid_info.sef_cpuid7_ebx.bits.rtm != 0)
>>>>          result |= CPU_RTM;
>>>> +    if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
>>>> +       result |= CPU_ADX;
>>>> +    if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
>>>> +      result |= CPU_BMI2;
>>>> +    if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
>>>> +      result |= CPU_SHA;
>>>> +    if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
>>>> +      result |= CPU_FMA;
>>>>
>>>>        // AMD features.
>>>>        if (is_amd()) {
>>>> @@ -518,16 +541,8 @@
>>>>        }
>>>>        // Intel features.
>>>>        if(is_intel()) {
>>>> -      if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
>>>> -         result |= CPU_ADX;
>>>> -      if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
>>>> -        result |= CPU_BMI2;
>>>> -      if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
>>>> -        result |= CPU_SHA;
>>>>          if(_cpuid_info.ext_cpuid1_ecx.bits.lzcnt_intel != 0)
>>>>            result |= CPU_LZCNT;
>>>> -      if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
>>>> -        result |= CPU_FMA;
>>>>          // for Intel, ecx.bits.misalignsse bit (bit 8) indicates
>>>> support for prefetchw
>>>>          if (_cpuid_info.ext_cpuid1_ecx.bits.misalignsse != 0) {
>>>>            result |= CPU_3DNOW_PREFETCH;
>>>> @@ -590,6 +605,7 @@
>>>>      static ByteSize ext_cpuid5_offset() { return
>>>> byte_offset_of(CpuidInfo, ext_cpuid5_eax); }
>>>>      static ByteSize ext_cpuid7_offset() { return
>>>> byte_offset_of(CpuidInfo, ext_cpuid7_eax); }
>>>>      static ByteSize ext_cpuid8_offset() { return
>>>> byte_offset_of(CpuidInfo, ext_cpuid8_eax); }
>>>> +  static ByteSize ext_cpuid1E_offset() { return
>>>> byte_offset_of(CpuidInfo, ext_cpuid1E_eax); }
>>>>      static ByteSize tpl_cpuidB0_offset() { return
>>>> byte_offset_of(CpuidInfo, tpl_cpuidB0_eax); }
>>>>      static ByteSize tpl_cpuidB1_offset() { return
>>>> byte_offset_of(CpuidInfo, tpl_cpuidB1_eax); }
>>>>      static ByteSize tpl_cpuidB2_offset() { return
>>>> byte_offset_of(CpuidInfo, tpl_cpuidB2_eax); }
>>>> @@ -673,8 +689,11 @@
>>>>        if (is_intel() && supports_processor_topology()) {
>>>>          result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
>>>>        } else if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) {
>>>> -      result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
>>>> -               cores_per_cpu();
>>>> +      if (cpu_family() >= 0x17)
>>>> +        result = _cpuid_info.ext_cpuid1E_ebx.bits.threads_per_core + 1;
>>>> +      else
>>>> +        result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
>>>> +                 cores_per_cpu();
>>>>        }
>>>>        return (result == 0 ? 1 : result);
>>>>      }
>>>>
>>>> I have attached the patch for review.
>>>> Please let me know your comments.
>>>>
>>>> Thanks,
>>>> Rohit
>>>>
>>>>> Thanks,
>>>>> Vladimir
>>>>>
>>>>>
>>>>>>
>>>>>> src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>
>>>>>> No comments on AMD specific changes.
>>>>>>
>>>>>> Thanks,
>>>>>> David
>>>>>> -----
>>>>>>
>>>>>> On 5/09/2017 3:43 PM, David Holmes wrote:
>>>>>>>
>>>>>>>
>>>>>>> On 5/09/2017 3:29 PM, Rohit Arul Raj wrote:
>>>>>>>>
>>>>>>>>
>>>>>>>> Hello David,
>>>>>>>>
>>>>>>>> On Tue, Sep 5, 2017 at 10:31 AM, David Holmes
>>>>>>>> <david.holmes at oracle.com>
>>>>>>>> wrote:
>>>>>>>>>
>>>>>>>>>
>>>>>>>>> Hi Rohit,
>>>>>>>>>
>>>>>>>>> I was unable to apply your patch to latest jdk10/hs/hotspot repo.
>>>>>>>>>
>>>>>>>>
>>>>>>>> I checked out the latest jdk10/hs/hotspot [parent: 13548:1a9c2e07a826]
>>>>>>>> and was able to apply the patch [epyc-amd17h-defaults-3Sept.patch]
>>>>>>>> without any issues.
>>>>>>>> Can you share the error message that you are getting?
>>>>>>>
>>>>>>>
>>>>>>>
>>>>>>> I was getting this:
>>>>>>>
>>>>>>> applying hotspot.patch
>>>>>>> patching file src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>> Hunk #1 FAILED at 1108
>>>>>>> 1 out of 1 hunks FAILED -- saving rejects to file
>>>>>>> src/cpu/x86/vm/vm_version_x86.cpp.rej
>>>>>>> patching file src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>> Hunk #2 FAILED at 522
>>>>>>> 1 out of 2 hunks FAILED -- saving rejects to file
>>>>>>> src/cpu/x86/vm/vm_version_x86.hpp.rej
>>>>>>> abort: patch failed to apply
>>>>>>>
>>>>>>> but I started again and this time it applied fine, so not sure what was
>>>>>>> going on there.
>>>>>>>
>>>>>>> Cheers,
>>>>>>> David
>>>>>>>
>>>>>>>> Regards,
>>>>>>>> Rohit
>>>>>>>>
>>>>>>>>
>>>>>>>>>
>>>>>>>>>
>>>>>>>>> On 4/09/2017 2:42 AM, Rohit Arul Raj wrote:
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>> Hello Vladimir,
>>>>>>>>>>
>>>>>>>>>> On Sat, Sep 2, 2017 at 11:25 PM, Vladimir Kozlov
>>>>>>>>>> <vladimir.kozlov at oracle.com> wrote:
>>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>> Hi Rohit,
>>>>>>>>>>>
>>>>>>>>>>> On 9/2/17 1:16 AM, Rohit Arul Raj wrote:
>>>>>>>>>>>>
>>>>>>>>>>>>
>>>>>>>>>>>>
>>>>>>>>>>>>
>>>>>>>>>>>> Hello Vladimir,
>>>>>>>>>>>>
>>>>>>>>>>>>> Changes look good. Only question I have is about MaxVectorSize.
>>>>>>>>>>>>> It
>>>>>>>>>>>>> is
>>>>>>>>>>>>> set
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>
>>>>>>>>>>>>> 16 only in presence of AVX:
>>>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>>> http://hg.openjdk.java.net/jdk10/hs/hotspot/file/046eab27258f/src/cpu/x86/vm/vm_version_x86.cpp#l945
>>>>>>>>>>>>>
>>>>>>>>>>>>> Does that code works for AMD 17h too?
>>>>>>>>>>>>
>>>>>>>>>>>>
>>>>>>>>>>>>
>>>>>>>>>>>>
>>>>>>>>>>>>
>>>>>>>>>>>> Thanks for pointing that out. Yes, the code works fine for AMD
>>>>>>>>>>>> 17h.
>>>>>>>>>>>> So
>>>>>>>>>>>> I have removed the surplus check for MaxVectorSize from my patch.
>>>>>>>>>>>> I
>>>>>>>>>>>> have updated, re-tested and attached the patch.
>>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>> Which check you removed?
>>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>> My older patch had the below mentioned check which was required on
>>>>>>>>>> JDK9 where the default MaxVectorSize was 64. It has been handled
>>>>>>>>>> better in openJDK10. So this check is not required anymore.
>>>>>>>>>>
>>>>>>>>>> +    // Some defaults for AMD family 17h
>>>>>>>>>> +    if ( cpu_family() == 0x17 ) {
>>>>>>>>>> ...
>>>>>>>>>> ...
>>>>>>>>>> +      if (MaxVectorSize > 32) {
>>>>>>>>>> +        FLAG_SET_DEFAULT(MaxVectorSize, 32);
>>>>>>>>>> +      }
>>>>>>>>>> ..
>>>>>>>>>> ..
>>>>>>>>>> +      }
>>>>>>>>>>
>>>>>>>>>>>>
>>>>>>>>>>>> I have one query regarding the setting of UseSHA flag:
>>>>>>>>>>>>
>>>>>>>>>>>>
>>>>>>>>>>>>
>>>>>>>>>>>>
>>>>>>>>>>>> http://hg.openjdk.java.net/jdk10/hs/hotspot/file/046eab27258f/src/cpu/x86/vm/vm_version_x86.cpp#l821
>>>>>>>>>>>>
>>>>>>>>>>>> AMD 17h has support for SHA.
>>>>>>>>>>>> AMD 15h doesn't have  support for SHA. Still "UseSHA" flag gets
>>>>>>>>>>>> enabled for it based on the availability of BMI2 and AVX2. Is
>>>>>>>>>>>> there
>>>>>>>>>>>> an
>>>>>>>>>>>> underlying reason for this? I have handled this in the patch but
>>>>>>>>>>>> just
>>>>>>>>>>>> wanted to confirm.
>>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>> It was done with next changes which use only AVX2 and BMI2
>>>>>>>>>>> instructions
>>>>>>>>>>> to
>>>>>>>>>>> calculate SHA-256:
>>>>>>>>>>>
>>>>>>>>>>> http://hg.openjdk.java.net/jdk10/hs/hotspot/rev/6a17c49de974
>>>>>>>>>>>
>>>>>>>>>>> I don't know if AMD 15h supports these instructions and can execute
>>>>>>>>>>> that
>>>>>>>>>>> code. You need to test it.
>>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>> Ok, got it. Since AMD15h has support for AVX2 and BMI2 instructions,
>>>>>>>>>> it should work.
>>>>>>>>>> Confirmed by running following sanity tests:
>>>>>>>>>>
>>>>>>>>>> ./hotspot/test/compiler/intrinsics/sha/sanity/TestSHA1Intrinsics.java
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>> ./hotspot/test/compiler/intrinsics/sha/sanity/TestSHA512Intrinsics.java
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>> ./hotspot/test/compiler/intrinsics/sha/sanity/TestSHA256Intrinsics.java
>>>>>>>>>>
>>>>>>>>>> So I have removed those SHA checks from my patch too.
>>>>>>>>>>
>>>>>>>>>> Please find attached updated, re-tested patch.
>>>>>>>>>>
>>>>>>>>>> diff --git a/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>> b/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>> --- a/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>> +++ b/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>> @@ -1109,11 +1109,27 @@
>>>>>>>>>>          }
>>>>>>>>>>
>>>>>>>>>>      #ifdef COMPILER2
>>>>>>>>>> -    if (MaxVectorSize > 16) {
>>>>>>>>>> -      // Limit vectors size to 16 bytes on current AMD cpus.
>>>>>>>>>> +    if (cpu_family() < 0x17 && MaxVectorSize > 16) {
>>>>>>>>>> +      // Limit vectors size to 16 bytes on AMD cpus < 17h.
>>>>>>>>>>            FLAG_SET_DEFAULT(MaxVectorSize, 16);
>>>>>>>>>>          }
>>>>>>>>>>      #endif // COMPILER2
>>>>>>>>>> +
>>>>>>>>>> +    // Some defaults for AMD family 17h
>>>>>>>>>> +    if ( cpu_family() == 0x17 ) {
>>>>>>>>>> +      // On family 17h processors use XMM and UnalignedLoadStores
>>>>>>>>>> for
>>>>>>>>>> Array Copy
>>>>>>>>>> +      if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
>>>>>>>>>> +        FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
>>>>>>>>>> +      }
>>>>>>>>>> +      if (supports_sse2() &&
>>>>>>>>>> FLAG_IS_DEFAULT(UseUnalignedLoadStores))
>>>>>>>>>> {
>>>>>>>>>> +        FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
>>>>>>>>>> +      }
>>>>>>>>>> +#ifdef COMPILER2
>>>>>>>>>> +      if (supports_sse4_2() && FLAG_IS_DEFAULT(UseFPUForSpilling))
>>>>>>>>>> {
>>>>>>>>>> +        FLAG_SET_DEFAULT(UseFPUForSpilling, true);
>>>>>>>>>> +      }
>>>>>>>>>> +#endif
>>>>>>>>>> +    }
>>>>>>>>>>        }
>>>>>>>>>>
>>>>>>>>>>        if( is_intel() ) { // Intel cpus specific settings
>>>>>>>>>> diff --git a/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>> b/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>> --- a/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>> +++ b/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>> @@ -505,6 +505,14 @@
>>>>>>>>>>            result |= CPU_CLMUL;
>>>>>>>>>>          if (_cpuid_info.sef_cpuid7_ebx.bits.rtm != 0)
>>>>>>>>>>            result |= CPU_RTM;
>>>>>>>>>> +    if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
>>>>>>>>>> +       result |= CPU_ADX;
>>>>>>>>>> +    if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
>>>>>>>>>> +      result |= CPU_BMI2;
>>>>>>>>>> +    if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
>>>>>>>>>> +      result |= CPU_SHA;
>>>>>>>>>> +    if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
>>>>>>>>>> +      result |= CPU_FMA;
>>>>>>>>>>
>>>>>>>>>>          // AMD features.
>>>>>>>>>>          if (is_amd()) {
>>>>>>>>>> @@ -515,19 +523,13 @@
>>>>>>>>>>              result |= CPU_LZCNT;
>>>>>>>>>>            if (_cpuid_info.ext_cpuid1_ecx.bits.sse4a != 0)
>>>>>>>>>>              result |= CPU_SSE4A;
>>>>>>>>>> +      if(_cpuid_info.std_cpuid1_edx.bits.ht != 0)
>>>>>>>>>> +        result |= CPU_HT;
>>>>>>>>>>          }
>>>>>>>>>>          // Intel features.
>>>>>>>>>>          if(is_intel()) {
>>>>>>>>>> -      if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
>>>>>>>>>> -         result |= CPU_ADX;
>>>>>>>>>> -      if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
>>>>>>>>>> -        result |= CPU_BMI2;
>>>>>>>>>> -      if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
>>>>>>>>>> -        result |= CPU_SHA;
>>>>>>>>>>            if(_cpuid_info.ext_cpuid1_ecx.bits.lzcnt_intel != 0)
>>>>>>>>>>              result |= CPU_LZCNT;
>>>>>>>>>> -      if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
>>>>>>>>>> -        result |= CPU_FMA;
>>>>>>>>>>            // for Intel, ecx.bits.misalignsse bit (bit 8) indicates
>>>>>>>>>> support for prefetchw
>>>>>>>>>>            if (_cpuid_info.ext_cpuid1_ecx.bits.misalignsse != 0) {
>>>>>>>>>>              result |= CPU_3DNOW_PREFETCH;
>>>>>>>>>>
>>>>>>>>>> Please let me know your comments.
>>>>>>>>>>
>>>>>>>>>> Thanks for your time.
>>>>>>>>>> Rohit
>>>>>>>>>>
>>>>>>>>>>>>
>>>>>>>>>>>> Thanks for taking time to review the code.
>>>>>>>>>>>>
>>>>>>>>>>>> diff --git a/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>>> b/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>>> --- a/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>>> +++ b/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>>> @@ -1088,6 +1088,22 @@
>>>>>>>>>>>>             }
>>>>>>>>>>>>             FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
>>>>>>>>>>>>           }
>>>>>>>>>>>> +    if (supports_sha()) {
>>>>>>>>>>>> +      if (FLAG_IS_DEFAULT(UseSHA)) {
>>>>>>>>>>>> +        FLAG_SET_DEFAULT(UseSHA, true);
>>>>>>>>>>>> +      }
>>>>>>>>>>>> +    } else if (UseSHA || UseSHA1Intrinsics || UseSHA256Intrinsics
>>>>>>>>>>>> ||
>>>>>>>>>>>> UseSHA512Intrinsics) {
>>>>>>>>>>>> +      if (!FLAG_IS_DEFAULT(UseSHA) ||
>>>>>>>>>>>> +          !FLAG_IS_DEFAULT(UseSHA1Intrinsics) ||
>>>>>>>>>>>> +          !FLAG_IS_DEFAULT(UseSHA256Intrinsics) ||
>>>>>>>>>>>> +          !FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
>>>>>>>>>>>> +        warning("SHA instructions are not available on this
>>>>>>>>>>>> CPU");
>>>>>>>>>>>> +      }
>>>>>>>>>>>> +      FLAG_SET_DEFAULT(UseSHA, false);
>>>>>>>>>>>> +      FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
>>>>>>>>>>>> +      FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
>>>>>>>>>>>> +      FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
>>>>>>>>>>>> +    }
>>>>>>>>>>>>
>>>>>>>>>>>>           // some defaults for AMD family 15h
>>>>>>>>>>>>           if ( cpu_family() == 0x15 ) {
>>>>>>>>>>>> @@ -1109,11 +1125,40 @@
>>>>>>>>>>>>           }
>>>>>>>>>>>>
>>>>>>>>>>>>       #ifdef COMPILER2
>>>>>>>>>>>> -    if (MaxVectorSize > 16) {
>>>>>>>>>>>> -      // Limit vectors size to 16 bytes on current AMD cpus.
>>>>>>>>>>>> +    if (cpu_family() < 0x17 && MaxVectorSize > 16) {
>>>>>>>>>>>> +      // Limit vectors size to 16 bytes on AMD cpus < 17h.
>>>>>>>>>>>>             FLAG_SET_DEFAULT(MaxVectorSize, 16);
>>>>>>>>>>>>           }
>>>>>>>>>>>>       #endif // COMPILER2
>>>>>>>>>>>> +
>>>>>>>>>>>> +    // Some defaults for AMD family 17h
>>>>>>>>>>>> +    if ( cpu_family() == 0x17 ) {
>>>>>>>>>>>> +      // On family 17h processors use XMM and UnalignedLoadStores
>>>>>>>>>>>> for
>>>>>>>>>>>> Array Copy
>>>>>>>>>>>> +      if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy))
>>>>>>>>>>>> {
>>>>>>>>>>>> +        FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
>>>>>>>>>>>> +      }
>>>>>>>>>>>> +      if (supports_sse2() &&
>>>>>>>>>>>> FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
>>>>>>>>>>>> +        FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
>>>>>>>>>>>> +      }
>>>>>>>>>>>> +      if (supports_bmi2() &&
>>>>>>>>>>>> FLAG_IS_DEFAULT(UseBMI2Instructions))
>>>>>>>>>>>> {
>>>>>>>>>>>> +        FLAG_SET_DEFAULT(UseBMI2Instructions, true);
>>>>>>>>>>>> +      }
>>>>>>>>>>>> +      if (UseSHA) {
>>>>>>>>>>>> +        if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
>>>>>>>>>>>> +          FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
>>>>>>>>>>>> +        } else if (UseSHA512Intrinsics) {
>>>>>>>>>>>> +          warning("Intrinsics for SHA-384 and SHA-512 crypto hash
>>>>>>>>>>>> functions not available on this CPU.");
>>>>>>>>>>>> +          FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
>>>>>>>>>>>> +        }
>>>>>>>>>>>> +      }
>>>>>>>>>>>> +#ifdef COMPILER2
>>>>>>>>>>>> +      if (supports_sse4_2()) {
>>>>>>>>>>>> +        if (FLAG_IS_DEFAULT(UseFPUForSpilling)) {
>>>>>>>>>>>> +          FLAG_SET_DEFAULT(UseFPUForSpilling, true);
>>>>>>>>>>>> +        }
>>>>>>>>>>>> +      }
>>>>>>>>>>>> +#endif
>>>>>>>>>>>> +    }
>>>>>>>>>>>>         }
>>>>>>>>>>>>
>>>>>>>>>>>>         if( is_intel() ) { // Intel cpus specific settings
>>>>>>>>>>>> diff --git a/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>>>> b/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>>>> --- a/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>>>> +++ b/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>>>> @@ -505,6 +505,14 @@
>>>>>>>>>>>>             result |= CPU_CLMUL;
>>>>>>>>>>>>           if (_cpuid_info.sef_cpuid7_ebx.bits.rtm != 0)
>>>>>>>>>>>>             result |= CPU_RTM;
>>>>>>>>>>>> +    if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
>>>>>>>>>>>> +       result |= CPU_ADX;
>>>>>>>>>>>> +    if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
>>>>>>>>>>>> +      result |= CPU_BMI2;
>>>>>>>>>>>> +    if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
>>>>>>>>>>>> +      result |= CPU_SHA;
>>>>>>>>>>>> +    if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
>>>>>>>>>>>> +      result |= CPU_FMA;
>>>>>>>>>>>>
>>>>>>>>>>>>           // AMD features.
>>>>>>>>>>>>           if (is_amd()) {
>>>>>>>>>>>> @@ -515,19 +523,13 @@
>>>>>>>>>>>>               result |= CPU_LZCNT;
>>>>>>>>>>>>             if (_cpuid_info.ext_cpuid1_ecx.bits.sse4a != 0)
>>>>>>>>>>>>               result |= CPU_SSE4A;
>>>>>>>>>>>> +      if(_cpuid_info.std_cpuid1_edx.bits.ht != 0)
>>>>>>>>>>>> +        result |= CPU_HT;
>>>>>>>>>>>>           }
>>>>>>>>>>>>           // Intel features.
>>>>>>>>>>>>           if(is_intel()) {
>>>>>>>>>>>> -      if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
>>>>>>>>>>>> -         result |= CPU_ADX;
>>>>>>>>>>>> -      if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
>>>>>>>>>>>> -        result |= CPU_BMI2;
>>>>>>>>>>>> -      if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
>>>>>>>>>>>> -        result |= CPU_SHA;
>>>>>>>>>>>>             if(_cpuid_info.ext_cpuid1_ecx.bits.lzcnt_intel != 0)
>>>>>>>>>>>>               result |= CPU_LZCNT;
>>>>>>>>>>>> -      if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
>>>>>>>>>>>> -        result |= CPU_FMA;
>>>>>>>>>>>>             // for Intel, ecx.bits.misalignsse bit (bit 8)
>>>>>>>>>>>> indicates
>>>>>>>>>>>> support for prefetchw
>>>>>>>>>>>>             if (_cpuid_info.ext_cpuid1_ecx.bits.misalignsse != 0) {
>>>>>>>>>>>>               result |= CPU_3DNOW_PREFETCH;
>>>>>>>>>>>>
>>>>>>>>>>>>
>>>>>>>>>>>> Regards,
>>>>>>>>>>>> Rohit
>>>>>>>>>>>>
>>>>>>>>>>>>
>>>>>>>>>>>>
>>>>>>>>>>>>> On 9/1/17 8:04 AM, Rohit Arul Raj wrote:
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> On Fri, Sep 1, 2017 at 10:27 AM, Rohit Arul Raj
>>>>>>>>>>>>>> <rohitarulraj at gmail.com>
>>>>>>>>>>>>>> wrote:
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> On Fri, Sep 1, 2017 at 3:01 AM, David Holmes
>>>>>>>>>>>>>>> <david.holmes at oracle.com>
>>>>>>>>>>>>>>> wrote:
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> Hi Rohit,
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> I think the patch needs updating for jdk10 as I already see a
>>>>>>>>>>>>>>>> lot of
>>>>>>>>>>>>>>>> logic
>>>>>>>>>>>>>>>> around UseSHA in vm_version_x86.cpp.
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> Thanks,
>>>>>>>>>>>>>>>> David
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> Thanks David, I will update the patch wrt JDK10 source base,
>>>>>>>>>>>>>>> test
>>>>>>>>>>>>>>> and
>>>>>>>>>>>>>>> resubmit for review.
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> Regards,
>>>>>>>>>>>>>>> Rohit
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> Hi All,
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> I have updated the patch wrt openjdk10/hotspot (parent:
>>>>>>>>>>>>>> 13519:71337910df60), did regression testing using jtreg ($make
>>>>>>>>>>>>>> default) and didnt find any regressions.
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> Can anyone please volunteer to review this patch  which sets
>>>>>>>>>>>>>> flag/ISA
>>>>>>>>>>>>>> defaults for newer AMD 17h (EPYC) processor?
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> ************************* Patch ****************************
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> diff --git a/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>>>>> b/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>>>>> --- a/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>>>>> +++ b/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>>>>> @@ -1088,6 +1088,22 @@
>>>>>>>>>>>>>>              }
>>>>>>>>>>>>>>              FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
>>>>>>>>>>>>>>            }
>>>>>>>>>>>>>> +    if (supports_sha()) {
>>>>>>>>>>>>>> +      if (FLAG_IS_DEFAULT(UseSHA)) {
>>>>>>>>>>>>>> +        FLAG_SET_DEFAULT(UseSHA, true);
>>>>>>>>>>>>>> +      }
>>>>>>>>>>>>>> +    } else if (UseSHA || UseSHA1Intrinsics ||
>>>>>>>>>>>>>> UseSHA256Intrinsics
>>>>>>>>>>>>>> ||
>>>>>>>>>>>>>> UseSHA512Intrinsics) {
>>>>>>>>>>>>>> +      if (!FLAG_IS_DEFAULT(UseSHA) ||
>>>>>>>>>>>>>> +          !FLAG_IS_DEFAULT(UseSHA1Intrinsics) ||
>>>>>>>>>>>>>> +          !FLAG_IS_DEFAULT(UseSHA256Intrinsics) ||
>>>>>>>>>>>>>> +          !FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
>>>>>>>>>>>>>> +        warning("SHA instructions are not available on this
>>>>>>>>>>>>>> CPU");
>>>>>>>>>>>>>> +      }
>>>>>>>>>>>>>> +      FLAG_SET_DEFAULT(UseSHA, false);
>>>>>>>>>>>>>> +      FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
>>>>>>>>>>>>>> +      FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
>>>>>>>>>>>>>> +      FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
>>>>>>>>>>>>>> +    }
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>            // some defaults for AMD family 15h
>>>>>>>>>>>>>>            if ( cpu_family() == 0x15 ) {
>>>>>>>>>>>>>> @@ -1109,11 +1125,43 @@
>>>>>>>>>>>>>>            }
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>        #ifdef COMPILER2
>>>>>>>>>>>>>> -    if (MaxVectorSize > 16) {
>>>>>>>>>>>>>> -      // Limit vectors size to 16 bytes on current AMD cpus.
>>>>>>>>>>>>>> +    if (cpu_family() < 0x17 && MaxVectorSize > 16) {
>>>>>>>>>>>>>> +      // Limit vectors size to 16 bytes on AMD cpus < 17h.
>>>>>>>>>>>>>>              FLAG_SET_DEFAULT(MaxVectorSize, 16);
>>>>>>>>>>>>>>            }
>>>>>>>>>>>>>>        #endif // COMPILER2
>>>>>>>>>>>>>> +
>>>>>>>>>>>>>> +    // Some defaults for AMD family 17h
>>>>>>>>>>>>>> +    if ( cpu_family() == 0x17 ) {
>>>>>>>>>>>>>> +      // On family 17h processors use XMM and
>>>>>>>>>>>>>> UnalignedLoadStores
>>>>>>>>>>>>>> for
>>>>>>>>>>>>>> Array Copy
>>>>>>>>>>>>>> +      if (supports_sse2() &&
>>>>>>>>>>>>>> FLAG_IS_DEFAULT(UseXMMForArrayCopy))
>>>>>>>>>>>>>> {
>>>>>>>>>>>>>> +        UseXMMForArrayCopy = true;
>>>>>>>>>>>>>> +      }
>>>>>>>>>>>>>> +      if (supports_sse2() &&
>>>>>>>>>>>>>> FLAG_IS_DEFAULT(UseUnalignedLoadStores))
>>>>>>>>>>>>>> {
>>>>>>>>>>>>>> +        UseUnalignedLoadStores = true;
>>>>>>>>>>>>>> +      }
>>>>>>>>>>>>>> +      if (supports_bmi2() &&
>>>>>>>>>>>>>> FLAG_IS_DEFAULT(UseBMI2Instructions)) {
>>>>>>>>>>>>>> +        UseBMI2Instructions = true;
>>>>>>>>>>>>>> +      }
>>>>>>>>>>>>>> +      if (MaxVectorSize > 32) {
>>>>>>>>>>>>>> +        FLAG_SET_DEFAULT(MaxVectorSize, 32);
>>>>>>>>>>>>>> +      }
>>>>>>>>>>>>>> +      if (UseSHA) {
>>>>>>>>>>>>>> +        if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
>>>>>>>>>>>>>> +          FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
>>>>>>>>>>>>>> +        } else if (UseSHA512Intrinsics) {
>>>>>>>>>>>>>> +          warning("Intrinsics for SHA-384 and SHA-512 crypto
>>>>>>>>>>>>>> hash
>>>>>>>>>>>>>> functions not available on this CPU.");
>>>>>>>>>>>>>> +          FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
>>>>>>>>>>>>>> +        }
>>>>>>>>>>>>>> +      }
>>>>>>>>>>>>>> +#ifdef COMPILER2
>>>>>>>>>>>>>> +      if (supports_sse4_2()) {
>>>>>>>>>>>>>> +        if (FLAG_IS_DEFAULT(UseFPUForSpilling)) {
>>>>>>>>>>>>>> +          FLAG_SET_DEFAULT(UseFPUForSpilling, true);
>>>>>>>>>>>>>> +        }
>>>>>>>>>>>>>> +      }
>>>>>>>>>>>>>> +#endif
>>>>>>>>>>>>>> +    }
>>>>>>>>>>>>>>          }
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>          if( is_intel() ) { // Intel cpus specific settings
>>>>>>>>>>>>>> diff --git a/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>>>>>> b/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>>>>>> --- a/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>>>>>> +++ b/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>>>>>> @@ -505,6 +505,14 @@
>>>>>>>>>>>>>>              result |= CPU_CLMUL;
>>>>>>>>>>>>>>            if (_cpuid_info.sef_cpuid7_ebx.bits.rtm != 0)
>>>>>>>>>>>>>>              result |= CPU_RTM;
>>>>>>>>>>>>>> +    if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
>>>>>>>>>>>>>> +       result |= CPU_ADX;
>>>>>>>>>>>>>> +    if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
>>>>>>>>>>>>>> +      result |= CPU_BMI2;
>>>>>>>>>>>>>> +    if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
>>>>>>>>>>>>>> +      result |= CPU_SHA;
>>>>>>>>>>>>>> +    if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
>>>>>>>>>>>>>> +      result |= CPU_FMA;
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>            // AMD features.
>>>>>>>>>>>>>>            if (is_amd()) {
>>>>>>>>>>>>>> @@ -515,19 +523,13 @@
>>>>>>>>>>>>>>                result |= CPU_LZCNT;
>>>>>>>>>>>>>>              if (_cpuid_info.ext_cpuid1_ecx.bits.sse4a != 0)
>>>>>>>>>>>>>>                result |= CPU_SSE4A;
>>>>>>>>>>>>>> +      if(_cpuid_info.std_cpuid1_edx.bits.ht != 0)
>>>>>>>>>>>>>> +        result |= CPU_HT;
>>>>>>>>>>>>>>            }
>>>>>>>>>>>>>>            // Intel features.
>>>>>>>>>>>>>>            if(is_intel()) {
>>>>>>>>>>>>>> -      if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
>>>>>>>>>>>>>> -         result |= CPU_ADX;
>>>>>>>>>>>>>> -      if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
>>>>>>>>>>>>>> -        result |= CPU_BMI2;
>>>>>>>>>>>>>> -      if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
>>>>>>>>>>>>>> -        result |= CPU_SHA;
>>>>>>>>>>>>>>              if(_cpuid_info.ext_cpuid1_ecx.bits.lzcnt_intel != 0)
>>>>>>>>>>>>>>                result |= CPU_LZCNT;
>>>>>>>>>>>>>> -      if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
>>>>>>>>>>>>>> -        result |= CPU_FMA;
>>>>>>>>>>>>>>              // for Intel, ecx.bits.misalignsse bit (bit 8)
>>>>>>>>>>>>>> indicates
>>>>>>>>>>>>>> support for prefetchw
>>>>>>>>>>>>>>              if (_cpuid_info.ext_cpuid1_ecx.bits.misalignsse !=
>>>>>>>>>>>>>> 0) {
>>>>>>>>>>>>>>                result |= CPU_3DNOW_PREFETCH;
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> **************************************************************
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> Thanks,
>>>>>>>>>>>>>> Rohit
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> On 1/09/2017 1:11 AM, Rohit Arul Raj wrote:
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>> On Thu, Aug 31, 2017 at 5:59 PM, David Holmes
>>>>>>>>>>>>>>>>> <david.holmes at oracle.com>
>>>>>>>>>>>>>>>>> wrote:
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>> Hi Rohit,
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>> On 31/08/2017 7:03 PM, Rohit Arul Raj wrote:
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>> I would like an volunteer to review this patch (openJDK9)
>>>>>>>>>>>>>>>>>>> which
>>>>>>>>>>>>>>>>>>> sets
>>>>>>>>>>>>>>>>>>> flag/ISA defaults for newer AMD 17h (EPYC) processor and
>>>>>>>>>>>>>>>>>>> help
>>>>>>>>>>>>>>>>>>> us
>>>>>>>>>>>>>>>>>>> with
>>>>>>>>>>>>>>>>>>> the commit process.
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>> Webrev:
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>> https://www.dropbox.com/sh/08bsxaxupg8kbam/AADurTXLGIZ6C-tiIAi_Glyka?dl=0
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>> Unfortunately patches can not be accepted from systems
>>>>>>>>>>>>>>>>>> outside
>>>>>>>>>>>>>>>>>> the
>>>>>>>>>>>>>>>>>> OpenJDK
>>>>>>>>>>>>>>>>>> infrastructure and ...
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>> I have also attached the patch (hg diff -g) for reference.
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>> ... unfortunately patches tend to get stripped by the mail
>>>>>>>>>>>>>>>>>> servers.
>>>>>>>>>>>>>>>>>> If
>>>>>>>>>>>>>>>>>> the
>>>>>>>>>>>>>>>>>> patch is small please include it inline. Otherwise you will
>>>>>>>>>>>>>>>>>> need
>>>>>>>>>>>>>>>>>> to
>>>>>>>>>>>>>>>>>> find
>>>>>>>>>>>>>>>>>> an
>>>>>>>>>>>>>>>>>> OpenJDK Author who can host it for you on
>>>>>>>>>>>>>>>>>> cr.openjdk.java.net.
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>> 3) I have done regression testing using jtreg ($make
>>>>>>>>>>>>>>>>>>> default)
>>>>>>>>>>>>>>>>>>> and
>>>>>>>>>>>>>>>>>>> didnt find any regressions.
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>> Sounds good, but until I see the patch it is hard to comment
>>>>>>>>>>>>>>>>>> on
>>>>>>>>>>>>>>>>>> testing
>>>>>>>>>>>>>>>>>> requirements.
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>> Thanks,
>>>>>>>>>>>>>>>>>> David
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>> Thanks David,
>>>>>>>>>>>>>>>>> Yes, it's a small patch.
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>> diff --git a/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>>>>>>>> b/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>>>>>>>> --- a/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>>>>>>>> +++ b/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>>>>>>>> @@ -1051,6 +1051,22 @@
>>>>>>>>>>>>>>>>>               }
>>>>>>>>>>>>>>>>>               FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
>>>>>>>>>>>>>>>>>             }
>>>>>>>>>>>>>>>>> +    if (supports_sha()) {
>>>>>>>>>>>>>>>>> +      if (FLAG_IS_DEFAULT(UseSHA)) {
>>>>>>>>>>>>>>>>> +        FLAG_SET_DEFAULT(UseSHA, true);
>>>>>>>>>>>>>>>>> +      }
>>>>>>>>>>>>>>>>> +    } else if (UseSHA || UseSHA1Intrinsics ||
>>>>>>>>>>>>>>>>> UseSHA256Intrinsics
>>>>>>>>>>>>>>>>> ||
>>>>>>>>>>>>>>>>> UseSHA512Intrinsics) {
>>>>>>>>>>>>>>>>> +      if (!FLAG_IS_DEFAULT(UseSHA) ||
>>>>>>>>>>>>>>>>> +          !FLAG_IS_DEFAULT(UseSHA1Intrinsics) ||
>>>>>>>>>>>>>>>>> +          !FLAG_IS_DEFAULT(UseSHA256Intrinsics) ||
>>>>>>>>>>>>>>>>> +          !FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
>>>>>>>>>>>>>>>>> +        warning("SHA instructions are not available on this
>>>>>>>>>>>>>>>>> CPU");
>>>>>>>>>>>>>>>>> +      }
>>>>>>>>>>>>>>>>> +      FLAG_SET_DEFAULT(UseSHA, false);
>>>>>>>>>>>>>>>>> +      FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
>>>>>>>>>>>>>>>>> +      FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
>>>>>>>>>>>>>>>>> +      FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
>>>>>>>>>>>>>>>>> +    }
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>             // some defaults for AMD family 15h
>>>>>>>>>>>>>>>>>             if ( cpu_family() == 0x15 ) {
>>>>>>>>>>>>>>>>> @@ -1072,11 +1088,43 @@
>>>>>>>>>>>>>>>>>             }
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>         #ifdef COMPILER2
>>>>>>>>>>>>>>>>> -    if (MaxVectorSize > 16) {
>>>>>>>>>>>>>>>>> -      // Limit vectors size to 16 bytes on current AMD cpus.
>>>>>>>>>>>>>>>>> +    if (cpu_family() < 0x17 && MaxVectorSize > 16) {
>>>>>>>>>>>>>>>>> +      // Limit vectors size to 16 bytes on AMD cpus < 17h.
>>>>>>>>>>>>>>>>>               FLAG_SET_DEFAULT(MaxVectorSize, 16);
>>>>>>>>>>>>>>>>>             }
>>>>>>>>>>>>>>>>>         #endif // COMPILER2
>>>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>>>> +    // Some defaults for AMD family 17h
>>>>>>>>>>>>>>>>> +    if ( cpu_family() == 0x17 ) {
>>>>>>>>>>>>>>>>> +      // On family 17h processors use XMM and
>>>>>>>>>>>>>>>>> UnalignedLoadStores
>>>>>>>>>>>>>>>>> for
>>>>>>>>>>>>>>>>> Array Copy
>>>>>>>>>>>>>>>>> +      if (supports_sse2() &&
>>>>>>>>>>>>>>>>> FLAG_IS_DEFAULT(UseXMMForArrayCopy))
>>>>>>>>>>>>>>>>> {
>>>>>>>>>>>>>>>>> +        UseXMMForArrayCopy = true;
>>>>>>>>>>>>>>>>> +      }
>>>>>>>>>>>>>>>>> +      if (supports_sse2() &&
>>>>>>>>>>>>>>>>> FLAG_IS_DEFAULT(UseUnalignedLoadStores))
>>>>>>>>>>>>>>>>> {
>>>>>>>>>>>>>>>>> +        UseUnalignedLoadStores = true;
>>>>>>>>>>>>>>>>> +      }
>>>>>>>>>>>>>>>>> +      if (supports_bmi2() &&
>>>>>>>>>>>>>>>>> FLAG_IS_DEFAULT(UseBMI2Instructions))
>>>>>>>>>>>>>>>>> {
>>>>>>>>>>>>>>>>> +        UseBMI2Instructions = true;
>>>>>>>>>>>>>>>>> +      }
>>>>>>>>>>>>>>>>> +      if (MaxVectorSize > 32) {
>>>>>>>>>>>>>>>>> +        FLAG_SET_DEFAULT(MaxVectorSize, 32);
>>>>>>>>>>>>>>>>> +      }
>>>>>>>>>>>>>>>>> +      if (UseSHA) {
>>>>>>>>>>>>>>>>> +        if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
>>>>>>>>>>>>>>>>> +          FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
>>>>>>>>>>>>>>>>> +        } else if (UseSHA512Intrinsics) {
>>>>>>>>>>>>>>>>> +          warning("Intrinsics for SHA-384 and SHA-512 crypto
>>>>>>>>>>>>>>>>> hash
>>>>>>>>>>>>>>>>> functions not available on this CPU.");
>>>>>>>>>>>>>>>>> +          FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
>>>>>>>>>>>>>>>>> +        }
>>>>>>>>>>>>>>>>> +      }
>>>>>>>>>>>>>>>>> +#ifdef COMPILER2
>>>>>>>>>>>>>>>>> +      if (supports_sse4_2()) {
>>>>>>>>>>>>>>>>> +        if (FLAG_IS_DEFAULT(UseFPUForSpilling)) {
>>>>>>>>>>>>>>>>> +          FLAG_SET_DEFAULT(UseFPUForSpilling, true);
>>>>>>>>>>>>>>>>> +        }
>>>>>>>>>>>>>>>>> +      }
>>>>>>>>>>>>>>>>> +#endif
>>>>>>>>>>>>>>>>> +    }
>>>>>>>>>>>>>>>>>           }
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>           if( is_intel() ) { // Intel cpus specific settings
>>>>>>>>>>>>>>>>> diff --git a/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>>>>>>>>> b/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>>>>>>>>> --- a/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>>>>>>>>> +++ b/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>>>>>>>>> @@ -513,6 +513,16 @@
>>>>>>>>>>>>>>>>>                 result |= CPU_LZCNT;
>>>>>>>>>>>>>>>>>               if (_cpuid_info.ext_cpuid1_ecx.bits.sse4a != 0)
>>>>>>>>>>>>>>>>>                 result |= CPU_SSE4A;
>>>>>>>>>>>>>>>>> +      if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
>>>>>>>>>>>>>>>>> +        result |= CPU_BMI2;
>>>>>>>>>>>>>>>>> +      if(_cpuid_info.std_cpuid1_edx.bits.ht != 0)
>>>>>>>>>>>>>>>>> +        result |= CPU_HT;
>>>>>>>>>>>>>>>>> +      if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
>>>>>>>>>>>>>>>>> +        result |= CPU_ADX;
>>>>>>>>>>>>>>>>> +      if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
>>>>>>>>>>>>>>>>> +        result |= CPU_SHA;
>>>>>>>>>>>>>>>>> +      if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
>>>>>>>>>>>>>>>>> +        result |= CPU_FMA;
>>>>>>>>>>>>>>>>>             }
>>>>>>>>>>>>>>>>>             // Intel features.
>>>>>>>>>>>>>>>>>             if(is_intel()) {
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>> Regards,
>>>>>>>>>>>>>>>>> Rohit
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>
>>>>>
>>>


More information about the hotspot-dev mailing list