RFR: Newer AMD 17h (EPYC) Processor family defaults

Mon Sep 11 07:50:57 UTC 2017

Hi Rohit,

Updated webrev at:

http://cr.openjdk.java.net/~dholmes/8187219/webrev.v2/

On 7/09/2017 3:24 PM, Rohit Arul Raj wrote:
> Hello Vladimir, David,
> 
>>
>>>
>>> You added:
>>>
>>>    526       if(_cpuid_info.std_cpuid1_edx.bits.ht != 0)
>>>    527         result |= CPU_HT;
>>>
>>> and I'm wondering if there would be any case where this would not be
>>> covered by the earlier:
>>>
>>>    448     if (threads_per_core() > 1)
>>>    449       result |= CPU_HT;
>>>
>>> ?
>>> ---
>>
>>
>> Valid question.
>>
> 
> Thanks for your review and comments.
> I have updated the patch to calculate threads per core by using the
> CPUID bit: CPUID_Fn8000001E_EBX [8:15].

This detail is getting beyond my knowledge I'm afraid. I have two queries:

1. ExtCpuid1EEx

Should this be ExtCpuid1EEbx? (I see the naming here is somewhat 
inconsistent - and potentially confusing: I would have preferred to see 
things like ExtCpuid_1E_Ebx, to make it clear.)

2. You fixed the calculation of threads_per_core by reading it directly, 
but I have to wonder which part of the old calculation:

695         result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
696                  cores_per_cpu();

was actually going wrong in the case of the 17h?

Thanks,
David
-----

> Reference:
> https://support.amd.com/TechDocs/54945_PPR_Family_17h_Models_00h-0Fh.pdf [Pg 82]
> 
>      CPUID_Fn8000001E_EBX [Core Identifiers] (CoreId)
>        15:8 ThreadsPerCore: threads per core. Read-only. Reset: XXh.
> The number of threads per core is ThreadsPerCore+1.
> 
> diff --git a/src/cpu/x86/vm/vm_version_x86.cpp
> b/src/cpu/x86/vm/vm_version_x86.cpp
> --- a/src/cpu/x86/vm/vm_version_x86.cpp
> +++ b/src/cpu/x86/vm/vm_version_x86.cpp
> @@ -70,7 +70,7 @@
>       bool use_evex = FLAG_IS_DEFAULT(UseAVX) || (UseAVX > 2);
> 
>       Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4;
> -    Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7,
> done, wrapup;
> +    Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7,
> ext_cpuid8, done, wrapup;
>       Label legacy_setup, save_restore_except, legacy_save_restore,
> start_simd_check;
> 
>       StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub");
> @@ -272,9 +272,23 @@
>       __ jccb(Assembler::belowEqual, ext_cpuid5);
>       __ cmpl(rax, 0x80000007);     // Is cpuid(0x80000008) supported?
>       __ jccb(Assembler::belowEqual, ext_cpuid7);
> +    __ cmpl(rax, 0x80000008);     // Is cpuid(0x8000001E) supported?
> +    __ jccb(Assembler::belowEqual, ext_cpuid8);
> +    //
> +    // Extended cpuid(0x8000001E)
> +    //
> +    __ movl(rax, 0x8000001E);
> +    __ cpuid();
> +    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1E_offset())));
> +    __ movl(Address(rsi, 0), rax);
> +    __ movl(Address(rsi, 4), rbx);
> +    __ movl(Address(rsi, 8), rcx);
> +    __ movl(Address(rsi,12), rdx);
> +
>       //
>       // Extended cpuid(0x80000008)
>       //
> +    __ bind(ext_cpuid8);
>       __ movl(rax, 0x80000008);
>       __ cpuid();
>       __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid8_offset())));
> @@ -1109,11 +1123,27 @@
>       }
> 
>   #ifdef COMPILER2
> -    if (MaxVectorSize > 16) {
> -      // Limit vectors size to 16 bytes on current AMD cpus.
> +    if (cpu_family() < 0x17 && MaxVectorSize > 16) {
> +      // Limit vectors size to 16 bytes on AMD cpus < 17h.
>         FLAG_SET_DEFAULT(MaxVectorSize, 16);
>       }
>   #endif // COMPILER2
> +
> +    // Some defaults for AMD family 17h
> +    if ( cpu_family() == 0x17 ) {
> +      // On family 17h processors use XMM and UnalignedLoadStores for
> Array Copy
> +      if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
> +        FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
> +      }
> +      if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
> +        FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
> +      }
> +#ifdef COMPILER2
> +      if (supports_sse4_2() && FLAG_IS_DEFAULT(UseFPUForSpilling)) {
> +        FLAG_SET_DEFAULT(UseFPUForSpilling, true);
> +      }
> +#endif
> +    }
>     }
> 
>     if( is_intel() ) { // Intel cpus specific settings
> diff --git a/src/cpu/x86/vm/vm_version_x86.hpp
> b/src/cpu/x86/vm/vm_version_x86.hpp
> --- a/src/cpu/x86/vm/vm_version_x86.hpp
> +++ b/src/cpu/x86/vm/vm_version_x86.hpp
> @@ -228,6 +228,15 @@
>       } bits;
>     };
> 
> +  union ExtCpuid1EEx {
> +    uint32_t value;
> +    struct {
> +      uint32_t                  : 8,
> +               threads_per_core : 8,
> +                                : 16;
> +    } bits;
> +  };
> +
>     union XemXcr0Eax {
>       uint32_t value;
>       struct {
> @@ -398,6 +407,12 @@
>       ExtCpuid8Ecx ext_cpuid8_ecx;
>       uint32_t     ext_cpuid8_edx; // reserved
> 
> +    // cpuid function 0x8000001E // AMD 17h
> +    uint32_t     ext_cpuid1E_eax;
> +    ExtCpuid1EEx ext_cpuid1E_ebx; // threads per core (AMD17h)
> +    uint32_t     ext_cpuid1E_ecx;
> +    uint32_t     ext_cpuid1E_edx; // unused currently
> +
>       // extended control register XCR0 (the XFEATURE_ENABLED_MASK register)
>       XemXcr0Eax   xem_xcr0_eax;
>       uint32_t     xem_xcr0_edx; // reserved
> @@ -505,6 +520,14 @@
>         result |= CPU_CLMUL;
>       if (_cpuid_info.sef_cpuid7_ebx.bits.rtm != 0)
>         result |= CPU_RTM;
> +    if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
> +       result |= CPU_ADX;
> +    if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
> +      result |= CPU_BMI2;
> +    if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
> +      result |= CPU_SHA;
> +    if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
> +      result |= CPU_FMA;
> 
>       // AMD features.
>       if (is_amd()) {
> @@ -518,16 +541,8 @@
>       }
>       // Intel features.
>       if(is_intel()) {
> -      if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
> -         result |= CPU_ADX;
> -      if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
> -        result |= CPU_BMI2;
> -      if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
> -        result |= CPU_SHA;
>         if(_cpuid_info.ext_cpuid1_ecx.bits.lzcnt_intel != 0)
>           result |= CPU_LZCNT;
> -      if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
> -        result |= CPU_FMA;
>         // for Intel, ecx.bits.misalignsse bit (bit 8) indicates
> support for prefetchw
>         if (_cpuid_info.ext_cpuid1_ecx.bits.misalignsse != 0) {
>           result |= CPU_3DNOW_PREFETCH;
> @@ -590,6 +605,7 @@
>     static ByteSize ext_cpuid5_offset() { return
> byte_offset_of(CpuidInfo, ext_cpuid5_eax); }
>     static ByteSize ext_cpuid7_offset() { return
> byte_offset_of(CpuidInfo, ext_cpuid7_eax); }
>     static ByteSize ext_cpuid8_offset() { return
> byte_offset_of(CpuidInfo, ext_cpuid8_eax); }
> +  static ByteSize ext_cpuid1E_offset() { return
> byte_offset_of(CpuidInfo, ext_cpuid1E_eax); }
>     static ByteSize tpl_cpuidB0_offset() { return
> byte_offset_of(CpuidInfo, tpl_cpuidB0_eax); }
>     static ByteSize tpl_cpuidB1_offset() { return
> byte_offset_of(CpuidInfo, tpl_cpuidB1_eax); }
>     static ByteSize tpl_cpuidB2_offset() { return
> byte_offset_of(CpuidInfo, tpl_cpuidB2_eax); }
> @@ -673,8 +689,11 @@
>       if (is_intel() && supports_processor_topology()) {
>         result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
>       } else if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) {
> -      result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
> -               cores_per_cpu();
> +      if (cpu_family() >= 0x17)
> +        result = _cpuid_info.ext_cpuid1E_ebx.bits.threads_per_core + 1;
> +      else
> +        result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
> +                 cores_per_cpu();
>       }
>       return (result == 0 ? 1 : result);
>     }
> 
> I have attached the patch for review.
> Please let me know your comments.
> 
> Thanks,
> Rohit
> 
>> Thanks,
>> Vladimir
>>
>>
>>>
>>> src/cpu/x86/vm/vm_version_x86.cpp
>>>
>>> No comments on AMD specific changes.
>>>
>>> Thanks,
>>> David
>>> -----
>>>
>>> On 5/09/2017 3:43 PM, David Holmes wrote:
>>>>
>>>> On 5/09/2017 3:29 PM, Rohit Arul Raj wrote:
>>>>>
>>>>> Hello David,
>>>>>
>>>>> On Tue, Sep 5, 2017 at 10:31 AM, David Holmes <david.holmes at oracle.com>
>>>>> wrote:
>>>>>>
>>>>>> Hi Rohit,
>>>>>>
>>>>>> I was unable to apply your patch to latest jdk10/hs/hotspot repo.
>>>>>>
>>>>>
>>>>> I checked out the latest jdk10/hs/hotspot [parent: 13548:1a9c2e07a826]
>>>>> and was able to apply the patch [epyc-amd17h-defaults-3Sept.patch]
>>>>> without any issues.
>>>>> Can you share the error message that you are getting?
>>>>
>>>>
>>>> I was getting this:
>>>>
>>>> applying hotspot.patch
>>>> patching file src/cpu/x86/vm/vm_version_x86.cpp
>>>> Hunk #1 FAILED at 1108
>>>> 1 out of 1 hunks FAILED -- saving rejects to file
>>>> src/cpu/x86/vm/vm_version_x86.cpp.rej
>>>> patching file src/cpu/x86/vm/vm_version_x86.hpp
>>>> Hunk #2 FAILED at 522
>>>> 1 out of 2 hunks FAILED -- saving rejects to file
>>>> src/cpu/x86/vm/vm_version_x86.hpp.rej
>>>> abort: patch failed to apply
>>>>
>>>> but I started again and this time it applied fine, so not sure what was
>>>> going on there.
>>>>
>>>> Cheers,
>>>> David
>>>>
>>>>> Regards,
>>>>> Rohit
>>>>>
>>>>>
>>>>>>
>>>>>>
>>>>>> On 4/09/2017 2:42 AM, Rohit Arul Raj wrote:
>>>>>>>
>>>>>>>
>>>>>>> Hello Vladimir,
>>>>>>>
>>>>>>> On Sat, Sep 2, 2017 at 11:25 PM, Vladimir Kozlov
>>>>>>> <vladimir.kozlov at oracle.com> wrote:
>>>>>>>>
>>>>>>>>
>>>>>>>> Hi Rohit,
>>>>>>>>
>>>>>>>> On 9/2/17 1:16 AM, Rohit Arul Raj wrote:
>>>>>>>>>
>>>>>>>>>
>>>>>>>>>
>>>>>>>>> Hello Vladimir,
>>>>>>>>>
>>>>>>>>>> Changes look good. Only question I have is about MaxVectorSize. It
>>>>>>>>>> is set > 16 only in presence of AVX:
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>> http://hg.openjdk.java.net/jdk10/hs/hotspot/file/046eab27258f/src/cpu/x86/vm/vm_version_x86.cpp#l945
>>>>>>>>>>
>>>>>>>>>> Does that code work for AMD 17h too?
>>>>>>>>>
>>>>>>>>>
>>>>>>>>>
>>>>>>>>>
>>>>>>>>> Thanks for pointing that out. Yes, the code works fine for AMD 17h.
>>>>>>>>> So
>>>>>>>>> I have removed the surplus check for MaxVectorSize from my patch. I
>>>>>>>>> have updated, re-tested and attached the patch.
>>>>>>>>
>>>>>>>>
>>>>>>>>
>>>>>>>>
>>>>>>>> Which check did you remove?
>>>>>>>>
>>>>>>>
>>>>>>> My older patch had the below mentioned check which was required on
>>>>>>> JDK9 where the default MaxVectorSize was 64. It has been handled
>>>>>>> better in openJDK10. So this check is not required anymore.
>>>>>>>
>>>>>>> +    // Some defaults for AMD family 17h
>>>>>>> +    if ( cpu_family() == 0x17 ) {
>>>>>>> ...
>>>>>>> ...
>>>>>>> +      if (MaxVectorSize > 32) {
>>>>>>> +        FLAG_SET_DEFAULT(MaxVectorSize, 32);
>>>>>>> +      }
>>>>>>> ..
>>>>>>> ..
>>>>>>> +      }
>>>>>>>
>>>>>>>>>
>>>>>>>>> I have one query regarding the setting of UseSHA flag:
>>>>>>>>>
>>>>>>>>>
>>>>>>>>>
>>>>>>>>> http://hg.openjdk.java.net/jdk10/hs/hotspot/file/046eab27258f/src/cpu/x86/vm/vm_version_x86.cpp#l821
>>>>>>>>>
>>>>>>>>> AMD 17h has support for SHA.
>>>>>>>>> AMD 15h doesn't have  support for SHA. Still "UseSHA" flag gets
>>>>>>>>> enabled for it based on the availability of BMI2 and AVX2. Is there
>>>>>>>>> an
>>>>>>>>> underlying reason for this? I have handled this in the patch but
>>>>>>>>> just
>>>>>>>>> wanted to confirm.
>>>>>>>>
>>>>>>>>
>>>>>>>>
>>>>>>>>
>>>>>>>> It was done with next changes which use only AVX2 and BMI2
>>>>>>>> instructions
>>>>>>>> to
>>>>>>>> calculate SHA-256:
>>>>>>>>
>>>>>>>> http://hg.openjdk.java.net/jdk10/hs/hotspot/rev/6a17c49de974
>>>>>>>>
>>>>>>>> I don't know if AMD 15h supports these instructions and can execute
>>>>>>>> that
>>>>>>>> code. You need to test it.
>>>>>>>>
>>>>>>>
>>>>>>> Ok, got it. Since AMD15h has support for AVX2 and BMI2 instructions,
>>>>>>> it should work.
>>>>>>> Confirmed by running following sanity tests:
>>>>>>> ./hotspot/test/compiler/intrinsics/sha/sanity/TestSHA1Intrinsics.java
>>>>>>>
>>>>>>> ./hotspot/test/compiler/intrinsics/sha/sanity/TestSHA512Intrinsics.java
>>>>>>>
>>>>>>> ./hotspot/test/compiler/intrinsics/sha/sanity/TestSHA256Intrinsics.java
>>>>>>>
>>>>>>> So I have removed those SHA checks from my patch too.
>>>>>>>
>>>>>>> Please find attached updated, re-tested patch.
>>>>>>>
>>>>>>> diff --git a/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>> b/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>> --- a/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>> +++ b/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>> @@ -1109,11 +1109,27 @@
>>>>>>>         }
>>>>>>>
>>>>>>>     #ifdef COMPILER2
>>>>>>> -    if (MaxVectorSize > 16) {
>>>>>>> -      // Limit vectors size to 16 bytes on current AMD cpus.
>>>>>>> +    if (cpu_family() < 0x17 && MaxVectorSize > 16) {
>>>>>>> +      // Limit vectors size to 16 bytes on AMD cpus < 17h.
>>>>>>>           FLAG_SET_DEFAULT(MaxVectorSize, 16);
>>>>>>>         }
>>>>>>>     #endif // COMPILER2
>>>>>>> +
>>>>>>> +    // Some defaults for AMD family 17h
>>>>>>> +    if ( cpu_family() == 0x17 ) {
>>>>>>> +      // On family 17h processors use XMM and UnalignedLoadStores for
>>>>>>> Array Copy
>>>>>>> +      if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
>>>>>>> +        FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
>>>>>>> +      }
>>>>>>> +      if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores))
>>>>>>> {
>>>>>>> +        FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
>>>>>>> +      }
>>>>>>> +#ifdef COMPILER2
>>>>>>> +      if (supports_sse4_2() && FLAG_IS_DEFAULT(UseFPUForSpilling)) {
>>>>>>> +        FLAG_SET_DEFAULT(UseFPUForSpilling, true);
>>>>>>> +      }
>>>>>>> +#endif
>>>>>>> +    }
>>>>>>>       }
>>>>>>>
>>>>>>>       if( is_intel() ) { // Intel cpus specific settings
>>>>>>> diff --git a/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>> b/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>> --- a/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>> +++ b/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>> @@ -505,6 +505,14 @@
>>>>>>>           result |= CPU_CLMUL;
>>>>>>>         if (_cpuid_info.sef_cpuid7_ebx.bits.rtm != 0)
>>>>>>>           result |= CPU_RTM;
>>>>>>> +    if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
>>>>>>> +       result |= CPU_ADX;
>>>>>>> +    if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
>>>>>>> +      result |= CPU_BMI2;
>>>>>>> +    if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
>>>>>>> +      result |= CPU_SHA;
>>>>>>> +    if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
>>>>>>> +      result |= CPU_FMA;
>>>>>>>
>>>>>>>         // AMD features.
>>>>>>>         if (is_amd()) {
>>>>>>> @@ -515,19 +523,13 @@
>>>>>>>             result |= CPU_LZCNT;
>>>>>>>           if (_cpuid_info.ext_cpuid1_ecx.bits.sse4a != 0)
>>>>>>>             result |= CPU_SSE4A;
>>>>>>> +      if(_cpuid_info.std_cpuid1_edx.bits.ht != 0)
>>>>>>> +        result |= CPU_HT;
>>>>>>>         }
>>>>>>>         // Intel features.
>>>>>>>         if(is_intel()) {
>>>>>>> -      if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
>>>>>>> -         result |= CPU_ADX;
>>>>>>> -      if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
>>>>>>> -        result |= CPU_BMI2;
>>>>>>> -      if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
>>>>>>> -        result |= CPU_SHA;
>>>>>>>           if(_cpuid_info.ext_cpuid1_ecx.bits.lzcnt_intel != 0)
>>>>>>>             result |= CPU_LZCNT;
>>>>>>> -      if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
>>>>>>> -        result |= CPU_FMA;
>>>>>>>           // for Intel, ecx.bits.misalignsse bit (bit 8) indicates
>>>>>>> support for prefetchw
>>>>>>>           if (_cpuid_info.ext_cpuid1_ecx.bits.misalignsse != 0) {
>>>>>>>             result |= CPU_3DNOW_PREFETCH;
>>>>>>>
>>>>>>> Please let me know your comments.
>>>>>>>
>>>>>>> Thanks for your time.
>>>>>>> Rohit
>>>>>>>
>>>>>>>>>
>>>>>>>>> Thanks for taking time to review the code.
>>>>>>>>>
>>>>>>>>> diff --git a/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>> b/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>> --- a/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>> +++ b/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>> @@ -1088,6 +1088,22 @@
>>>>>>>>>            }
>>>>>>>>>            FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
>>>>>>>>>          }
>>>>>>>>> +    if (supports_sha()) {
>>>>>>>>> +      if (FLAG_IS_DEFAULT(UseSHA)) {
>>>>>>>>> +        FLAG_SET_DEFAULT(UseSHA, true);
>>>>>>>>> +      }
>>>>>>>>> +    } else if (UseSHA || UseSHA1Intrinsics || UseSHA256Intrinsics
>>>>>>>>> ||
>>>>>>>>> UseSHA512Intrinsics) {
>>>>>>>>> +      if (!FLAG_IS_DEFAULT(UseSHA) ||
>>>>>>>>> +          !FLAG_IS_DEFAULT(UseSHA1Intrinsics) ||
>>>>>>>>> +          !FLAG_IS_DEFAULT(UseSHA256Intrinsics) ||
>>>>>>>>> +          !FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
>>>>>>>>> +        warning("SHA instructions are not available on this CPU");
>>>>>>>>> +      }
>>>>>>>>> +      FLAG_SET_DEFAULT(UseSHA, false);
>>>>>>>>> +      FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
>>>>>>>>> +      FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
>>>>>>>>> +      FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
>>>>>>>>> +    }
>>>>>>>>>
>>>>>>>>>          // some defaults for AMD family 15h
>>>>>>>>>          if ( cpu_family() == 0x15 ) {
>>>>>>>>> @@ -1109,11 +1125,40 @@
>>>>>>>>>          }
>>>>>>>>>
>>>>>>>>>      #ifdef COMPILER2
>>>>>>>>> -    if (MaxVectorSize > 16) {
>>>>>>>>> -      // Limit vectors size to 16 bytes on current AMD cpus.
>>>>>>>>> +    if (cpu_family() < 0x17 && MaxVectorSize > 16) {
>>>>>>>>> +      // Limit vectors size to 16 bytes on AMD cpus < 17h.
>>>>>>>>>            FLAG_SET_DEFAULT(MaxVectorSize, 16);
>>>>>>>>>          }
>>>>>>>>>      #endif // COMPILER2
>>>>>>>>> +
>>>>>>>>> +    // Some defaults for AMD family 17h
>>>>>>>>> +    if ( cpu_family() == 0x17 ) {
>>>>>>>>> +      // On family 17h processors use XMM and UnalignedLoadStores
>>>>>>>>> for
>>>>>>>>> Array Copy
>>>>>>>>> +      if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
>>>>>>>>> +        FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
>>>>>>>>> +      }
>>>>>>>>> +      if (supports_sse2() &&
>>>>>>>>> FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
>>>>>>>>> +        FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
>>>>>>>>> +      }
>>>>>>>>> +      if (supports_bmi2() && FLAG_IS_DEFAULT(UseBMI2Instructions))
>>>>>>>>> {
>>>>>>>>> +        FLAG_SET_DEFAULT(UseBMI2Instructions, true);
>>>>>>>>> +      }
>>>>>>>>> +      if (UseSHA) {
>>>>>>>>> +        if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
>>>>>>>>> +          FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
>>>>>>>>> +        } else if (UseSHA512Intrinsics) {
>>>>>>>>> +          warning("Intrinsics for SHA-384 and SHA-512 crypto hash
>>>>>>>>> functions not available on this CPU.");
>>>>>>>>> +          FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
>>>>>>>>> +        }
>>>>>>>>> +      }
>>>>>>>>> +#ifdef COMPILER2
>>>>>>>>> +      if (supports_sse4_2()) {
>>>>>>>>> +        if (FLAG_IS_DEFAULT(UseFPUForSpilling)) {
>>>>>>>>> +          FLAG_SET_DEFAULT(UseFPUForSpilling, true);
>>>>>>>>> +        }
>>>>>>>>> +      }
>>>>>>>>> +#endif
>>>>>>>>> +    }
>>>>>>>>>        }
>>>>>>>>>
>>>>>>>>>        if( is_intel() ) { // Intel cpus specific settings
>>>>>>>>> diff --git a/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>> b/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>> --- a/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>> +++ b/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>> @@ -505,6 +505,14 @@
>>>>>>>>>            result |= CPU_CLMUL;
>>>>>>>>>          if (_cpuid_info.sef_cpuid7_ebx.bits.rtm != 0)
>>>>>>>>>            result |= CPU_RTM;
>>>>>>>>> +    if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
>>>>>>>>> +       result |= CPU_ADX;
>>>>>>>>> +    if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
>>>>>>>>> +      result |= CPU_BMI2;
>>>>>>>>> +    if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
>>>>>>>>> +      result |= CPU_SHA;
>>>>>>>>> +    if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
>>>>>>>>> +      result |= CPU_FMA;
>>>>>>>>>
>>>>>>>>>          // AMD features.
>>>>>>>>>          if (is_amd()) {
>>>>>>>>> @@ -515,19 +523,13 @@
>>>>>>>>>              result |= CPU_LZCNT;
>>>>>>>>>            if (_cpuid_info.ext_cpuid1_ecx.bits.sse4a != 0)
>>>>>>>>>              result |= CPU_SSE4A;
>>>>>>>>> +      if(_cpuid_info.std_cpuid1_edx.bits.ht != 0)
>>>>>>>>> +        result |= CPU_HT;
>>>>>>>>>          }
>>>>>>>>>          // Intel features.
>>>>>>>>>          if(is_intel()) {
>>>>>>>>> -      if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
>>>>>>>>> -         result |= CPU_ADX;
>>>>>>>>> -      if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
>>>>>>>>> -        result |= CPU_BMI2;
>>>>>>>>> -      if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
>>>>>>>>> -        result |= CPU_SHA;
>>>>>>>>>            if(_cpuid_info.ext_cpuid1_ecx.bits.lzcnt_intel != 0)
>>>>>>>>>              result |= CPU_LZCNT;
>>>>>>>>> -      if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
>>>>>>>>> -        result |= CPU_FMA;
>>>>>>>>>            // for Intel, ecx.bits.misalignsse bit (bit 8) indicates
>>>>>>>>> support for prefetchw
>>>>>>>>>            if (_cpuid_info.ext_cpuid1_ecx.bits.misalignsse != 0) {
>>>>>>>>>              result |= CPU_3DNOW_PREFETCH;
>>>>>>>>>
>>>>>>>>>
>>>>>>>>> Regards,
>>>>>>>>> Rohit
>>>>>>>>>
>>>>>>>>>
>>>>>>>>>
>>>>>>>>>> On 9/1/17 8:04 AM, Rohit Arul Raj wrote:
>>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>> On Fri, Sep 1, 2017 at 10:27 AM, Rohit Arul Raj
>>>>>>>>>>> <rohitarulraj at gmail.com>
>>>>>>>>>>> wrote:
>>>>>>>>>>>>
>>>>>>>>>>>>
>>>>>>>>>>>>
>>>>>>>>>>>>
>>>>>>>>>>>> On Fri, Sep 1, 2017 at 3:01 AM, David Holmes
>>>>>>>>>>>> <david.holmes at oracle.com>
>>>>>>>>>>>> wrote:
>>>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>>> Hi Rohit,
>>>>>>>>>>>>>
>>>>>>>>>>>>> I think the patch needs updating for jdk10 as I already see a
>>>>>>>>>>>>> lot of
>>>>>>>>>>>>> logic
>>>>>>>>>>>>> around UseSHA in vm_version_x86.cpp.
>>>>>>>>>>>>>
>>>>>>>>>>>>> Thanks,
>>>>>>>>>>>>> David
>>>>>>>>>>>>>
>>>>>>>>>>>>
>>>>>>>>>>>> Thanks David, I will update the patch wrt JDK10 source base, test
>>>>>>>>>>>> and
>>>>>>>>>>>> resubmit for review.
>>>>>>>>>>>>
>>>>>>>>>>>> Regards,
>>>>>>>>>>>> Rohit
>>>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>> Hi All,
>>>>>>>>>>>
>>>>>>>>>>> I have updated the patch wrt openjdk10/hotspot (parent:
>>>>>>>>>>> 13519:71337910df60), did regression testing using jtreg ($make
>>>>>>>>>>> default) and didn't find any regressions.
>>>>>>>>>>>
>>>>>>>>>>> Can anyone please volunteer to review this patch  which sets
>>>>>>>>>>> flag/ISA
>>>>>>>>>>> defaults for newer AMD 17h (EPYC) processor?
>>>>>>>>>>>
>>>>>>>>>>> ************************* Patch ****************************
>>>>>>>>>>>
>>>>>>>>>>> diff --git a/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>> b/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>> --- a/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>> +++ b/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>> @@ -1088,6 +1088,22 @@
>>>>>>>>>>>             }
>>>>>>>>>>>             FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
>>>>>>>>>>>           }
>>>>>>>>>>> +    if (supports_sha()) {
>>>>>>>>>>> +      if (FLAG_IS_DEFAULT(UseSHA)) {
>>>>>>>>>>> +        FLAG_SET_DEFAULT(UseSHA, true);
>>>>>>>>>>> +      }
>>>>>>>>>>> +    } else if (UseSHA || UseSHA1Intrinsics || UseSHA256Intrinsics
>>>>>>>>>>> ||
>>>>>>>>>>> UseSHA512Intrinsics) {
>>>>>>>>>>> +      if (!FLAG_IS_DEFAULT(UseSHA) ||
>>>>>>>>>>> +          !FLAG_IS_DEFAULT(UseSHA1Intrinsics) ||
>>>>>>>>>>> +          !FLAG_IS_DEFAULT(UseSHA256Intrinsics) ||
>>>>>>>>>>> +          !FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
>>>>>>>>>>> +        warning("SHA instructions are not available on this
>>>>>>>>>>> CPU");
>>>>>>>>>>> +      }
>>>>>>>>>>> +      FLAG_SET_DEFAULT(UseSHA, false);
>>>>>>>>>>> +      FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
>>>>>>>>>>> +      FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
>>>>>>>>>>> +      FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
>>>>>>>>>>> +    }
>>>>>>>>>>>
>>>>>>>>>>>           // some defaults for AMD family 15h
>>>>>>>>>>>           if ( cpu_family() == 0x15 ) {
>>>>>>>>>>> @@ -1109,11 +1125,43 @@
>>>>>>>>>>>           }
>>>>>>>>>>>
>>>>>>>>>>>       #ifdef COMPILER2
>>>>>>>>>>> -    if (MaxVectorSize > 16) {
>>>>>>>>>>> -      // Limit vectors size to 16 bytes on current AMD cpus.
>>>>>>>>>>> +    if (cpu_family() < 0x17 && MaxVectorSize > 16) {
>>>>>>>>>>> +      // Limit vectors size to 16 bytes on AMD cpus < 17h.
>>>>>>>>>>>             FLAG_SET_DEFAULT(MaxVectorSize, 16);
>>>>>>>>>>>           }
>>>>>>>>>>>       #endif // COMPILER2
>>>>>>>>>>> +
>>>>>>>>>>> +    // Some defaults for AMD family 17h
>>>>>>>>>>> +    if ( cpu_family() == 0x17 ) {
>>>>>>>>>>> +      // On family 17h processors use XMM and UnalignedLoadStores
>>>>>>>>>>> for
>>>>>>>>>>> Array Copy
>>>>>>>>>>> +      if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy))
>>>>>>>>>>> {
>>>>>>>>>>> +        UseXMMForArrayCopy = true;
>>>>>>>>>>> +      }
>>>>>>>>>>> +      if (supports_sse2() &&
>>>>>>>>>>> FLAG_IS_DEFAULT(UseUnalignedLoadStores))
>>>>>>>>>>> {
>>>>>>>>>>> +        UseUnalignedLoadStores = true;
>>>>>>>>>>> +      }
>>>>>>>>>>> +      if (supports_bmi2() &&
>>>>>>>>>>> FLAG_IS_DEFAULT(UseBMI2Instructions)) {
>>>>>>>>>>> +        UseBMI2Instructions = true;
>>>>>>>>>>> +      }
>>>>>>>>>>> +      if (MaxVectorSize > 32) {
>>>>>>>>>>> +        FLAG_SET_DEFAULT(MaxVectorSize, 32);
>>>>>>>>>>> +      }
>>>>>>>>>>> +      if (UseSHA) {
>>>>>>>>>>> +        if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
>>>>>>>>>>> +          FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
>>>>>>>>>>> +        } else if (UseSHA512Intrinsics) {
>>>>>>>>>>> +          warning("Intrinsics for SHA-384 and SHA-512 crypto hash
>>>>>>>>>>> functions not available on this CPU.");
>>>>>>>>>>> +          FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
>>>>>>>>>>> +        }
>>>>>>>>>>> +      }
>>>>>>>>>>> +#ifdef COMPILER2
>>>>>>>>>>> +      if (supports_sse4_2()) {
>>>>>>>>>>> +        if (FLAG_IS_DEFAULT(UseFPUForSpilling)) {
>>>>>>>>>>> +          FLAG_SET_DEFAULT(UseFPUForSpilling, true);
>>>>>>>>>>> +        }
>>>>>>>>>>> +      }
>>>>>>>>>>> +#endif
>>>>>>>>>>> +    }
>>>>>>>>>>>         }
>>>>>>>>>>>
>>>>>>>>>>>         if( is_intel() ) { // Intel cpus specific settings
>>>>>>>>>>> diff --git a/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>>> b/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>>> --- a/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>>> +++ b/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>>> @@ -505,6 +505,14 @@
>>>>>>>>>>>             result |= CPU_CLMUL;
>>>>>>>>>>>           if (_cpuid_info.sef_cpuid7_ebx.bits.rtm != 0)
>>>>>>>>>>>             result |= CPU_RTM;
>>>>>>>>>>> +    if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
>>>>>>>>>>> +       result |= CPU_ADX;
>>>>>>>>>>> +    if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
>>>>>>>>>>> +      result |= CPU_BMI2;
>>>>>>>>>>> +    if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
>>>>>>>>>>> +      result |= CPU_SHA;
>>>>>>>>>>> +    if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
>>>>>>>>>>> +      result |= CPU_FMA;
>>>>>>>>>>>
>>>>>>>>>>>           // AMD features.
>>>>>>>>>>>           if (is_amd()) {
>>>>>>>>>>> @@ -515,19 +523,13 @@
>>>>>>>>>>>               result |= CPU_LZCNT;
>>>>>>>>>>>             if (_cpuid_info.ext_cpuid1_ecx.bits.sse4a != 0)
>>>>>>>>>>>               result |= CPU_SSE4A;
>>>>>>>>>>> +      if(_cpuid_info.std_cpuid1_edx.bits.ht != 0)
>>>>>>>>>>> +        result |= CPU_HT;
>>>>>>>>>>>           }
>>>>>>>>>>>           // Intel features.
>>>>>>>>>>>           if(is_intel()) {
>>>>>>>>>>> -      if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
>>>>>>>>>>> -         result |= CPU_ADX;
>>>>>>>>>>> -      if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
>>>>>>>>>>> -        result |= CPU_BMI2;
>>>>>>>>>>> -      if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
>>>>>>>>>>> -        result |= CPU_SHA;
>>>>>>>>>>>             if(_cpuid_info.ext_cpuid1_ecx.bits.lzcnt_intel != 0)
>>>>>>>>>>>               result |= CPU_LZCNT;
>>>>>>>>>>> -      if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
>>>>>>>>>>> -        result |= CPU_FMA;
>>>>>>>>>>>             // for Intel, ecx.bits.misalignsse bit (bit 8)
>>>>>>>>>>> indicates
>>>>>>>>>>> support for prefetchw
>>>>>>>>>>>             if (_cpuid_info.ext_cpuid1_ecx.bits.misalignsse != 0) {
>>>>>>>>>>>               result |= CPU_3DNOW_PREFETCH;
>>>>>>>>>>>
>>>>>>>>>>> **************************************************************
>>>>>>>>>>>
>>>>>>>>>>> Thanks,
>>>>>>>>>>> Rohit
>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>>> On 1/09/2017 1:11 AM, Rohit Arul Raj wrote:
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> On Thu, Aug 31, 2017 at 5:59 PM, David Holmes
>>>>>>>>>>>>>> <david.holmes at oracle.com>
>>>>>>>>>>>>>> wrote:
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> Hi Rohit,
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> On 31/08/2017 7:03 PM, Rohit Arul Raj wrote:
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> I would like a volunteer to review this patch (openJDK9)
>>>>>>>>>>>>>>>> which
>>>>>>>>>>>>>>>> sets
>>>>>>>>>>>>>>>> flag/ISA defaults for newer AMD 17h (EPYC) processor and help
>>>>>>>>>>>>>>>> us
>>>>>>>>>>>>>>>> with
>>>>>>>>>>>>>>>> the commit process.
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> Webrev:
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> https://www.dropbox.com/sh/08bsxaxupg8kbam/AADurTXLGIZ6C-tiIAi_Glyka?dl=0
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> Unfortunately patches can not be accepted from systems outside
>>>>>>>>>>>>>>> the
>>>>>>>>>>>>>>> OpenJDK
>>>>>>>>>>>>>>> infrastructure and ...
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> I have also attached the patch (hg diff -g) for reference.
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> ... unfortunately patches tend to get stripped by the mail
>>>>>>>>>>>>>>> servers.
>>>>>>>>>>>>>>> If
>>>>>>>>>>>>>>> the
>>>>>>>>>>>>>>> patch is small please include it inline. Otherwise you will
>>>>>>>>>>>>>>> need
>>>>>>>>>>>>>>> to
>>>>>>>>>>>>>>> find
>>>>>>>>>>>>>>> an
>>>>>>>>>>>>>>> OpenJDK Author who can host it for you on cr.openjdk.java.net.
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> 3) I have done regression testing using jtreg ($make default)
>>>>>>>>>>>>>>>> and
>>>>>>>>>>>>>>>> didn't find any regressions.
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> Sounds good, but until I see the patch it is hard to comment
>>>>>>>>>>>>>>> on
>>>>>>>>>>>>>>> testing
>>>>>>>>>>>>>>> requirements.
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> Thanks,
>>>>>>>>>>>>>>> David
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> Thanks David,
>>>>>>>>>>>>>> Yes, it's a small patch.
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> diff --git a/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>>>>> b/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>>>>> --- a/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>>>>> +++ b/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>>>>> @@ -1051,6 +1051,22 @@
>>>>>>>>>>>>>>              }
>>>>>>>>>>>>>>              FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
>>>>>>>>>>>>>>            }
>>>>>>>>>>>>>> +    if (supports_sha()) {
>>>>>>>>>>>>>> +      if (FLAG_IS_DEFAULT(UseSHA)) {
>>>>>>>>>>>>>> +        FLAG_SET_DEFAULT(UseSHA, true);
>>>>>>>>>>>>>> +      }
>>>>>>>>>>>>>> +    } else if (UseSHA || UseSHA1Intrinsics ||
>>>>>>>>>>>>>> UseSHA256Intrinsics
>>>>>>>>>>>>>> ||
>>>>>>>>>>>>>> UseSHA512Intrinsics) {
>>>>>>>>>>>>>> +      if (!FLAG_IS_DEFAULT(UseSHA) ||
>>>>>>>>>>>>>> +          !FLAG_IS_DEFAULT(UseSHA1Intrinsics) ||
>>>>>>>>>>>>>> +          !FLAG_IS_DEFAULT(UseSHA256Intrinsics) ||
>>>>>>>>>>>>>> +          !FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
>>>>>>>>>>>>>> +        warning("SHA instructions are not available on this CPU");
>>>>>>>>>>>>>> +      }
>>>>>>>>>>>>>> +      FLAG_SET_DEFAULT(UseSHA, false);
>>>>>>>>>>>>>> +      FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
>>>>>>>>>>>>>> +      FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
>>>>>>>>>>>>>> +      FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
>>>>>>>>>>>>>> +    }
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>            // some defaults for AMD family 15h
>>>>>>>>>>>>>>            if ( cpu_family() == 0x15 ) {
>>>>>>>>>>>>>> @@ -1072,11 +1088,43 @@
>>>>>>>>>>>>>>            }
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>        #ifdef COMPILER2
>>>>>>>>>>>>>> -    if (MaxVectorSize > 16) {
>>>>>>>>>>>>>> -      // Limit vectors size to 16 bytes on current AMD cpus.
>>>>>>>>>>>>>> +    if (cpu_family() < 0x17 && MaxVectorSize > 16) {
>>>>>>>>>>>>>> +      // Limit vectors size to 16 bytes on AMD cpus < 17h.
>>>>>>>>>>>>>>              FLAG_SET_DEFAULT(MaxVectorSize, 16);
>>>>>>>>>>>>>>            }
>>>>>>>>>>>>>>        #endif // COMPILER2
>>>>>>>>>>>>>> +
>>>>>>>>>>>>>> +    // Some defaults for AMD family 17h
>>>>>>>>>>>>>> +    if ( cpu_family() == 0x17 ) {
>>>>>>>>>>>>>> +      // On family 17h processors use XMM and UnalignedLoadStores for Array Copy
>>>>>>>>>>>>>> +      if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
>>>>>>>>>>>>>> +        UseXMMForArrayCopy = true;
>>>>>>>>>>>>>> +      }
>>>>>>>>>>>>>> +      if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
>>>>>>>>>>>>>> +        UseUnalignedLoadStores = true;
>>>>>>>>>>>>>> +      }
>>>>>>>>>>>>>> +      if (supports_bmi2() && FLAG_IS_DEFAULT(UseBMI2Instructions)) {
>>>>>>>>>>>>>> +        UseBMI2Instructions = true;
>>>>>>>>>>>>>> +      }
>>>>>>>>>>>>>> +      if (MaxVectorSize > 32) {
>>>>>>>>>>>>>> +        FLAG_SET_DEFAULT(MaxVectorSize, 32);
>>>>>>>>>>>>>> +      }
>>>>>>>>>>>>>> +      if (UseSHA) {
>>>>>>>>>>>>>> +        if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
>>>>>>>>>>>>>> +          FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
>>>>>>>>>>>>>> +        } else if (UseSHA512Intrinsics) {
>>>>>>>>>>>>>> +          warning("Intrinsics for SHA-384 and SHA-512 crypto hash functions not available on this CPU.");
>>>>>>>>>>>>>> +          FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
>>>>>>>>>>>>>> +        }
>>>>>>>>>>>>>> +      }
>>>>>>>>>>>>>> +#ifdef COMPILER2
>>>>>>>>>>>>>> +      if (supports_sse4_2()) {
>>>>>>>>>>>>>> +        if (FLAG_IS_DEFAULT(UseFPUForSpilling)) {
>>>>>>>>>>>>>> +          FLAG_SET_DEFAULT(UseFPUForSpilling, true);
>>>>>>>>>>>>>> +        }
>>>>>>>>>>>>>> +      }
>>>>>>>>>>>>>> +#endif
>>>>>>>>>>>>>> +    }
>>>>>>>>>>>>>>          }
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>          if( is_intel() ) { // Intel cpus specific settings
>>>>>>>>>>>>>> diff --git a/src/cpu/x86/vm/vm_version_x86.hpp b/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>>>>>> --- a/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>>>>>> +++ b/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>>>>>> @@ -513,6 +513,16 @@
>>>>>>>>>>>>>>                result |= CPU_LZCNT;
>>>>>>>>>>>>>>              if (_cpuid_info.ext_cpuid1_ecx.bits.sse4a != 0)
>>>>>>>>>>>>>>                result |= CPU_SSE4A;
>>>>>>>>>>>>>> +      if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
>>>>>>>>>>>>>> +        result |= CPU_BMI2;
>>>>>>>>>>>>>> +      if(_cpuid_info.std_cpuid1_edx.bits.ht != 0)
>>>>>>>>>>>>>> +        result |= CPU_HT;
>>>>>>>>>>>>>> +      if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
>>>>>>>>>>>>>> +        result |= CPU_ADX;
>>>>>>>>>>>>>> +      if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
>>>>>>>>>>>>>> +        result |= CPU_SHA;
>>>>>>>>>>>>>> +      if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
>>>>>>>>>>>>>> +        result |= CPU_FMA;
>>>>>>>>>>>>>>            }
>>>>>>>>>>>>>>            // Intel features.
>>>>>>>>>>>>>>            if(is_intel()) {
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> Regards,
>>>>>>>>>>>>>> Rohit
>>>>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>
>>>>>>
>>