RFR: Newer AMD 17h (EPYC) Processor family defaults

Fri Nov 3 07:21:33 UTC 2017

Vladimir, Thanks for the info.

Regards,
Rohit

On Fri, Nov 3, 2017 at 10:33 AM, Vladimir Kozlov <vladimir.kozlov at oracle.com
> wrote:

> It was pushed long ago - Oct 3, see:
>
> http://hg.openjdk.java.net/jdk10/hs/rev/fde01e0fccb4
> https://bugs.openjdk.java.net/browse/JDK-8187219
>
> But we have two repositories, jdk10/jdk10 (master) and jdk10/hs, for Hotspot
> changes. We have not yet promoted changes from jdk10/hs to jdk10/jdk10. Testing
> is currently under way to do the promotion, but I cannot say in which jdk 10
> build it will happen. Look for the push notification in JDK-8187219.
>
> Regards,
> Vladimir
>
> On 11/2/17 8:30 PM, Rohit Arul Raj wrote:
>
>> Hello Vladimir,
>>
>> Is there any update on pushing these changes to jdk10?
>>
>> Thanks,
>> Rohit
>>
>> On Fri, Sep 22, 2017 at 1:11 PM, Rohit Arul Raj <rohitarulraj at gmail.com>
>> wrote:
>>
>> Thanks Vladimir,
>>>
>>> On Wed, Sep 20, 2017 at 10:07 PM, Vladimir Kozlov
>>> <vladimir.kozlov at oracle.com> wrote:
>>>
>>>>        __ cmpl(rax, 0x80000000);     // Is cpuid(0x80000001) supported?
>>>>>        __ jcc(Assembler::belowEqual, done);
>>>>>        __ cmpl(rax, 0x80000004);     // Is cpuid(0x80000005) supported?
>>>>> -    __ jccb(Assembler::belowEqual, ext_cpuid1);
>>>>> +   __ jcc(Assembler::belowEqual, ext_cpuid1);
>>>>>
>>>>
>>>>
>>>> Good. You may need to increase the size of the buffer too (to be safe), to 1100:
>>>
>>>>
>>>> static const int stub_size = 1000;
>>>>
>>>>
>>> Please find the updated patch after the requested change.
>>>
>>> diff --git a/src/cpu/x86/vm/vm_version_x86.cpp
>>> b/src/cpu/x86/vm/vm_version_x86.cpp
>>> --- a/src/cpu/x86/vm/vm_version_x86.cpp
>>> +++ b/src/cpu/x86/vm/vm_version_x86.cpp
>>> @@ -46,7 +46,7 @@
>>>   address VM_Version::_cpuinfo_cont_addr = 0;
>>>
>>>   static BufferBlob* stub_blob;
>>> -static const int stub_size = 1000;
>>> +static const int stub_size = 1100;
>>>
>>>   extern "C" {
>>>     typedef void (*get_cpu_info_stub_t)(void*);
>>> @@ -70,7 +70,7 @@
>>>       bool use_evex = FLAG_IS_DEFAULT(UseAVX) || (UseAVX > 2);
>>>
>>>       Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4;
>>> -    Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7,
>>> done, wrapup;
>>> +    Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7,
>>> ext_cpuid8, done, wrapup;
>>>       Label legacy_setup, save_restore_except, legacy_save_restore,
>>> start_simd_check;
>>>
>>>       StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub");
>>> @@ -267,14 +267,30 @@
>>>       __ cmpl(rax, 0x80000000);     // Is cpuid(0x80000001) supported?
>>>       __ jcc(Assembler::belowEqual, done);
>>>       __ cmpl(rax, 0x80000004);     // Is cpuid(0x80000005) supported?
>>> -    __ jccb(Assembler::belowEqual, ext_cpuid1);
>>> +    __ jcc(Assembler::belowEqual, ext_cpuid1);
>>>       __ cmpl(rax, 0x80000006);     // Is cpuid(0x80000007) supported?
>>>       __ jccb(Assembler::belowEqual, ext_cpuid5);
>>>       __ cmpl(rax, 0x80000007);     // Is cpuid(0x80000008) supported?
>>>       __ jccb(Assembler::belowEqual, ext_cpuid7);
>>> +    __ cmpl(rax, 0x80000008);     // Is cpuid(0x80000009 and above)
>>> supported?
>>> +    __ jccb(Assembler::belowEqual, ext_cpuid8);
>>> +    __ cmpl(rax, 0x8000001E);     // Is cpuid(0x8000001E) supported?
>>> +    __ jccb(Assembler::below, ext_cpuid8);
>>> +    //
>>> +    // Extended cpuid(0x8000001E)
>>> +    //
>>> +    __ movl(rax, 0x8000001E);
>>> +    __ cpuid();
>>> +    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_
>>> cpuid1E_offset())));
>>> +    __ movl(Address(rsi, 0), rax);
>>> +    __ movl(Address(rsi, 4), rbx);
>>> +    __ movl(Address(rsi, 8), rcx);
>>> +    __ movl(Address(rsi,12), rdx);
>>> +
>>>       //
>>>       // Extended cpuid(0x80000008)
>>>       //
>>> +    __ bind(ext_cpuid8);
>>>       __ movl(rax, 0x80000008);
>>>       __ cpuid();
>>>       __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid
>>> 8_offset())));
>>> @@ -1109,11 +1125,27 @@
>>>       }
>>>
>>>   #ifdef COMPILER2
>>> -    if (MaxVectorSize > 16) {
>>> -      // Limit vectors size to 16 bytes on current AMD cpus.
>>> +    if (cpu_family() < 0x17 && MaxVectorSize > 16) {
>>> +      // Limit vectors size to 16 bytes on AMD cpus < 17h.
>>>         FLAG_SET_DEFAULT(MaxVectorSize, 16);
>>>       }
>>>   #endif // COMPILER2
>>> +
>>> +    // Some defaults for AMD family 17h
>>> +    if ( cpu_family() == 0x17 ) {
>>> +      // On family 17h processors use XMM and UnalignedLoadStores for
>>> Array Copy
>>> +      if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
>>> +        FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
>>> +      }
>>> +      if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
>>> +        FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
>>> +      }
>>> +#ifdef COMPILER2
>>> +      if (supports_sse4_2() && FLAG_IS_DEFAULT(UseFPUForSpilling)) {
>>> +        FLAG_SET_DEFAULT(UseFPUForSpilling, true);
>>> +      }
>>> +#endif
>>> +    }
>>>     }
>>>
>>>     if( is_intel() ) { // Intel cpus specific settings
>>> diff --git a/src/cpu/x86/vm/vm_version_x86.hpp
>>> b/src/cpu/x86/vm/vm_version_x86.hpp
>>> --- a/src/cpu/x86/vm/vm_version_x86.hpp
>>> +++ b/src/cpu/x86/vm/vm_version_x86.hpp
>>> @@ -228,6 +228,15 @@
>>>       } bits;
>>>     };
>>>
>>> +  union ExtCpuid1EEbx {
>>> +    uint32_t value;
>>> +    struct {
>>> +      uint32_t                  : 8,
>>> +               threads_per_core : 8,
>>> +                                : 16;
>>> +    } bits;
>>> +  };
>>> +
>>>     union XemXcr0Eax {
>>>       uint32_t value;
>>>       struct {
>>> @@ -398,6 +407,12 @@
>>>       ExtCpuid8Ecx ext_cpuid8_ecx;
>>>       uint32_t     ext_cpuid8_edx; // reserved
>>>
>>> +    // cpuid function 0x8000001E // AMD 17h
>>> +    uint32_t      ext_cpuid1E_eax;
>>> +    ExtCpuid1EEbx ext_cpuid1E_ebx; // threads per core (AMD17h)
>>> +    uint32_t      ext_cpuid1E_ecx;
>>> +    uint32_t      ext_cpuid1E_edx; // unused currently
>>> +
>>>       // extended control register XCR0 (the XFEATURE_ENABLED_MASK
>>> register)
>>>       XemXcr0Eax   xem_xcr0_eax;
>>>       uint32_t     xem_xcr0_edx; // reserved
>>> @@ -505,6 +520,14 @@
>>>         result |= CPU_CLMUL;
>>>       if (_cpuid_info.sef_cpuid7_ebx.bits.rtm != 0)
>>>         result |= CPU_RTM;
>>> +    if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
>>> +       result |= CPU_ADX;
>>> +    if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
>>> +      result |= CPU_BMI2;
>>> +    if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
>>> +      result |= CPU_SHA;
>>> +    if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
>>> +      result |= CPU_FMA;
>>>
>>>       // AMD features.
>>>       if (is_amd()) {
>>> @@ -518,16 +541,8 @@
>>>       }
>>>       // Intel features.
>>>       if(is_intel()) {
>>> -      if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
>>> -         result |= CPU_ADX;
>>> -      if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
>>> -        result |= CPU_BMI2;
>>> -      if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
>>> -        result |= CPU_SHA;
>>>         if(_cpuid_info.ext_cpuid1_ecx.bits.lzcnt_intel != 0)
>>>           result |= CPU_LZCNT;
>>> -      if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
>>> -        result |= CPU_FMA;
>>>         // for Intel, ecx.bits.misalignsse bit (bit 8) indicates
>>> support for prefetchw
>>>         if (_cpuid_info.ext_cpuid1_ecx.bits.misalignsse != 0) {
>>>           result |= CPU_3DNOW_PREFETCH;
>>> @@ -590,6 +605,7 @@
>>>     static ByteSize ext_cpuid5_offset() { return
>>> byte_offset_of(CpuidInfo, ext_cpuid5_eax); }
>>>     static ByteSize ext_cpuid7_offset() { return
>>> byte_offset_of(CpuidInfo, ext_cpuid7_eax); }
>>>     static ByteSize ext_cpuid8_offset() { return
>>> byte_offset_of(CpuidInfo, ext_cpuid8_eax); }
>>> +  static ByteSize ext_cpuid1E_offset() { return
>>> byte_offset_of(CpuidInfo, ext_cpuid1E_eax); }
>>>     static ByteSize tpl_cpuidB0_offset() { return
>>> byte_offset_of(CpuidInfo, tpl_cpuidB0_eax); }
>>>     static ByteSize tpl_cpuidB1_offset() { return
>>> byte_offset_of(CpuidInfo, tpl_cpuidB1_eax); }
>>>     static ByteSize tpl_cpuidB2_offset() { return
>>> byte_offset_of(CpuidInfo, tpl_cpuidB2_eax); }
>>> @@ -673,8 +689,12 @@
>>>       if (is_intel() && supports_processor_topology()) {
>>>         result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
>>>       } else if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) {
>>> -      result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
>>> -               cores_per_cpu();
>>> +      if (cpu_family() >= 0x17) {
>>> +        result = _cpuid_info.ext_cpuid1E_ebx.bits.threads_per_core + 1;
>>> +      } else {
>>> +        result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
>>> +                 cores_per_cpu();
>>> +      }
>>>       }
>>>       return (result == 0 ? 1 : result);
>>>     }
>>>
>>> Regards,
>>> Rohit
>>>
>>>
>>>>> diff --git a/src/cpu/x86/vm/vm_version_x86.cpp
>>>>> b/src/cpu/x86/vm/vm_version_x86.cpp
>>>>> --- a/src/cpu/x86/vm/vm_version_x86.cpp
>>>>> +++ b/src/cpu/x86/vm/vm_version_x86.cpp
>>>>> @@ -70,7 +70,7 @@
>>>>>        bool use_evex = FLAG_IS_DEFAULT(UseAVX) || (UseAVX > 2);
>>>>>
>>>>>        Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4;
>>>>> -    Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7,
>>>>> done, wrapup;
>>>>> +    Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7,
>>>>> ext_cpuid8, done, wrapup;
>>>>>        Label legacy_setup, save_restore_except, legacy_save_restore,
>>>>> start_simd_check;
>>>>>
>>>>>        StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub");
>>>>> @@ -267,14 +267,30 @@
>>>>>        __ cmpl(rax, 0x80000000);     // Is cpuid(0x80000001) supported?
>>>>>        __ jcc(Assembler::belowEqual, done);
>>>>>        __ cmpl(rax, 0x80000004);     // Is cpuid(0x80000005) supported?
>>>>> -    __ jccb(Assembler::belowEqual, ext_cpuid1);
>>>>> +    __ jcc(Assembler::belowEqual, ext_cpuid1);
>>>>>        __ cmpl(rax, 0x80000006);     // Is cpuid(0x80000007) supported?
>>>>>        __ jccb(Assembler::belowEqual, ext_cpuid5);
>>>>>        __ cmpl(rax, 0x80000007);     // Is cpuid(0x80000008) supported?
>>>>>        __ jccb(Assembler::belowEqual, ext_cpuid7);
>>>>> +    __ cmpl(rax, 0x80000008);     // Is cpuid(0x80000009 and above)
>>>>> supported?
>>>>> +    __ jccb(Assembler::belowEqual, ext_cpuid8);
>>>>> +    __ cmpl(rax, 0x8000001E);     // Is cpuid(0x8000001E) supported?
>>>>> +    __ jccb(Assembler::below, ext_cpuid8);
>>>>> +    //
>>>>> +    // Extended cpuid(0x8000001E)
>>>>> +    //
>>>>> +    __ movl(rax, 0x8000001E);
>>>>> +    __ cpuid();
>>>>> +    __ lea(rsi, Address(rbp,
>>>>> in_bytes(VM_Version::ext_cpuid1E_offset())));
>>>>> +    __ movl(Address(rsi, 0), rax);
>>>>> +    __ movl(Address(rsi, 4), rbx);
>>>>> +    __ movl(Address(rsi, 8), rcx);
>>>>> +    __ movl(Address(rsi,12), rdx);
>>>>> +
>>>>>        //
>>>>>        // Extended cpuid(0x80000008)
>>>>>        //
>>>>> +    __ bind(ext_cpuid8);
>>>>>        __ movl(rax, 0x80000008);
>>>>>        __ cpuid();
>>>>>        __ lea(rsi, Address(rbp,
>>>>> in_bytes(VM_Version::ext_cpuid8_offset())));
>>>>> @@ -1109,11 +1125,27 @@
>>>>>        }
>>>>>
>>>>>    #ifdef COMPILER2
>>>>> -    if (MaxVectorSize > 16) {
>>>>> -      // Limit vectors size to 16 bytes on current AMD cpus.
>>>>> +    if (cpu_family() < 0x17 && MaxVectorSize > 16) {
>>>>> +      // Limit vectors size to 16 bytes on AMD cpus < 17h.
>>>>>          FLAG_SET_DEFAULT(MaxVectorSize, 16);
>>>>>        }
>>>>>    #endif // COMPILER2
>>>>> +
>>>>> +    // Some defaults for AMD family 17h
>>>>> +    if ( cpu_family() == 0x17 ) {
>>>>> +      // On family 17h processors use XMM and UnalignedLoadStores for
>>>>> Array Copy
>>>>> +      if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
>>>>> +        FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
>>>>> +      }
>>>>> +      if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores))
>>>>>
>>>> {
>>>
>>>> +        FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
>>>>> +      }
>>>>> +#ifdef COMPILER2
>>>>> +      if (supports_sse4_2() && FLAG_IS_DEFAULT(UseFPUForSpilling)) {
>>>>> +        FLAG_SET_DEFAULT(UseFPUForSpilling, true);
>>>>> +      }
>>>>> +#endif
>>>>> +    }
>>>>>      }
>>>>>
>>>>>      if( is_intel() ) { // Intel cpus specific settings
>>>>> diff --git a/src/cpu/x86/vm/vm_version_x86.hpp
>>>>> b/src/cpu/x86/vm/vm_version_x86.hpp
>>>>> --- a/src/cpu/x86/vm/vm_version_x86.hpp
>>>>> +++ b/src/cpu/x86/vm/vm_version_x86.hpp
>>>>> @@ -228,6 +228,15 @@
>>>>>        } bits;
>>>>>      };
>>>>>
>>>>> +  union ExtCpuid1EEbx {
>>>>> +    uint32_t value;
>>>>> +    struct {
>>>>> +      uint32_t                  : 8,
>>>>> +               threads_per_core : 8,
>>>>> +                                : 16;
>>>>> +    } bits;
>>>>> +  };
>>>>> +
>>>>>      union XemXcr0Eax {
>>>>>        uint32_t value;
>>>>>        struct {
>>>>> @@ -398,6 +407,12 @@
>>>>>        ExtCpuid8Ecx ext_cpuid8_ecx;
>>>>>        uint32_t     ext_cpuid8_edx; // reserved
>>>>>
>>>>> +    // cpuid function 0x8000001E // AMD 17h
>>>>> +    uint32_t      ext_cpuid1E_eax;
>>>>> +    ExtCpuid1EEbx ext_cpuid1E_ebx; // threads per core (AMD17h)
>>>>> +    uint32_t      ext_cpuid1E_ecx;
>>>>> +    uint32_t      ext_cpuid1E_edx; // unused currently
>>>>> +
>>>>>        // extended control register XCR0 (the XFEATURE_ENABLED_MASK
>>>>> register)
>>>>>        XemXcr0Eax   xem_xcr0_eax;
>>>>>        uint32_t     xem_xcr0_edx; // reserved
>>>>> @@ -505,6 +520,14 @@
>>>>>          result |= CPU_CLMUL;
>>>>>        if (_cpuid_info.sef_cpuid7_ebx.bits.rtm != 0)
>>>>>          result |= CPU_RTM;
>>>>> +    if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
>>>>> +       result |= CPU_ADX;
>>>>> +    if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
>>>>> +      result |= CPU_BMI2;
>>>>> +    if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
>>>>> +      result |= CPU_SHA;
>>>>> +    if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
>>>>> +      result |= CPU_FMA;
>>>>>
>>>>>        // AMD features.
>>>>>        if (is_amd()) {
>>>>> @@ -518,16 +541,8 @@
>>>>>        }
>>>>>        // Intel features.
>>>>>        if(is_intel()) {
>>>>> -      if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
>>>>> -         result |= CPU_ADX;
>>>>> -      if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
>>>>> -        result |= CPU_BMI2;
>>>>> -      if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
>>>>> -        result |= CPU_SHA;
>>>>>          if(_cpuid_info.ext_cpuid1_ecx.bits.lzcnt_intel != 0)
>>>>>            result |= CPU_LZCNT;
>>>>> -      if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
>>>>> -        result |= CPU_FMA;
>>>>>          // for Intel, ecx.bits.misalignsse bit (bit 8) indicates
>>>>> support for prefetchw
>>>>>          if (_cpuid_info.ext_cpuid1_ecx.bits.misalignsse != 0) {
>>>>>            result |= CPU_3DNOW_PREFETCH;
>>>>> @@ -590,6 +605,7 @@
>>>>>      static ByteSize ext_cpuid5_offset() { return
>>>>> byte_offset_of(CpuidInfo, ext_cpuid5_eax); }
>>>>>      static ByteSize ext_cpuid7_offset() { return
>>>>> byte_offset_of(CpuidInfo, ext_cpuid7_eax); }
>>>>>      static ByteSize ext_cpuid8_offset() { return
>>>>> byte_offset_of(CpuidInfo, ext_cpuid8_eax); }
>>>>> +  static ByteSize ext_cpuid1E_offset() { return
>>>>> byte_offset_of(CpuidInfo, ext_cpuid1E_eax); }
>>>>>      static ByteSize tpl_cpuidB0_offset() { return
>>>>> byte_offset_of(CpuidInfo, tpl_cpuidB0_eax); }
>>>>>      static ByteSize tpl_cpuidB1_offset() { return
>>>>> byte_offset_of(CpuidInfo, tpl_cpuidB1_eax); }
>>>>>      static ByteSize tpl_cpuidB2_offset() { return
>>>>> byte_offset_of(CpuidInfo, tpl_cpuidB2_eax); }
>>>>> @@ -673,8 +689,12 @@
>>>>>        if (is_intel() && supports_processor_topology()) {
>>>>>          result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
>>>>>        } else if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) {
>>>>> -      result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
>>>>> -               cores_per_cpu();
>>>>> +      if (cpu_family() >= 0x17) {
>>>>> +        result = _cpuid_info.ext_cpuid1E_ebx.bits.threads_per_core +
>>>>>
>>>> 1;
>>>
>>>> +      } else {
>>>>> +        result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
>>>>> +                 cores_per_cpu();
>>>>> +      }
>>>>>        }
>>>>>        return (result == 0 ? 1 : result);
>>>>>      }
>>>>>
>>>>> Please let me know your comments.
>>>>> Thanks for your review.
>>>>>
>>>>> Regards,
>>>>> Rohit
>>>>>
>>>>>
>>>>>>
>>>>>> On 9/11/17 9:52 PM, Rohit Arul Raj wrote:
>>>>>>
>>>>>>>
>>>>>>>
>>>>>>> Hello David,
>>>>>>>
>>>>>>>
>>>>>>>>>
>>>>>>>>> 1. ExtCpuid1EEx
>>>>>>>>>
>>>>>>>>> Should this be ExtCpuid1EEbx? (I see the naming here is somewhat
>>>>>>>>> inconsistent - and potentially confusing: I would have preferred to
>>>>>>>>> see
>>>>>>>>> things like ExtCpuid_1E_Ebx, to make it clear.)
>>>>>>>>>
>>>>>>>>
>>>>>>>>
>>>>>>>>
>>>>>>>> Yes, I can change it accordingly.
>>>>>>>>
>>>>>>>>
>>>>>>> I have attached the updated, re-tested patch as per your comments above.
>>>
>>>>
>>>>>>> diff --git a/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>> b/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>> --- a/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>> +++ b/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>> @@ -70,7 +70,7 @@
>>>>>>>         bool use_evex = FLAG_IS_DEFAULT(UseAVX) || (UseAVX > 2);
>>>>>>>
>>>>>>>         Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4;
>>>>>>> -    Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7,
>>>>>>> done, wrapup;
>>>>>>> +    Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7,
>>>>>>> ext_cpuid8, done, wrapup;
>>>>>>>         Label legacy_setup, save_restore_except, legacy_save_restore,
>>>>>>> start_simd_check;
>>>>>>>
>>>>>>>         StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub");
>>>>>>> @@ -272,9 +272,23 @@
>>>>>>>         __ jccb(Assembler::belowEqual, ext_cpuid5);
>>>>>>>         __ cmpl(rax, 0x80000007);     // Is cpuid(0x80000008)
>>>>>>>
>>>>>> supported?
>>>
>>>>         __ jccb(Assembler::belowEqual, ext_cpuid7);
>>>>>>> +    __ cmpl(rax, 0x80000008);     // Is cpuid(0x8000001E) supported?
>>>>>>> +    __ jccb(Assembler::belowEqual, ext_cpuid8);
>>>>>>> +    //
>>>>>>> +    // Extended cpuid(0x8000001E)
>>>>>>> +    //
>>>>>>> +    __ movl(rax, 0x8000001E);
>>>>>>> +    __ cpuid();
>>>>>>> +    __ lea(rsi, Address(rbp,
>>>>>>> in_bytes(VM_Version::ext_cpuid_1E_offset())));
>>>>>>> +    __ movl(Address(rsi, 0), rax);
>>>>>>> +    __ movl(Address(rsi, 4), rbx);
>>>>>>> +    __ movl(Address(rsi, 8), rcx);
>>>>>>> +    __ movl(Address(rsi,12), rdx);
>>>>>>> +
>>>>>>>         //
>>>>>>>         // Extended cpuid(0x80000008)
>>>>>>>         //
>>>>>>> +    __ bind(ext_cpuid8);
>>>>>>>         __ movl(rax, 0x80000008);
>>>>>>>         __ cpuid();
>>>>>>>         __ lea(rsi, Address(rbp,
>>>>>>> in_bytes(VM_Version::ext_cpuid8_offset())));
>>>>>>> @@ -1109,11 +1123,27 @@
>>>>>>>         }
>>>>>>>
>>>>>>>     #ifdef COMPILER2
>>>>>>> -    if (MaxVectorSize > 16) {
>>>>>>> -      // Limit vectors size to 16 bytes on current AMD cpus.
>>>>>>> +    if (cpu_family() < 0x17 && MaxVectorSize > 16) {
>>>>>>> +      // Limit vectors size to 16 bytes on AMD cpus < 17h.
>>>>>>>           FLAG_SET_DEFAULT(MaxVectorSize, 16);
>>>>>>>         }
>>>>>>>     #endif // COMPILER2
>>>>>>> +
>>>>>>> +    // Some defaults for AMD family 17h
>>>>>>> +    if ( cpu_family() == 0x17 ) {
>>>>>>> +      // On family 17h processors use XMM and UnalignedLoadStores
>>>>>>> for
>>>>>>> Array Copy
>>>>>>> +      if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
>>>>>>> +        FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
>>>>>>> +      }
>>>>>>> +      if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLo
>>>>>>> adStores))
>>>>>>>
>>>>>> {
>>>
>>>> +        FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
>>>>>>> +      }
>>>>>>> +#ifdef COMPILER2
>>>>>>> +      if (supports_sse4_2() && FLAG_IS_DEFAULT(UseFPUForSpilling))
>>>>>>> {
>>>>>>> +        FLAG_SET_DEFAULT(UseFPUForSpilling, true);
>>>>>>> +      }
>>>>>>> +#endif
>>>>>>> +    }
>>>>>>>       }
>>>>>>>
>>>>>>>       if( is_intel() ) { // Intel cpus specific settings
>>>>>>> diff --git a/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>> b/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>> --- a/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>> +++ b/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>> @@ -228,6 +228,15 @@
>>>>>>>         } bits;
>>>>>>>       };
>>>>>>>
>>>>>>> +  union ExtCpuid_1E_Ebx {
>>>>>>> +    uint32_t value;
>>>>>>> +    struct {
>>>>>>> +      uint32_t                  : 8,
>>>>>>> +               threads_per_core : 8,
>>>>>>> +                                : 16;
>>>>>>> +    } bits;
>>>>>>> +  };
>>>>>>> +
>>>>>>>       union XemXcr0Eax {
>>>>>>>         uint32_t value;
>>>>>>>         struct {
>>>>>>> @@ -398,6 +407,12 @@
>>>>>>>         ExtCpuid8Ecx ext_cpuid8_ecx;
>>>>>>>         uint32_t     ext_cpuid8_edx; // reserved
>>>>>>>
>>>>>>> +    // cpuid function 0x8000001E // AMD 17h
>>>>>>> +    uint32_t        ext_cpuid_1E_eax;
>>>>>>> +    ExtCpuid_1E_Ebx ext_cpuid_1E_ebx; // threads per core (AMD17h)
>>>>>>> +    uint32_t        ext_cpuid_1E_ecx;
>>>>>>> +    uint32_t        ext_cpuid_1E_edx; // unused currently
>>>>>>> +
>>>>>>>         // extended control register XCR0 (the XFEATURE_ENABLED_MASK
>>>>>>> register)
>>>>>>>         XemXcr0Eax   xem_xcr0_eax;
>>>>>>>         uint32_t     xem_xcr0_edx; // reserved
>>>>>>> @@ -505,6 +520,14 @@
>>>>>>>           result |= CPU_CLMUL;
>>>>>>>         if (_cpuid_info.sef_cpuid7_ebx.bits.rtm != 0)
>>>>>>>           result |= CPU_RTM;
>>>>>>> +    if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
>>>>>>> +       result |= CPU_ADX;
>>>>>>> +    if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
>>>>>>> +      result |= CPU_BMI2;
>>>>>>> +    if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
>>>>>>> +      result |= CPU_SHA;
>>>>>>> +    if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
>>>>>>> +      result |= CPU_FMA;
>>>>>>>
>>>>>>>         // AMD features.
>>>>>>>         if (is_amd()) {
>>>>>>> @@ -518,16 +541,8 @@
>>>>>>>         }
>>>>>>>         // Intel features.
>>>>>>>         if(is_intel()) {
>>>>>>> -      if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
>>>>>>> -         result |= CPU_ADX;
>>>>>>> -      if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
>>>>>>> -        result |= CPU_BMI2;
>>>>>>> -      if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
>>>>>>> -        result |= CPU_SHA;
>>>>>>>           if(_cpuid_info.ext_cpuid1_ecx.bits.lzcnt_intel != 0)
>>>>>>>             result |= CPU_LZCNT;
>>>>>>> -      if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
>>>>>>> -        result |= CPU_FMA;
>>>>>>>           // for Intel, ecx.bits.misalignsse bit (bit 8) indicates
>>>>>>> support for prefetchw
>>>>>>>           if (_cpuid_info.ext_cpuid1_ecx.bits.misalignsse != 0) {
>>>>>>>             result |= CPU_3DNOW_PREFETCH;
>>>>>>> @@ -590,6 +605,7 @@
>>>>>>>       static ByteSize ext_cpuid5_offset() { return
>>>>>>> byte_offset_of(CpuidInfo, ext_cpuid5_eax); }
>>>>>>>       static ByteSize ext_cpuid7_offset() { return
>>>>>>> byte_offset_of(CpuidInfo, ext_cpuid7_eax); }
>>>>>>>       static ByteSize ext_cpuid8_offset() { return
>>>>>>> byte_offset_of(CpuidInfo, ext_cpuid8_eax); }
>>>>>>> +  static ByteSize ext_cpuid_1E_offset() { return
>>>>>>> byte_offset_of(CpuidInfo, ext_cpuid_1E_eax); }
>>>>>>>       static ByteSize tpl_cpuidB0_offset() { return
>>>>>>> byte_offset_of(CpuidInfo, tpl_cpuidB0_eax); }
>>>>>>>       static ByteSize tpl_cpuidB1_offset() { return
>>>>>>> byte_offset_of(CpuidInfo, tpl_cpuidB1_eax); }
>>>>>>>       static ByteSize tpl_cpuidB2_offset() { return
>>>>>>> byte_offset_of(CpuidInfo, tpl_cpuidB2_eax); }
>>>>>>> @@ -673,8 +689,11 @@
>>>>>>>         if (is_intel() && supports_processor_topology()) {
>>>>>>>           result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
>>>>>>>         } else if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) {
>>>>>>> -      result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
>>>>>>> -               cores_per_cpu();
>>>>>>> +      if (cpu_family() >= 0x17)
>>>>>>> +        result = _cpuid_info.ext_cpuid_1E_ebx.bits.threads_per_core
>>>>>>>
>>>>>> +
>>>
>>>> 1;
>>>>>>> +      else
>>>>>>> +        result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
>>>>>>> +                 cores_per_cpu();
>>>>>>>         }
>>>>>>>         return (result == 0 ? 1 : result);
>>>>>>>       }
>>>>>>>
>>>>>>>
>>>>>>> Please let me know your comments
>>>>>>>
>>>>>>> Thanks for your time.
>>>>>>>
>>>>>>> Regards,
>>>>>>> Rohit
>>>>>>>
>>>>>>>
>>>>>>> Thanks,
>>>>>>>>> David
>>>>>>>>> -----
>>>>>>>>>
>>>>>>>>>
>>>>>>>>> Reference:
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>> https://support.amd.com/TechDocs/54945_PPR_Family_17h_Models_00h-0Fh.pdf [Pg 82]
>>>>>>>>>>
>>>>>>>>>>         CPUID_Fn8000001E_EBX [Core Identifiers] (CoreId)
>>>>>>>>>>           15:8 ThreadsPerCore: threads per core. Read-only. Reset:
>>>>>>>>>> XXh.
>>>>>>>>>> The number of threads per core is ThreadsPerCore+1.
>>>>>>>>>>
>>>>>>>>>> diff --git a/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>> b/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>> --- a/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>> +++ b/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>> @@ -70,7 +70,7 @@
>>>>>>>>>>          bool use_evex = FLAG_IS_DEFAULT(UseAVX) || (UseAVX > 2);
>>>>>>>>>>
>>>>>>>>>>          Label detect_486, cpu486, detect_586, std_cpuid1,
>>>>>>>>>>
>>>>>>>>> std_cpuid4;
>>>
>>>> -    Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5,
>>>>>>>>>>
>>>>>>>>> ext_cpuid7,
>>>
>>>> done, wrapup;
>>>>>>>>>> +    Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5,
>>>>>>>>>>
>>>>>>>>> ext_cpuid7,
>>>
>>>> ext_cpuid8, done, wrapup;
>>>>>>>>>>          Label legacy_setup, save_restore_except,
>>>>>>>>>>
>>>>>>>>> legacy_save_restore,
>>>
>>>> start_simd_check;
>>>>>>>>>>
>>>>>>>>>>          StubCodeMark mark(this, "VM_Version",
>>>>>>>>>> "get_cpu_info_stub");
>>>>>>>>>> @@ -272,9 +272,23 @@
>>>>>>>>>>          __ jccb(Assembler::belowEqual, ext_cpuid5);
>>>>>>>>>>          __ cmpl(rax, 0x80000007);     // Is cpuid(0x80000008)
>>>>>>>>>> supported?
>>>>>>>>>>          __ jccb(Assembler::belowEqual, ext_cpuid7);
>>>>>>>>>> +    __ cmpl(rax, 0x80000008);     // Is cpuid(0x8000001E)
>>>>>>>>>>
>>>>>>>>> supported?
>>>
>>>> +    __ jccb(Assembler::belowEqual, ext_cpuid8);
>>>>>>>>>> +    //
>>>>>>>>>> +    // Extended cpuid(0x8000001E)
>>>>>>>>>> +    //
>>>>>>>>>> +    __ movl(rax, 0x8000001E);
>>>>>>>>>> +    __ cpuid();
>>>>>>>>>> +    __ lea(rsi, Address(rbp,
>>>>>>>>>> in_bytes(VM_Version::ext_cpuid1E_offset())));
>>>>>>>>>> +    __ movl(Address(rsi, 0), rax);
>>>>>>>>>> +    __ movl(Address(rsi, 4), rbx);
>>>>>>>>>> +    __ movl(Address(rsi, 8), rcx);
>>>>>>>>>> +    __ movl(Address(rsi,12), rdx);
>>>>>>>>>> +
>>>>>>>>>>          //
>>>>>>>>>>          // Extended cpuid(0x80000008)
>>>>>>>>>>          //
>>>>>>>>>> +    __ bind(ext_cpuid8);
>>>>>>>>>>          __ movl(rax, 0x80000008);
>>>>>>>>>>          __ cpuid();
>>>>>>>>>>          __ lea(rsi, Address(rbp,
>>>>>>>>>> in_bytes(VM_Version::ext_cpuid8_offset())));
>>>>>>>>>> @@ -1109,11 +1123,27 @@
>>>>>>>>>>          }
>>>>>>>>>>
>>>>>>>>>>      #ifdef COMPILER2
>>>>>>>>>> -    if (MaxVectorSize > 16) {
>>>>>>>>>> -      // Limit vectors size to 16 bytes on current AMD cpus.
>>>>>>>>>> +    if (cpu_family() < 0x17 && MaxVectorSize > 16) {
>>>>>>>>>> +      // Limit vectors size to 16 bytes on AMD cpus < 17h.
>>>>>>>>>>            FLAG_SET_DEFAULT(MaxVectorSize, 16);
>>>>>>>>>>          }
>>>>>>>>>>      #endif // COMPILER2
>>>>>>>>>> +
>>>>>>>>>> +    // Some defaults for AMD family 17h
>>>>>>>>>> +    if ( cpu_family() == 0x17 ) {
>>>>>>>>>> +      // On family 17h processors use XMM and UnalignedLoadStores
>>>>>>>>>> for
>>>>>>>>>> Array Copy
>>>>>>>>>> +      if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArray
>>>>>>>>>> Copy))
>>>>>>>>>>
>>>>>>>>> {
>>>
>>>> +        FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
>>>>>>>>>> +      }
>>>>>>>>>> +      if (supports_sse2() &&
>>>>>>>>>> FLAG_IS_DEFAULT(UseUnalignedLoadStores))
>>>>>>>>>> {
>>>>>>>>>> +        FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
>>>>>>>>>> +      }
>>>>>>>>>> +#ifdef COMPILER2
>>>>>>>>>> +      if (supports_sse4_2() && FLAG_IS_DEFAULT(UseFPUForSpill
>>>>>>>>>> ing))
>>>>>>>>>>
>>>>>>>>> {
>>>
>>>> +        FLAG_SET_DEFAULT(UseFPUForSpilling, true);
>>>>>>>>>> +      }
>>>>>>>>>> +#endif
>>>>>>>>>> +    }
>>>>>>>>>>        }
>>>>>>>>>>
>>>>>>>>>>        if( is_intel() ) { // Intel cpus specific settings
>>>>>>>>>> diff --git a/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>> b/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>> --- a/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>> +++ b/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>> @@ -228,6 +228,15 @@
>>>>>>>>>>          } bits;
>>>>>>>>>>        };
>>>>>>>>>>
>>>>>>>>>> +  union ExtCpuid1EEx {
>>>>>>>>>> +    uint32_t value;
>>>>>>>>>> +    struct {
>>>>>>>>>> +      uint32_t                  : 8,
>>>>>>>>>> +               threads_per_core : 8,
>>>>>>>>>> +                                : 16;
>>>>>>>>>> +    } bits;
>>>>>>>>>> +  };
>>>>>>>>>> +
>>>>>>>>>>        union XemXcr0Eax {
>>>>>>>>>>          uint32_t value;
>>>>>>>>>>          struct {
>>>>>>>>>> @@ -398,6 +407,12 @@
>>>>>>>>>>          ExtCpuid8Ecx ext_cpuid8_ecx;
>>>>>>>>>>          uint32_t     ext_cpuid8_edx; // reserved
>>>>>>>>>>
>>>>>>>>>> +    // cpuid function 0x8000001E // AMD 17h
>>>>>>>>>> +    uint32_t     ext_cpuid1E_eax;
>>>>>>>>>> +    ExtCpuid1EEx ext_cpuid1E_ebx; // threads per core (AMD17h)
>>>>>>>>>> +    uint32_t     ext_cpuid1E_ecx;
>>>>>>>>>> +    uint32_t     ext_cpuid1E_edx; // unused currently
>>>>>>>>>> +
>>>>>>>>>>          // extended control register XCR0 (the
>>>>>>>>>>
>>>>>>>>> XFEATURE_ENABLED_MASK
>>>
>>>> register)
>>>>>>>>>>          XemXcr0Eax   xem_xcr0_eax;
>>>>>>>>>>          uint32_t     xem_xcr0_edx; // reserved
>>>>>>>>>> @@ -505,6 +520,14 @@
>>>>>>>>>>            result |= CPU_CLMUL;
>>>>>>>>>>          if (_cpuid_info.sef_cpuid7_ebx.bits.rtm != 0)
>>>>>>>>>>            result |= CPU_RTM;
>>>>>>>>>> +    if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
>>>>>>>>>> +       result |= CPU_ADX;
>>>>>>>>>> +    if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
>>>>>>>>>> +      result |= CPU_BMI2;
>>>>>>>>>> +    if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
>>>>>>>>>> +      result |= CPU_SHA;
>>>>>>>>>> +    if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
>>>>>>>>>> +      result |= CPU_FMA;
>>>>>>>>>>
>>>>>>>>>>          // AMD features.
>>>>>>>>>>          if (is_amd()) {
>>>>>>>>>> @@ -518,16 +541,8 @@
>>>>>>>>>>          }
>>>>>>>>>>          // Intel features.
>>>>>>>>>>          if(is_intel()) {
>>>>>>>>>> -      if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
>>>>>>>>>> -         result |= CPU_ADX;
>>>>>>>>>> -      if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
>>>>>>>>>> -        result |= CPU_BMI2;
>>>>>>>>>> -      if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
>>>>>>>>>> -        result |= CPU_SHA;
>>>>>>>>>>            if(_cpuid_info.ext_cpuid1_ecx.bits.lzcnt_intel != 0)
>>>>>>>>>>              result |= CPU_LZCNT;
>>>>>>>>>> -      if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
>>>>>>>>>> -        result |= CPU_FMA;
>>>>>>>>>>            // for Intel, ecx.bits.misalignsse bit (bit 8)
>>>>>>>>>> indicates
>>>>>>>>>> support for prefetchw
>>>>>>>>>>            if (_cpuid_info.ext_cpuid1_ecx.bits.misalignsse != 0)
>>>>>>>>>> {
>>>>>>>>>>              result |= CPU_3DNOW_PREFETCH;
>>>>>>>>>> @@ -590,6 +605,7 @@
>>>>>>>>>>        static ByteSize ext_cpuid5_offset() { return
>>>>>>>>>> byte_offset_of(CpuidInfo, ext_cpuid5_eax); }
>>>>>>>>>>        static ByteSize ext_cpuid7_offset() { return
>>>>>>>>>> byte_offset_of(CpuidInfo, ext_cpuid7_eax); }
>>>>>>>>>>        static ByteSize ext_cpuid8_offset() { return
>>>>>>>>>> byte_offset_of(CpuidInfo, ext_cpuid8_eax); }
>>>>>>>>>> +  static ByteSize ext_cpuid1E_offset() { return
>>>>>>>>>> byte_offset_of(CpuidInfo, ext_cpuid1E_eax); }
>>>>>>>>>>        static ByteSize tpl_cpuidB0_offset() { return
>>>>>>>>>> byte_offset_of(CpuidInfo, tpl_cpuidB0_eax); }
>>>>>>>>>>        static ByteSize tpl_cpuidB1_offset() { return
>>>>>>>>>> byte_offset_of(CpuidInfo, tpl_cpuidB1_eax); }
>>>>>>>>>>        static ByteSize tpl_cpuidB2_offset() { return
>>>>>>>>>> byte_offset_of(CpuidInfo, tpl_cpuidB2_eax); }
>>>>>>>>>> @@ -673,8 +689,11 @@
>>>>>>>>>>          if (is_intel() && supports_processor_topology()) {
>>>>>>>>>>            result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
>>>>>>>>>>          } else if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) {
>>>>>>>>>> -      result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
>>>>>>>>>> -               cores_per_cpu();
>>>>>>>>>> +      if (cpu_family() >= 0x17)
>>>>>>>>>> +        result = _cpuid_info.ext_cpuid1E_ebx.bits.threads_per_core + 1;
>>>>>>>>>> +      else
>>>>>>>>>> +        result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
>>>>>>>>>> +                 cores_per_cpu();
>>>>>>>>>>          }
>>>>>>>>>>          return (result == 0 ? 1 : result);
>>>>>>>>>>        }
>>>>>>>>>>
>>>>>>>>>> I have attached the patch for review.
>>>>>>>>>> Please let me know your comments.
>>>>>>>>>>
>>>>>>>>>> Thanks,
>>>>>>>>>> Rohit
>>>>>>>>>>
>>>>>>>>>> Thanks,
>>>>>>>>>>> Vladimir
>>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>>> src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>>>
>>>>>>>>>>>> No comments on AMD specific changes.
>>>>>>>>>>>>
>>>>>>>>>>>> Thanks,
>>>>>>>>>>>> David
>>>>>>>>>>>> -----
>>>>>>>>>>>>
>>>>>>>>>>>> On 5/09/2017 3:43 PM, David Holmes wrote:
>>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>>> On 5/09/2017 3:29 PM, Rohit Arul Raj wrote:
>>>>>>>>>>>>>
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> Hello David,
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> On Tue, Sep 5, 2017 at 10:31 AM, David Holmes
>>>>>>>>>>>>>> <david.holmes at oracle.com>
>>>>>>>>>>>>>> wrote:
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> Hi Rohit,
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> I was unable to apply your patch to latest jdk10/hs/hotspot
>>>>>>>>>>>>>>> repo.
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>> I checked out the latest jdk10/hs/hotspot [parent:
>>>>>>>>>>>>>> 13548:1a9c2e07a826]
>>>>>>>>>>>>>> and was able to apply the patch
>>>>>>>>>>>>>> [epyc-amd17h-defaults-3Sept.patch]
>>>>>>>>>>>>>> without any issues.
>>>>>>>>>>>>>> Can you share the error message that you are getting?
>>>>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>>> I was getting this:
>>>>>>>>>>>>>
>>>>>>>>>>>>> applying hotspot.patch
>>>>>>>>>>>>> patching file src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>>>> Hunk #1 FAILED at 1108
>>>>>>>>>>>>> 1 out of 1 hunks FAILED -- saving rejects to file
>>>>>>>>>>>>> src/cpu/x86/vm/vm_version_x86.cpp.rej
>>>>>>>>>>>>> patching file src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>>>>> Hunk #2 FAILED at 522
>>>>>>>>>>>>> 1 out of 2 hunks FAILED -- saving rejects to file
>>>>>>>>>>>>> src/cpu/x86/vm/vm_version_x86.hpp.rej
>>>>>>>>>>>>> abort: patch failed to apply
>>>>>>>>>>>>>
>>>>>>>>>>>>> but I started again and this time it applied fine, so I am not
>>>>>>>>>>>>> sure what was going on there.
>>>>>>>>>>>>>
>>>>>>>>>>>>> Cheers,
>>>>>>>>>>>>> David
>>>>>>>>>>>>>
>>>>>>>>>>>>> Regards,
>>>>>>>>>>>>>> Rohit
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> On 4/09/2017 2:42 AM, Rohit Arul Raj wrote:
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> Hello Vladimir,
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> On Sat, Sep 2, 2017 at 11:25 PM, Vladimir Kozlov
>>>>>>>>>>>>>>>> <vladimir.kozlov at oracle.com> wrote:
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>> Hi Rohit,
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>> On 9/2/17 1:16 AM, Rohit Arul Raj wrote:
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>> Hello Vladimir,
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>> Changes look good. Only question I have is about MaxVectorSize.
>>>>>>>>>>>>>>>>>>> It is set to 16 only in presence of AVX:
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>> http://hg.openjdk.java.net/jdk10/hs/hotspot/file/
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>> 046eab27258f/src/cpu/x86/vm/vm_version_x86.cpp#l945
>>>
>>>>
>>>>>>>>>>>>>>>>>>> Does that code work for AMD 17h too?
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>> Thanks for pointing that out. Yes, the code works fine for
>>>>>>>>>>>>>>>>>> AMD
>>>>>>>>>>>>>>>>>> 17h.
>>>>>>>>>>>>>>>>>> So
>>>>>>>>>>>>>>>>>> I have removed the surplus check for MaxVectorSize from my
>>>>>>>>>>>>>>>>>> patch.
>>>>>>>>>>>>>>>>>> I
>>>>>>>>>>>>>>>>>> have updated, re-tested and attached the patch.
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>> Which check did you remove?
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> My older patch had the below-mentioned check, which was required
>>>>>>>>>>>>>>>> on JDK9 where the default MaxVectorSize was 64. It has been
>>>>>>>>>>>>>>>> handled better in OpenJDK 10, so this check is not required
>>>>>>>>>>>>>>>> anymore.
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> +    // Some defaults for AMD family 17h
>>>>>>>>>>>>>>>> +    if ( cpu_family() == 0x17 ) {
>>>>>>>>>>>>>>>> ...
>>>>>>>>>>>>>>>> ...
>>>>>>>>>>>>>>>> +      if (MaxVectorSize > 32) {
>>>>>>>>>>>>>>>> +        FLAG_SET_DEFAULT(MaxVectorSize, 32);
>>>>>>>>>>>>>>>> +      }
>>>>>>>>>>>>>>>> ..
>>>>>>>>>>>>>>>> ..
>>>>>>>>>>>>>>>> +      }
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>> I have one query regarding the setting of UseSHA flag:
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>> http://hg.openjdk.java.net/jdk10/hs/hotspot/file/
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>> 046eab27258f/src/cpu/x86/vm/vm_version_x86.cpp#l821
>>>
>>>>
>>>>>>>>>>>>>>>>>> AMD 17h has support for SHA.
>>>>>>>>>>>>>>>>>> AMD 15h doesn't have support for SHA. Still, the "UseSHA" flag
>>>>>>>>>>>>>>>>>> gets enabled for it based on the availability of BMI2 and AVX2.
>>>>>>>>>>>>>>>>>> Is there an underlying reason for this? I have handled this in
>>>>>>>>>>>>>>>>>> the patch but just wanted to confirm.
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>> It was done with the following changes, which use only AVX2 and
>>>>>>>>>>>>>>>>> BMI2 instructions to calculate SHA-256:
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>> http://hg.openjdk.java.net/jdk10/hs/hotspot/rev/
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> 6a17c49de974
>>>
>>>>
>>>>>>>>>>>>>>>>> I don't know if AMD 15h supports these instructions and can
>>>>>>>>>>>>>>>>> execute
>>>>>>>>>>>>>>>>> that
>>>>>>>>>>>>>>>>> code. You need to test it.
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> Ok, got it. Since AMD15h has support for AVX2 and BMI2
>>>>>>>>>>>>>>>> instructions,
>>>>>>>>>>>>>>>> it should work.
>>>>>>>>>>>>>>>> Confirmed by running following sanity tests:
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> ./hotspot/test/compiler/intrinsics/sha/sanity/
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> TestSHA1Intrinsics.java
>>>
>>>>
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> ./hotspot/test/compiler/intrinsics/sha/sanity/
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> TestSHA512Intrinsics.java
>>>
>>>>
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> ./hotspot/test/compiler/intrinsics/sha/sanity/
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> TestSHA256Intrinsics.java
>>>
>>>>
>>>>>>>>>>>>>>>> So I have removed those SHA checks from my patch too.
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> Please find attached updated, re-tested patch.
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> diff --git a/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>>>>>>> b/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>>>>>>> --- a/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>>>>>>> +++ b/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>>>>>>> @@ -1109,11 +1109,27 @@
>>>>>>>>>>>>>>>>            }
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>        #ifdef COMPILER2
>>>>>>>>>>>>>>>> -    if (MaxVectorSize > 16) {
>>>>>>>>>>>>>>>> -      // Limit vectors size to 16 bytes on current AMD
>>>>>>>>>>>>>>>> cpus.
>>>>>>>>>>>>>>>> +    if (cpu_family() < 0x17 && MaxVectorSize > 16) {
>>>>>>>>>>>>>>>> +      // Limit vectors size to 16 bytes on AMD cpus < 17h.
>>>>>>>>>>>>>>>>              FLAG_SET_DEFAULT(MaxVectorSize, 16);
>>>>>>>>>>>>>>>>            }
>>>>>>>>>>>>>>>>        #endif // COMPILER2
>>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>>> +    // Some defaults for AMD family 17h
>>>>>>>>>>>>>>>> +    if ( cpu_family() == 0x17 ) {
>>>>>>>>>>>>>>>> +      // On family 17h processors use XMM and
>>>>>>>>>>>>>>>> UnalignedLoadStores
>>>>>>>>>>>>>>>> for
>>>>>>>>>>>>>>>> Array Copy
>>>>>>>>>>>>>>>> +      if (supports_sse2() &&
>>>>>>>>>>>>>>>> FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
>>>>>>>>>>>>>>>> +        FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
>>>>>>>>>>>>>>>> +      }
>>>>>>>>>>>>>>>> +      if (supports_sse2() &&
>>>>>>>>>>>>>>>> FLAG_IS_DEFAULT(UseUnalignedLoadStores))
>>>>>>>>>>>>>>>> {
>>>>>>>>>>>>>>>> +        FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
>>>>>>>>>>>>>>>> +      }
>>>>>>>>>>>>>>>> +#ifdef COMPILER2
>>>>>>>>>>>>>>>> +      if (supports_sse4_2() &&
>>>>>>>>>>>>>>>> FLAG_IS_DEFAULT(UseFPUForSpilling))
>>>>>>>>>>>>>>>> {
>>>>>>>>>>>>>>>> +        FLAG_SET_DEFAULT(UseFPUForSpilling, true);
>>>>>>>>>>>>>>>> +      }
>>>>>>>>>>>>>>>> +#endif
>>>>>>>>>>>>>>>> +    }
>>>>>>>>>>>>>>>>          }
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>          if( is_intel() ) { // Intel cpus specific settings
>>>>>>>>>>>>>>>> diff --git a/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>>>>>>>> b/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>>>>>>>> --- a/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>>>>>>>> +++ b/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>>>>>>>> @@ -505,6 +505,14 @@
>>>>>>>>>>>>>>>>              result |= CPU_CLMUL;
>>>>>>>>>>>>>>>>            if (_cpuid_info.sef_cpuid7_ebx.bits.rtm != 0)
>>>>>>>>>>>>>>>>              result |= CPU_RTM;
>>>>>>>>>>>>>>>> +    if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
>>>>>>>>>>>>>>>> +       result |= CPU_ADX;
>>>>>>>>>>>>>>>> +    if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
>>>>>>>>>>>>>>>> +      result |= CPU_BMI2;
>>>>>>>>>>>>>>>> +    if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
>>>>>>>>>>>>>>>> +      result |= CPU_SHA;
>>>>>>>>>>>>>>>> +    if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
>>>>>>>>>>>>>>>> +      result |= CPU_FMA;
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>            // AMD features.
>>>>>>>>>>>>>>>>            if (is_amd()) {
>>>>>>>>>>>>>>>> @@ -515,19 +523,13 @@
>>>>>>>>>>>>>>>>                result |= CPU_LZCNT;
>>>>>>>>>>>>>>>>              if (_cpuid_info.ext_cpuid1_ecx.bits.sse4a !=
>>>>>>>>>>>>>>>> 0)
>>>>>>>>>>>>>>>>                result |= CPU_SSE4A;
>>>>>>>>>>>>>>>> +      if(_cpuid_info.std_cpuid1_edx.bits.ht != 0)
>>>>>>>>>>>>>>>> +        result |= CPU_HT;
>>>>>>>>>>>>>>>>            }
>>>>>>>>>>>>>>>>            // Intel features.
>>>>>>>>>>>>>>>>            if(is_intel()) {
>>>>>>>>>>>>>>>> -      if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
>>>>>>>>>>>>>>>> -         result |= CPU_ADX;
>>>>>>>>>>>>>>>> -      if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
>>>>>>>>>>>>>>>> -        result |= CPU_BMI2;
>>>>>>>>>>>>>>>> -      if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
>>>>>>>>>>>>>>>> -        result |= CPU_SHA;
>>>>>>>>>>>>>>>>              if(_cpuid_info.ext_cpuid1_ecx.bits.lzcnt_intel
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> !=
>>>
>>>> 0)
>>>>>>>>>>>>>>>>                result |= CPU_LZCNT;
>>>>>>>>>>>>>>>> -      if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
>>>>>>>>>>>>>>>> -        result |= CPU_FMA;
>>>>>>>>>>>>>>>>              // for Intel, ecx.bits.misalignsse bit (bit 8)
>>>>>>>>>>>>>>>> indicates
>>>>>>>>>>>>>>>> support for prefetchw
>>>>>>>>>>>>>>>>              if (_cpuid_info.ext_cpuid1_ecx.bits.misalignsse != 0) {
>>>>>>>>>>>>>>>>                result |= CPU_3DNOW_PREFETCH;
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> Please let me know your comments.
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> Thanks for your time.
>>>>>>>>>>>>>>>> Rohit
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>> Thanks for taking time to review the code.
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>> diff --git a/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>>>>>>>>> b/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>>>>>>>>> --- a/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>>>>>>>>> +++ b/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>>>>>>>>> @@ -1088,6 +1088,22 @@
>>>>>>>>>>>>>>>>>>               }
>>>>>>>>>>>>>>>>>>               FLAG_SET_DEFAULT(UseSSE42Intrinsics,
>>>>>>>>>>>>>>>>>> false);
>>>>>>>>>>>>>>>>>>             }
>>>>>>>>>>>>>>>>>> +    if (supports_sha()) {
>>>>>>>>>>>>>>>>>> +      if (FLAG_IS_DEFAULT(UseSHA)) {
>>>>>>>>>>>>>>>>>> +        FLAG_SET_DEFAULT(UseSHA, true);
>>>>>>>>>>>>>>>>>> +      }
>>>>>>>>>>>>>>>>>> +    } else if (UseSHA || UseSHA1Intrinsics ||
>>>>>>>>>>>>>>>>>> UseSHA256Intrinsics
>>>>>>>>>>>>>>>>>> ||
>>>>>>>>>>>>>>>>>> UseSHA512Intrinsics) {
>>>>>>>>>>>>>>>>>> +      if (!FLAG_IS_DEFAULT(UseSHA) ||
>>>>>>>>>>>>>>>>>> +          !FLAG_IS_DEFAULT(UseSHA1Intrinsics) ||
>>>>>>>>>>>>>>>>>> +          !FLAG_IS_DEFAULT(UseSHA256Intrinsics) ||
>>>>>>>>>>>>>>>>>> +          !FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
>>>>>>>>>>>>>>>>>> +        warning("SHA instructions are not available on this CPU");
>>>>>>>>>>>>>>>>>> +      }
>>>>>>>>>>>>>>>>>> +      FLAG_SET_DEFAULT(UseSHA, false);
>>>>>>>>>>>>>>>>>> +      FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
>>>>>>>>>>>>>>>>>> +      FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
>>>>>>>>>>>>>>>>>> +      FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
>>>>>>>>>>>>>>>>>> +    }
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>             // some defaults for AMD family 15h
>>>>>>>>>>>>>>>>>>             if ( cpu_family() == 0x15 ) {
>>>>>>>>>>>>>>>>>> @@ -1109,11 +1125,40 @@
>>>>>>>>>>>>>>>>>>             }
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>         #ifdef COMPILER2
>>>>>>>>>>>>>>>>>> -    if (MaxVectorSize > 16) {
>>>>>>>>>>>>>>>>>> -      // Limit vectors size to 16 bytes on current AMD
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>> cpus.
>>>
>>>> +    if (cpu_family() < 0x17 && MaxVectorSize > 16) {
>>>>>>>>>>>>>>>>>> +      // Limit vectors size to 16 bytes on AMD cpus <
>>>>>>>>>>>>>>>>>> 17h.
>>>>>>>>>>>>>>>>>>               FLAG_SET_DEFAULT(MaxVectorSize, 16);
>>>>>>>>>>>>>>>>>>             }
>>>>>>>>>>>>>>>>>>         #endif // COMPILER2
>>>>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>>>>> +    // Some defaults for AMD family 17h
>>>>>>>>>>>>>>>>>> +    if ( cpu_family() == 0x17 ) {
>>>>>>>>>>>>>>>>>> +      // On family 17h processors use XMM and
>>>>>>>>>>>>>>>>>> UnalignedLoadStores
>>>>>>>>>>>>>>>>>> for
>>>>>>>>>>>>>>>>>> Array Copy
>>>>>>>>>>>>>>>>>> +      if (supports_sse2() &&
>>>>>>>>>>>>>>>>>> FLAG_IS_DEFAULT(UseXMMForArrayCopy))
>>>>>>>>>>>>>>>>>> {
>>>>>>>>>>>>>>>>>> +        FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
>>>>>>>>>>>>>>>>>> +      }
>>>>>>>>>>>>>>>>>> +      if (supports_sse2() &&
>>>>>>>>>>>>>>>>>> FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
>>>>>>>>>>>>>>>>>> +        FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
>>>>>>>>>>>>>>>>>> +      }
>>>>>>>>>>>>>>>>>> +      if (supports_bmi2() &&
>>>>>>>>>>>>>>>>>> FLAG_IS_DEFAULT(UseBMI2Instructions))
>>>>>>>>>>>>>>>>>> {
>>>>>>>>>>>>>>>>>> +        FLAG_SET_DEFAULT(UseBMI2Instructions, true);
>>>>>>>>>>>>>>>>>> +      }
>>>>>>>>>>>>>>>>>> +      if (UseSHA) {
>>>>>>>>>>>>>>>>>> +        if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
>>>>>>>>>>>>>>>>>> +          FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
>>>>>>>>>>>>>>>>>> +        } else if (UseSHA512Intrinsics) {
>>>>>>>>>>>>>>>>>> +          warning("Intrinsics for SHA-384 and SHA-512
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>> crypto
>>>
>>>> hash
>>>>>>>>>>>>>>>>>> functions not available on this CPU.");
>>>>>>>>>>>>>>>>>> +          FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
>>>>>>>>>>>>>>>>>> +        }
>>>>>>>>>>>>>>>>>> +      }
>>>>>>>>>>>>>>>>>> +#ifdef COMPILER2
>>>>>>>>>>>>>>>>>> +      if (supports_sse4_2()) {
>>>>>>>>>>>>>>>>>> +        if (FLAG_IS_DEFAULT(UseFPUForSpilling)) {
>>>>>>>>>>>>>>>>>> +          FLAG_SET_DEFAULT(UseFPUForSpilling, true);
>>>>>>>>>>>>>>>>>> +        }
>>>>>>>>>>>>>>>>>> +      }
>>>>>>>>>>>>>>>>>> +#endif
>>>>>>>>>>>>>>>>>> +    }
>>>>>>>>>>>>>>>>>>           }
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>           if( is_intel() ) { // Intel cpus specific
>>>>>>>>>>>>>>>>>> settings
>>>>>>>>>>>>>>>>>> diff --git a/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>>>>>>>>>> b/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>>>>>>>>>> --- a/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>>>>>>>>>> +++ b/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>>>>>>>>>> @@ -505,6 +505,14 @@
>>>>>>>>>>>>>>>>>>               result |= CPU_CLMUL;
>>>>>>>>>>>>>>>>>>             if (_cpuid_info.sef_cpuid7_ebx.bits.rtm != 0)
>>>>>>>>>>>>>>>>>>               result |= CPU_RTM;
>>>>>>>>>>>>>>>>>> +    if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
>>>>>>>>>>>>>>>>>> +       result |= CPU_ADX;
>>>>>>>>>>>>>>>>>> +    if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
>>>>>>>>>>>>>>>>>> +      result |= CPU_BMI2;
>>>>>>>>>>>>>>>>>> +    if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
>>>>>>>>>>>>>>>>>> +      result |= CPU_SHA;
>>>>>>>>>>>>>>>>>> +    if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
>>>>>>>>>>>>>>>>>> +      result |= CPU_FMA;
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>             // AMD features.
>>>>>>>>>>>>>>>>>>             if (is_amd()) {
>>>>>>>>>>>>>>>>>> @@ -515,19 +523,13 @@
>>>>>>>>>>>>>>>>>>                 result |= CPU_LZCNT;
>>>>>>>>>>>>>>>>>>               if (_cpuid_info.ext_cpuid1_ecx.bits.sse4a
>>>>>>>>>>>>>>>>>> !=
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>> 0)
>>>
>>>>                 result |= CPU_SSE4A;
>>>>>>>>>>>>>>>>>> +      if(_cpuid_info.std_cpuid1_edx.bits.ht != 0)
>>>>>>>>>>>>>>>>>> +        result |= CPU_HT;
>>>>>>>>>>>>>>>>>>             }
>>>>>>>>>>>>>>>>>>             // Intel features.
>>>>>>>>>>>>>>>>>>             if(is_intel()) {
>>>>>>>>>>>>>>>>>> -      if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
>>>>>>>>>>>>>>>>>> -         result |= CPU_ADX;
>>>>>>>>>>>>>>>>>> -      if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
>>>>>>>>>>>>>>>>>> -        result |= CPU_BMI2;
>>>>>>>>>>>>>>>>>> -      if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
>>>>>>>>>>>>>>>>>> -        result |= CPU_SHA;
>>>>>>>>>>>>>>>>>>               if(_cpuid_info.ext_cpuid1_ecx.
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>> bits.lzcnt_intel
>>>
>>>> !=
>>>>>>>>>>>>>>>>>> 0)
>>>>>>>>>>>>>>>>>>                 result |= CPU_LZCNT;
>>>>>>>>>>>>>>>>>> -      if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
>>>>>>>>>>>>>>>>>> -        result |= CPU_FMA;
>>>>>>>>>>>>>>>>>>               // for Intel, ecx.bits.misalignsse bit (bit
>>>>>>>>>>>>>>>>>> 8)
>>>>>>>>>>>>>>>>>> indicates
>>>>>>>>>>>>>>>>>> support for prefetchw
>>>>>>>>>>>>>>>>>>               if (_cpuid_info.ext_cpuid1_ecx.
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>> bits.misalignsse
>>
>>