RFR: Newer AMD 17h (EPYC) Processor family defaults

Vladimir Kozlov vladimir.kozlov at oracle.com
Fri Nov 3 05:03:20 UTC 2017


It was pushed long ago - Oct 3, see:

http://hg.openjdk.java.net/jdk10/hs/rev/fde01e0fccb4
https://bugs.openjdk.java.net/browse/JDK-8187219

But we have two repositories: jdk10/jdk10 (master) and jdk10/hs for Hotspot
changes. We have not yet promoted the changes from jdk10/hs to jdk10/jdk10.
Testing is currently under way for the promotion, but I cannot say in
which JDK 10 build it will happen. Look for the push notification in
JDK-8187219.

Regards,
Vladimir

On 11/2/17 8:30 PM, Rohit Arul Raj wrote:
> Hello Vladimir,
> 
> Is there any update on pushing these changes to jdk10?
> 
> Thanks,
> Rohit
> 
> On Fri, Sep 22, 2017 at 1:11 PM, Rohit Arul Raj <rohitarulraj at gmail.com>
> wrote:
> 
>> Thanks Vladimir,
>>
>> On Wed, Sep 20, 2017 at 10:07 PM, Vladimir Kozlov
>> <vladimir.kozlov at oracle.com> wrote:
>>>>        __ cmpl(rax, 0x80000000);     // Is cpuid(0x80000001) supported?
>>>>        __ jcc(Assembler::belowEqual, done);
>>>>        __ cmpl(rax, 0x80000004);     // Is cpuid(0x80000005) supported?
>>>> -    __ jccb(Assembler::belowEqual, ext_cpuid1);
>>>> +   __ jcc(Assembler::belowEqual, ext_cpuid1);
>>>
>>>
>>> Good. You may need to increase the size of the buffer too (to be safe), to 1100:
>>>
>>> static const int stub_size = 1000;
>>>
>>
>> Please find the updated patch after the requested change.
>>
>> diff --git a/src/cpu/x86/vm/vm_version_x86.cpp
>> b/src/cpu/x86/vm/vm_version_x86.cpp
>> --- a/src/cpu/x86/vm/vm_version_x86.cpp
>> +++ b/src/cpu/x86/vm/vm_version_x86.cpp
>> @@ -46,7 +46,7 @@
>>   address VM_Version::_cpuinfo_cont_addr = 0;
>>
>>   static BufferBlob* stub_blob;
>> -static const int stub_size = 1000;
>> +static const int stub_size = 1100;
>>
>>   extern "C" {
>>     typedef void (*get_cpu_info_stub_t)(void*);
>> @@ -70,7 +70,7 @@
>>       bool use_evex = FLAG_IS_DEFAULT(UseAVX) || (UseAVX > 2);
>>
>>       Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4;
>> -    Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7,
>> done, wrapup;
>> +    Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7,
>> ext_cpuid8, done, wrapup;
>>       Label legacy_setup, save_restore_except, legacy_save_restore,
>> start_simd_check;
>>
>>       StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub");
>> @@ -267,14 +267,30 @@
>>       __ cmpl(rax, 0x80000000);     // Is cpuid(0x80000001) supported?
>>       __ jcc(Assembler::belowEqual, done);
>>       __ cmpl(rax, 0x80000004);     // Is cpuid(0x80000005) supported?
>> -    __ jccb(Assembler::belowEqual, ext_cpuid1);
>> +    __ jcc(Assembler::belowEqual, ext_cpuid1);
>>       __ cmpl(rax, 0x80000006);     // Is cpuid(0x80000007) supported?
>>       __ jccb(Assembler::belowEqual, ext_cpuid5);
>>       __ cmpl(rax, 0x80000007);     // Is cpuid(0x80000008) supported?
>>       __ jccb(Assembler::belowEqual, ext_cpuid7);
>> +    __ cmpl(rax, 0x80000008);     // Is cpuid(0x80000009 and above)
>> supported?
>> +    __ jccb(Assembler::belowEqual, ext_cpuid8);
>> +    __ cmpl(rax, 0x8000001E);     // Is cpuid(0x8000001E) supported?
>> +    __ jccb(Assembler::below, ext_cpuid8);
>> +    //
>> +    // Extended cpuid(0x8000001E)
>> +    //
>> +    __ movl(rax, 0x8000001E);
>> +    __ cpuid();
>> +    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_
>> cpuid1E_offset())));
>> +    __ movl(Address(rsi, 0), rax);
>> +    __ movl(Address(rsi, 4), rbx);
>> +    __ movl(Address(rsi, 8), rcx);
>> +    __ movl(Address(rsi,12), rdx);
>> +
>>       //
>>       // Extended cpuid(0x80000008)
>>       //
>> +    __ bind(ext_cpuid8);
>>       __ movl(rax, 0x80000008);
>>       __ cpuid();
>>       __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid8_offset())));
>> @@ -1109,11 +1125,27 @@
>>       }
>>
>>   #ifdef COMPILER2
>> -    if (MaxVectorSize > 16) {
>> -      // Limit vectors size to 16 bytes on current AMD cpus.
>> +    if (cpu_family() < 0x17 && MaxVectorSize > 16) {
>> +      // Limit vectors size to 16 bytes on AMD cpus < 17h.
>>         FLAG_SET_DEFAULT(MaxVectorSize, 16);
>>       }
>>   #endif // COMPILER2
>> +
>> +    // Some defaults for AMD family 17h
>> +    if ( cpu_family() == 0x17 ) {
>> +      // On family 17h processors use XMM and UnalignedLoadStores for
>> Array Copy
>> +      if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
>> +        FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
>> +      }
>> +      if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
>> +        FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
>> +      }
>> +#ifdef COMPILER2
>> +      if (supports_sse4_2() && FLAG_IS_DEFAULT(UseFPUForSpilling)) {
>> +        FLAG_SET_DEFAULT(UseFPUForSpilling, true);
>> +      }
>> +#endif
>> +    }
>>     }
>>
>>     if( is_intel() ) { // Intel cpus specific settings
>> diff --git a/src/cpu/x86/vm/vm_version_x86.hpp
>> b/src/cpu/x86/vm/vm_version_x86.hpp
>> --- a/src/cpu/x86/vm/vm_version_x86.hpp
>> +++ b/src/cpu/x86/vm/vm_version_x86.hpp
>> @@ -228,6 +228,15 @@
>>       } bits;
>>     };
>>
>> +  union ExtCpuid1EEbx {
>> +    uint32_t value;
>> +    struct {
>> +      uint32_t                  : 8,
>> +               threads_per_core : 8,
>> +                                : 16;
>> +    } bits;
>> +  };
>> +
>>     union XemXcr0Eax {
>>       uint32_t value;
>>       struct {
>> @@ -398,6 +407,12 @@
>>       ExtCpuid8Ecx ext_cpuid8_ecx;
>>       uint32_t     ext_cpuid8_edx; // reserved
>>
>> +    // cpuid function 0x8000001E // AMD 17h
>> +    uint32_t      ext_cpuid1E_eax;
>> +    ExtCpuid1EEbx ext_cpuid1E_ebx; // threads per core (AMD17h)
>> +    uint32_t      ext_cpuid1E_ecx;
>> +    uint32_t      ext_cpuid1E_edx; // unused currently
>> +
>>       // extended control register XCR0 (the XFEATURE_ENABLED_MASK register)
>>       XemXcr0Eax   xem_xcr0_eax;
>>       uint32_t     xem_xcr0_edx; // reserved
>> @@ -505,6 +520,14 @@
>>         result |= CPU_CLMUL;
>>       if (_cpuid_info.sef_cpuid7_ebx.bits.rtm != 0)
>>         result |= CPU_RTM;
>> +    if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
>> +       result |= CPU_ADX;
>> +    if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
>> +      result |= CPU_BMI2;
>> +    if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
>> +      result |= CPU_SHA;
>> +    if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
>> +      result |= CPU_FMA;
>>
>>       // AMD features.
>>       if (is_amd()) {
>> @@ -518,16 +541,8 @@
>>       }
>>       // Intel features.
>>       if(is_intel()) {
>> -      if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
>> -         result |= CPU_ADX;
>> -      if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
>> -        result |= CPU_BMI2;
>> -      if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
>> -        result |= CPU_SHA;
>>         if(_cpuid_info.ext_cpuid1_ecx.bits.lzcnt_intel != 0)
>>           result |= CPU_LZCNT;
>> -      if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
>> -        result |= CPU_FMA;
>>         // for Intel, ecx.bits.misalignsse bit (bit 8) indicates
>> support for prefetchw
>>         if (_cpuid_info.ext_cpuid1_ecx.bits.misalignsse != 0) {
>>           result |= CPU_3DNOW_PREFETCH;
>> @@ -590,6 +605,7 @@
>>     static ByteSize ext_cpuid5_offset() { return
>> byte_offset_of(CpuidInfo, ext_cpuid5_eax); }
>>     static ByteSize ext_cpuid7_offset() { return
>> byte_offset_of(CpuidInfo, ext_cpuid7_eax); }
>>     static ByteSize ext_cpuid8_offset() { return
>> byte_offset_of(CpuidInfo, ext_cpuid8_eax); }
>> +  static ByteSize ext_cpuid1E_offset() { return
>> byte_offset_of(CpuidInfo, ext_cpuid1E_eax); }
>>     static ByteSize tpl_cpuidB0_offset() { return
>> byte_offset_of(CpuidInfo, tpl_cpuidB0_eax); }
>>     static ByteSize tpl_cpuidB1_offset() { return
>> byte_offset_of(CpuidInfo, tpl_cpuidB1_eax); }
>>     static ByteSize tpl_cpuidB2_offset() { return
>> byte_offset_of(CpuidInfo, tpl_cpuidB2_eax); }
>> @@ -673,8 +689,12 @@
>>       if (is_intel() && supports_processor_topology()) {
>>         result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
>>       } else if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) {
>> -      result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
>> -               cores_per_cpu();
>> +      if (cpu_family() >= 0x17) {
>> +        result = _cpuid_info.ext_cpuid1E_ebx.bits.threads_per_core + 1;
>> +      } else {
>> +        result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
>> +                 cores_per_cpu();
>> +      }
>>       }
>>       return (result == 0 ? 1 : result);
>>     }
>>
>> Regards,
>> Rohit
>>
>>>>
>>>> diff --git a/src/cpu/x86/vm/vm_version_x86.cpp
>>>> b/src/cpu/x86/vm/vm_version_x86.cpp
>>>> --- a/src/cpu/x86/vm/vm_version_x86.cpp
>>>> +++ b/src/cpu/x86/vm/vm_version_x86.cpp
>>>> @@ -70,7 +70,7 @@
>>>>        bool use_evex = FLAG_IS_DEFAULT(UseAVX) || (UseAVX > 2);
>>>>
>>>>        Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4;
>>>> -    Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7,
>>>> done, wrapup;
>>>> +    Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7,
>>>> ext_cpuid8, done, wrapup;
>>>>        Label legacy_setup, save_restore_except, legacy_save_restore,
>>>> start_simd_check;
>>>>
>>>>        StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub");
>>>> @@ -267,14 +267,30 @@
>>>>        __ cmpl(rax, 0x80000000);     // Is cpuid(0x80000001) supported?
>>>>        __ jcc(Assembler::belowEqual, done);
>>>>        __ cmpl(rax, 0x80000004);     // Is cpuid(0x80000005) supported?
>>>> -    __ jccb(Assembler::belowEqual, ext_cpuid1);
>>>> +    __ jcc(Assembler::belowEqual, ext_cpuid1);
>>>>        __ cmpl(rax, 0x80000006);     // Is cpuid(0x80000007) supported?
>>>>        __ jccb(Assembler::belowEqual, ext_cpuid5);
>>>>        __ cmpl(rax, 0x80000007);     // Is cpuid(0x80000008) supported?
>>>>        __ jccb(Assembler::belowEqual, ext_cpuid7);
>>>> +    __ cmpl(rax, 0x80000008);     // Is cpuid(0x80000009 and above)
>>>> supported?
>>>> +    __ jccb(Assembler::belowEqual, ext_cpuid8);
>>>> +    __ cmpl(rax, 0x8000001E);     // Is cpuid(0x8000001E) supported?
>>>> +    __ jccb(Assembler::below, ext_cpuid8);
>>>> +    //
>>>> +    // Extended cpuid(0x8000001E)
>>>> +    //
>>>> +    __ movl(rax, 0x8000001E);
>>>> +    __ cpuid();
>>>> +    __ lea(rsi, Address(rbp,
>>>> in_bytes(VM_Version::ext_cpuid1E_offset())));
>>>> +    __ movl(Address(rsi, 0), rax);
>>>> +    __ movl(Address(rsi, 4), rbx);
>>>> +    __ movl(Address(rsi, 8), rcx);
>>>> +    __ movl(Address(rsi,12), rdx);
>>>> +
>>>>        //
>>>>        // Extended cpuid(0x80000008)
>>>>        //
>>>> +    __ bind(ext_cpuid8);
>>>>        __ movl(rax, 0x80000008);
>>>>        __ cpuid();
>>>>        __ lea(rsi, Address(rbp,
>>>> in_bytes(VM_Version::ext_cpuid8_offset())));
>>>> @@ -1109,11 +1125,27 @@
>>>>        }
>>>>
>>>>    #ifdef COMPILER2
>>>> -    if (MaxVectorSize > 16) {
>>>> -      // Limit vectors size to 16 bytes on current AMD cpus.
>>>> +    if (cpu_family() < 0x17 && MaxVectorSize > 16) {
>>>> +      // Limit vectors size to 16 bytes on AMD cpus < 17h.
>>>>          FLAG_SET_DEFAULT(MaxVectorSize, 16);
>>>>        }
>>>>    #endif // COMPILER2
>>>> +
>>>> +    // Some defaults for AMD family 17h
>>>> +    if ( cpu_family() == 0x17 ) {
>>>> +      // On family 17h processors use XMM and UnalignedLoadStores for
>>>> Array Copy
>>>> +      if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
>>>> +        FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
>>>> +      }
>>>> +      if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores))
>> {
>>>> +        FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
>>>> +      }
>>>> +#ifdef COMPILER2
>>>> +      if (supports_sse4_2() && FLAG_IS_DEFAULT(UseFPUForSpilling)) {
>>>> +        FLAG_SET_DEFAULT(UseFPUForSpilling, true);
>>>> +      }
>>>> +#endif
>>>> +    }
>>>>      }
>>>>
>>>>      if( is_intel() ) { // Intel cpus specific settings
>>>> diff --git a/src/cpu/x86/vm/vm_version_x86.hpp
>>>> b/src/cpu/x86/vm/vm_version_x86.hpp
>>>> --- a/src/cpu/x86/vm/vm_version_x86.hpp
>>>> +++ b/src/cpu/x86/vm/vm_version_x86.hpp
>>>> @@ -228,6 +228,15 @@
>>>>        } bits;
>>>>      };
>>>>
>>>> +  union ExtCpuid1EEbx {
>>>> +    uint32_t value;
>>>> +    struct {
>>>> +      uint32_t                  : 8,
>>>> +               threads_per_core : 8,
>>>> +                                : 16;
>>>> +    } bits;
>>>> +  };
>>>> +
>>>>      union XemXcr0Eax {
>>>>        uint32_t value;
>>>>        struct {
>>>> @@ -398,6 +407,12 @@
>>>>        ExtCpuid8Ecx ext_cpuid8_ecx;
>>>>        uint32_t     ext_cpuid8_edx; // reserved
>>>>
>>>> +    // cpuid function 0x8000001E // AMD 17h
>>>> +    uint32_t      ext_cpuid1E_eax;
>>>> +    ExtCpuid1EEbx ext_cpuid1E_ebx; // threads per core (AMD17h)
>>>> +    uint32_t      ext_cpuid1E_ecx;
>>>> +    uint32_t      ext_cpuid1E_edx; // unused currently
>>>> +
>>>>        // extended control register XCR0 (the XFEATURE_ENABLED_MASK
>>>> register)
>>>>        XemXcr0Eax   xem_xcr0_eax;
>>>>        uint32_t     xem_xcr0_edx; // reserved
>>>> @@ -505,6 +520,14 @@
>>>>          result |= CPU_CLMUL;
>>>>        if (_cpuid_info.sef_cpuid7_ebx.bits.rtm != 0)
>>>>          result |= CPU_RTM;
>>>> +    if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
>>>> +       result |= CPU_ADX;
>>>> +    if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
>>>> +      result |= CPU_BMI2;
>>>> +    if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
>>>> +      result |= CPU_SHA;
>>>> +    if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
>>>> +      result |= CPU_FMA;
>>>>
>>>>        // AMD features.
>>>>        if (is_amd()) {
>>>> @@ -518,16 +541,8 @@
>>>>        }
>>>>        // Intel features.
>>>>        if(is_intel()) {
>>>> -      if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
>>>> -         result |= CPU_ADX;
>>>> -      if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
>>>> -        result |= CPU_BMI2;
>>>> -      if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
>>>> -        result |= CPU_SHA;
>>>>          if(_cpuid_info.ext_cpuid1_ecx.bits.lzcnt_intel != 0)
>>>>            result |= CPU_LZCNT;
>>>> -      if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
>>>> -        result |= CPU_FMA;
>>>>          // for Intel, ecx.bits.misalignsse bit (bit 8) indicates
>>>> support for prefetchw
>>>>          if (_cpuid_info.ext_cpuid1_ecx.bits.misalignsse != 0) {
>>>>            result |= CPU_3DNOW_PREFETCH;
>>>> @@ -590,6 +605,7 @@
>>>>      static ByteSize ext_cpuid5_offset() { return
>>>> byte_offset_of(CpuidInfo, ext_cpuid5_eax); }
>>>>      static ByteSize ext_cpuid7_offset() { return
>>>> byte_offset_of(CpuidInfo, ext_cpuid7_eax); }
>>>>      static ByteSize ext_cpuid8_offset() { return
>>>> byte_offset_of(CpuidInfo, ext_cpuid8_eax); }
>>>> +  static ByteSize ext_cpuid1E_offset() { return
>>>> byte_offset_of(CpuidInfo, ext_cpuid1E_eax); }
>>>>      static ByteSize tpl_cpuidB0_offset() { return
>>>> byte_offset_of(CpuidInfo, tpl_cpuidB0_eax); }
>>>>      static ByteSize tpl_cpuidB1_offset() { return
>>>> byte_offset_of(CpuidInfo, tpl_cpuidB1_eax); }
>>>>      static ByteSize tpl_cpuidB2_offset() { return
>>>> byte_offset_of(CpuidInfo, tpl_cpuidB2_eax); }
>>>> @@ -673,8 +689,12 @@
>>>>        if (is_intel() && supports_processor_topology()) {
>>>>          result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
>>>>        } else if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) {
>>>> -      result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
>>>> -               cores_per_cpu();
>>>> +      if (cpu_family() >= 0x17) {
>>>> +        result = _cpuid_info.ext_cpuid1E_ebx.bits.threads_per_core +
>> 1;
>>>> +      } else {
>>>> +        result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
>>>> +                 cores_per_cpu();
>>>> +      }
>>>>        }
>>>>        return (result == 0 ? 1 : result);
>>>>      }
>>>>
>>>> Please let me know your comments.
>>>> Thanks for your review.
>>>>
>>>> Regards,
>>>> Rohit
>>>>
>>>>>
>>>>>
>>>>> On 9/11/17 9:52 PM, Rohit Arul Raj wrote:
>>>>>>
>>>>>>
>>>>>> Hello David,
>>>>>>
>>>>>>>>
>>>>>>>>
>>>>>>>> 1. ExtCpuid1EEx
>>>>>>>>
>>>>>>>> Should this be ExtCpuid1EEbx? (I see the naming here is somewhat
>>>>>>>> inconsistent - and potentially confusing: I would have preferred to
>>>>>>>> see
>>>>>>>> things like ExtCpuid_1E_Ebx, to make it clear.)
>>>>>>>
>>>>>>>
>>>>>>>
>>>>>>> Yes, I can change it accordingly.
>>>>>>>
>>>>>>
>>>>>> I have attached the updated, re-tested patch as per your comments above.
>>>>>>
>>>>>> diff --git a/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>> b/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>> --- a/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>> +++ b/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>> @@ -70,7 +70,7 @@
>>>>>>         bool use_evex = FLAG_IS_DEFAULT(UseAVX) || (UseAVX > 2);
>>>>>>
>>>>>>         Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4;
>>>>>> -    Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7,
>>>>>> done, wrapup;
>>>>>> +    Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7,
>>>>>> ext_cpuid8, done, wrapup;
>>>>>>         Label legacy_setup, save_restore_except, legacy_save_restore,
>>>>>> start_simd_check;
>>>>>>
>>>>>>         StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub");
>>>>>> @@ -272,9 +272,23 @@
>>>>>>         __ jccb(Assembler::belowEqual, ext_cpuid5);
>>>>>>         __ cmpl(rax, 0x80000007);     // Is cpuid(0x80000008)
>> supported?
>>>>>>         __ jccb(Assembler::belowEqual, ext_cpuid7);
>>>>>> +    __ cmpl(rax, 0x80000008);     // Is cpuid(0x8000001E) supported?
>>>>>> +    __ jccb(Assembler::belowEqual, ext_cpuid8);
>>>>>> +    //
>>>>>> +    // Extended cpuid(0x8000001E)
>>>>>> +    //
>>>>>> +    __ movl(rax, 0x8000001E);
>>>>>> +    __ cpuid();
>>>>>> +    __ lea(rsi, Address(rbp,
>>>>>> in_bytes(VM_Version::ext_cpuid_1E_offset())));
>>>>>> +    __ movl(Address(rsi, 0), rax);
>>>>>> +    __ movl(Address(rsi, 4), rbx);
>>>>>> +    __ movl(Address(rsi, 8), rcx);
>>>>>> +    __ movl(Address(rsi,12), rdx);
>>>>>> +
>>>>>>         //
>>>>>>         // Extended cpuid(0x80000008)
>>>>>>         //
>>>>>> +    __ bind(ext_cpuid8);
>>>>>>         __ movl(rax, 0x80000008);
>>>>>>         __ cpuid();
>>>>>>         __ lea(rsi, Address(rbp,
>>>>>> in_bytes(VM_Version::ext_cpuid8_offset())));
>>>>>> @@ -1109,11 +1123,27 @@
>>>>>>         }
>>>>>>
>>>>>>     #ifdef COMPILER2
>>>>>> -    if (MaxVectorSize > 16) {
>>>>>> -      // Limit vectors size to 16 bytes on current AMD cpus.
>>>>>> +    if (cpu_family() < 0x17 && MaxVectorSize > 16) {
>>>>>> +      // Limit vectors size to 16 bytes on AMD cpus < 17h.
>>>>>>           FLAG_SET_DEFAULT(MaxVectorSize, 16);
>>>>>>         }
>>>>>>     #endif // COMPILER2
>>>>>> +
>>>>>> +    // Some defaults for AMD family 17h
>>>>>> +    if ( cpu_family() == 0x17 ) {
>>>>>> +      // On family 17h processors use XMM and UnalignedLoadStores for
>>>>>> Array Copy
>>>>>> +      if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
>>>>>> +        FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
>>>>>> +      }
>>>>>> +      if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores))
>> {
>>>>>> +        FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
>>>>>> +      }
>>>>>> +#ifdef COMPILER2
>>>>>> +      if (supports_sse4_2() && FLAG_IS_DEFAULT(UseFPUForSpilling)) {
>>>>>> +        FLAG_SET_DEFAULT(UseFPUForSpilling, true);
>>>>>> +      }
>>>>>> +#endif
>>>>>> +    }
>>>>>>       }
>>>>>>
>>>>>>       if( is_intel() ) { // Intel cpus specific settings
>>>>>> diff --git a/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>> b/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>> --- a/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>> +++ b/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>> @@ -228,6 +228,15 @@
>>>>>>         } bits;
>>>>>>       };
>>>>>>
>>>>>> +  union ExtCpuid_1E_Ebx {
>>>>>> +    uint32_t value;
>>>>>> +    struct {
>>>>>> +      uint32_t                  : 8,
>>>>>> +               threads_per_core : 8,
>>>>>> +                                : 16;
>>>>>> +    } bits;
>>>>>> +  };
>>>>>> +
>>>>>>       union XemXcr0Eax {
>>>>>>         uint32_t value;
>>>>>>         struct {
>>>>>> @@ -398,6 +407,12 @@
>>>>>>         ExtCpuid8Ecx ext_cpuid8_ecx;
>>>>>>         uint32_t     ext_cpuid8_edx; // reserved
>>>>>>
>>>>>> +    // cpuid function 0x8000001E // AMD 17h
>>>>>> +    uint32_t        ext_cpuid_1E_eax;
>>>>>> +    ExtCpuid_1E_Ebx ext_cpuid_1E_ebx; // threads per core (AMD17h)
>>>>>> +    uint32_t        ext_cpuid_1E_ecx;
>>>>>> +    uint32_t        ext_cpuid_1E_edx; // unused currently
>>>>>> +
>>>>>>         // extended control register XCR0 (the XFEATURE_ENABLED_MASK
>>>>>> register)
>>>>>>         XemXcr0Eax   xem_xcr0_eax;
>>>>>>         uint32_t     xem_xcr0_edx; // reserved
>>>>>> @@ -505,6 +520,14 @@
>>>>>>           result |= CPU_CLMUL;
>>>>>>         if (_cpuid_info.sef_cpuid7_ebx.bits.rtm != 0)
>>>>>>           result |= CPU_RTM;
>>>>>> +    if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
>>>>>> +       result |= CPU_ADX;
>>>>>> +    if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
>>>>>> +      result |= CPU_BMI2;
>>>>>> +    if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
>>>>>> +      result |= CPU_SHA;
>>>>>> +    if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
>>>>>> +      result |= CPU_FMA;
>>>>>>
>>>>>>         // AMD features.
>>>>>>         if (is_amd()) {
>>>>>> @@ -518,16 +541,8 @@
>>>>>>         }
>>>>>>         // Intel features.
>>>>>>         if(is_intel()) {
>>>>>> -      if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
>>>>>> -         result |= CPU_ADX;
>>>>>> -      if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
>>>>>> -        result |= CPU_BMI2;
>>>>>> -      if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
>>>>>> -        result |= CPU_SHA;
>>>>>>           if(_cpuid_info.ext_cpuid1_ecx.bits.lzcnt_intel != 0)
>>>>>>             result |= CPU_LZCNT;
>>>>>> -      if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
>>>>>> -        result |= CPU_FMA;
>>>>>>           // for Intel, ecx.bits.misalignsse bit (bit 8) indicates
>>>>>> support for prefetchw
>>>>>>           if (_cpuid_info.ext_cpuid1_ecx.bits.misalignsse != 0) {
>>>>>>             result |= CPU_3DNOW_PREFETCH;
>>>>>> @@ -590,6 +605,7 @@
>>>>>>       static ByteSize ext_cpuid5_offset() { return
>>>>>> byte_offset_of(CpuidInfo, ext_cpuid5_eax); }
>>>>>>       static ByteSize ext_cpuid7_offset() { return
>>>>>> byte_offset_of(CpuidInfo, ext_cpuid7_eax); }
>>>>>>       static ByteSize ext_cpuid8_offset() { return
>>>>>> byte_offset_of(CpuidInfo, ext_cpuid8_eax); }
>>>>>> +  static ByteSize ext_cpuid_1E_offset() { return
>>>>>> byte_offset_of(CpuidInfo, ext_cpuid_1E_eax); }
>>>>>>       static ByteSize tpl_cpuidB0_offset() { return
>>>>>> byte_offset_of(CpuidInfo, tpl_cpuidB0_eax); }
>>>>>>       static ByteSize tpl_cpuidB1_offset() { return
>>>>>> byte_offset_of(CpuidInfo, tpl_cpuidB1_eax); }
>>>>>>       static ByteSize tpl_cpuidB2_offset() { return
>>>>>> byte_offset_of(CpuidInfo, tpl_cpuidB2_eax); }
>>>>>> @@ -673,8 +689,11 @@
>>>>>>         if (is_intel() && supports_processor_topology()) {
>>>>>>           result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
>>>>>>         } else if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) {
>>>>>> -      result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
>>>>>> -               cores_per_cpu();
>>>>>> +      if (cpu_family() >= 0x17)
>>>>>> +        result = _cpuid_info.ext_cpuid_1E_ebx.bits.threads_per_core
>> +
>>>>>> 1;
>>>>>> +      else
>>>>>> +        result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
>>>>>> +                 cores_per_cpu();
>>>>>>         }
>>>>>>         return (result == 0 ? 1 : result);
>>>>>>       }
>>>>>>
>>>>>>
>>>>>> Please let me know your comments
>>>>>>
>>>>>> Thanks for your time.
>>>>>>
>>>>>> Regards,
>>>>>> Rohit
>>>>>>
>>>>>>
>>>>>>>> Thanks,
>>>>>>>> David
>>>>>>>> -----
>>>>>>>>
>>>>>>>>
>>>>>>>>> Reference:
>>>>>>>>>
>>>>>>>>>
>>>>>>>>> https://support.amd.com/TechDocs/54945_PPR_Family_17h_Models_00h-0Fh.pdf
>>>>>>>>> [Pg 82]
>>>>>>>>>
>>>>>>>>>         CPUID_Fn8000001E_EBX [Core Identifiers] (CoreId)
>>>>>>>>>           15:8 ThreadsPerCore: threads per core. Read-only. Reset:
>>>>>>>>> XXh.
>>>>>>>>> The number of threads per core is ThreadsPerCore+1.
>>>>>>>>>
>>>>>>>>> diff --git a/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>> b/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>> --- a/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>> +++ b/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>> @@ -70,7 +70,7 @@
>>>>>>>>>          bool use_evex = FLAG_IS_DEFAULT(UseAVX) || (UseAVX > 2);
>>>>>>>>>
>>>>>>>>>          Label detect_486, cpu486, detect_586, std_cpuid1,
>> std_cpuid4;
>>>>>>>>> -    Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5,
>> ext_cpuid7,
>>>>>>>>> done, wrapup;
>>>>>>>>> +    Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5,
>> ext_cpuid7,
>>>>>>>>> ext_cpuid8, done, wrapup;
>>>>>>>>>          Label legacy_setup, save_restore_except,
>> legacy_save_restore,
>>>>>>>>> start_simd_check;
>>>>>>>>>
>>>>>>>>>          StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub");
>>>>>>>>> @@ -272,9 +272,23 @@
>>>>>>>>>          __ jccb(Assembler::belowEqual, ext_cpuid5);
>>>>>>>>>          __ cmpl(rax, 0x80000007);     // Is cpuid(0x80000008)
>>>>>>>>> supported?
>>>>>>>>>          __ jccb(Assembler::belowEqual, ext_cpuid7);
>>>>>>>>> +    __ cmpl(rax, 0x80000008);     // Is cpuid(0x8000001E)
>> supported?
>>>>>>>>> +    __ jccb(Assembler::belowEqual, ext_cpuid8);
>>>>>>>>> +    //
>>>>>>>>> +    // Extended cpuid(0x8000001E)
>>>>>>>>> +    //
>>>>>>>>> +    __ movl(rax, 0x8000001E);
>>>>>>>>> +    __ cpuid();
>>>>>>>>> +    __ lea(rsi, Address(rbp,
>>>>>>>>> in_bytes(VM_Version::ext_cpuid1E_offset())));
>>>>>>>>> +    __ movl(Address(rsi, 0), rax);
>>>>>>>>> +    __ movl(Address(rsi, 4), rbx);
>>>>>>>>> +    __ movl(Address(rsi, 8), rcx);
>>>>>>>>> +    __ movl(Address(rsi,12), rdx);
>>>>>>>>> +
>>>>>>>>>          //
>>>>>>>>>          // Extended cpuid(0x80000008)
>>>>>>>>>          //
>>>>>>>>> +    __ bind(ext_cpuid8);
>>>>>>>>>          __ movl(rax, 0x80000008);
>>>>>>>>>          __ cpuid();
>>>>>>>>>          __ lea(rsi, Address(rbp,
>>>>>>>>> in_bytes(VM_Version::ext_cpuid8_offset())));
>>>>>>>>> @@ -1109,11 +1123,27 @@
>>>>>>>>>          }
>>>>>>>>>
>>>>>>>>>      #ifdef COMPILER2
>>>>>>>>> -    if (MaxVectorSize > 16) {
>>>>>>>>> -      // Limit vectors size to 16 bytes on current AMD cpus.
>>>>>>>>> +    if (cpu_family() < 0x17 && MaxVectorSize > 16) {
>>>>>>>>> +      // Limit vectors size to 16 bytes on AMD cpus < 17h.
>>>>>>>>>            FLAG_SET_DEFAULT(MaxVectorSize, 16);
>>>>>>>>>          }
>>>>>>>>>      #endif // COMPILER2
>>>>>>>>> +
>>>>>>>>> +    // Some defaults for AMD family 17h
>>>>>>>>> +    if ( cpu_family() == 0x17 ) {
>>>>>>>>> +      // On family 17h processors use XMM and UnalignedLoadStores
>>>>>>>>> for
>>>>>>>>> Array Copy
>>>>>>>>> +      if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy))
>> {
>>>>>>>>> +        FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
>>>>>>>>> +      }
>>>>>>>>> +      if (supports_sse2() &&
>>>>>>>>> FLAG_IS_DEFAULT(UseUnalignedLoadStores))
>>>>>>>>> {
>>>>>>>>> +        FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
>>>>>>>>> +      }
>>>>>>>>> +#ifdef COMPILER2
>>>>>>>>> +      if (supports_sse4_2() && FLAG_IS_DEFAULT(UseFPUForSpilling))
>> {
>>>>>>>>> +        FLAG_SET_DEFAULT(UseFPUForSpilling, true);
>>>>>>>>> +      }
>>>>>>>>> +#endif
>>>>>>>>> +    }
>>>>>>>>>        }
>>>>>>>>>
>>>>>>>>>        if( is_intel() ) { // Intel cpus specific settings
>>>>>>>>> diff --git a/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>> b/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>> --- a/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>> +++ b/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>> @@ -228,6 +228,15 @@
>>>>>>>>>          } bits;
>>>>>>>>>        };
>>>>>>>>>
>>>>>>>>> +  union ExtCpuid1EEx {
>>>>>>>>> +    uint32_t value;
>>>>>>>>> +    struct {
>>>>>>>>> +      uint32_t                  : 8,
>>>>>>>>> +               threads_per_core : 8,
>>>>>>>>> +                                : 16;
>>>>>>>>> +    } bits;
>>>>>>>>> +  };
>>>>>>>>> +
>>>>>>>>>        union XemXcr0Eax {
>>>>>>>>>          uint32_t value;
>>>>>>>>>          struct {
>>>>>>>>> @@ -398,6 +407,12 @@
>>>>>>>>>          ExtCpuid8Ecx ext_cpuid8_ecx;
>>>>>>>>>          uint32_t     ext_cpuid8_edx; // reserved
>>>>>>>>>
>>>>>>>>> +    // cpuid function 0x8000001E // AMD 17h
>>>>>>>>> +    uint32_t     ext_cpuid1E_eax;
>>>>>>>>> +    ExtCpuid1EEx ext_cpuid1E_ebx; // threads per core (AMD17h)
>>>>>>>>> +    uint32_t     ext_cpuid1E_ecx;
>>>>>>>>> +    uint32_t     ext_cpuid1E_edx; // unused currently
>>>>>>>>> +
>>>>>>>>>          // extended control register XCR0 (the
>> XFEATURE_ENABLED_MASK
>>>>>>>>> register)
>>>>>>>>>          XemXcr0Eax   xem_xcr0_eax;
>>>>>>>>>          uint32_t     xem_xcr0_edx; // reserved
>>>>>>>>> @@ -505,6 +520,14 @@
>>>>>>>>>            result |= CPU_CLMUL;
>>>>>>>>>          if (_cpuid_info.sef_cpuid7_ebx.bits.rtm != 0)
>>>>>>>>>            result |= CPU_RTM;
>>>>>>>>> +    if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
>>>>>>>>> +       result |= CPU_ADX;
>>>>>>>>> +    if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
>>>>>>>>> +      result |= CPU_BMI2;
>>>>>>>>> +    if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
>>>>>>>>> +      result |= CPU_SHA;
>>>>>>>>> +    if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
>>>>>>>>> +      result |= CPU_FMA;
>>>>>>>>>
>>>>>>>>>          // AMD features.
>>>>>>>>>          if (is_amd()) {
>>>>>>>>> @@ -518,16 +541,8 @@
>>>>>>>>>          }
>>>>>>>>>          // Intel features.
>>>>>>>>>          if(is_intel()) {
>>>>>>>>> -      if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
>>>>>>>>> -         result |= CPU_ADX;
>>>>>>>>> -      if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
>>>>>>>>> -        result |= CPU_BMI2;
>>>>>>>>> -      if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
>>>>>>>>> -        result |= CPU_SHA;
>>>>>>>>>            if(_cpuid_info.ext_cpuid1_ecx.bits.lzcnt_intel != 0)
>>>>>>>>>              result |= CPU_LZCNT;
>>>>>>>>> -      if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
>>>>>>>>> -        result |= CPU_FMA;
>>>>>>>>>            // for Intel, ecx.bits.misalignsse bit (bit 8) indicates
>>>>>>>>> support for prefetchw
>>>>>>>>>            if (_cpuid_info.ext_cpuid1_ecx.bits.misalignsse != 0) {
>>>>>>>>>              result |= CPU_3DNOW_PREFETCH;
>>>>>>>>> @@ -590,6 +605,7 @@
>>>>>>>>>        static ByteSize ext_cpuid5_offset() { return
>>>>>>>>> byte_offset_of(CpuidInfo, ext_cpuid5_eax); }
>>>>>>>>>        static ByteSize ext_cpuid7_offset() { return
>>>>>>>>> byte_offset_of(CpuidInfo, ext_cpuid7_eax); }
>>>>>>>>>        static ByteSize ext_cpuid8_offset() { return
>>>>>>>>> byte_offset_of(CpuidInfo, ext_cpuid8_eax); }
>>>>>>>>> +  static ByteSize ext_cpuid1E_offset() { return
>>>>>>>>> byte_offset_of(CpuidInfo, ext_cpuid1E_eax); }
>>>>>>>>>        static ByteSize tpl_cpuidB0_offset() { return
>>>>>>>>> byte_offset_of(CpuidInfo, tpl_cpuidB0_eax); }
>>>>>>>>>        static ByteSize tpl_cpuidB1_offset() { return
>>>>>>>>> byte_offset_of(CpuidInfo, tpl_cpuidB1_eax); }
>>>>>>>>>        static ByteSize tpl_cpuidB2_offset() { return
>>>>>>>>> byte_offset_of(CpuidInfo, tpl_cpuidB2_eax); }
>>>>>>>>> @@ -673,8 +689,11 @@
>>>>>>>>>          if (is_intel() && supports_processor_topology()) {
>>>>>>>>>            result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
>>>>>>>>>          } else if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) {
>>>>>>>>> -      result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
>>>>>>>>> -               cores_per_cpu();
>>>>>>>>> +      if (cpu_family() >= 0x17)
>>>>>>>>> +        result = _cpuid_info.ext_cpuid1E_ebx.bits.threads_per_core
>> +
>>>>>>>>> 1;
>>>>>>>>> +      else
>>>>>>>>> +        result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
>>>>>>>>> +                 cores_per_cpu();
>>>>>>>>>          }
>>>>>>>>>          return (result == 0 ? 1 : result);
>>>>>>>>>        }
>>>>>>>>>
>>>>>>>>> I have attached the patch for review.
>>>>>>>>> Please let me know your comments.
>>>>>>>>>
>>>>>>>>> Thanks,
>>>>>>>>> Rohit
>>>>>>>>>
>>>>>>>>>> Thanks,
>>>>>>>>>> Vladimir
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>> src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>>
>>>>>>>>>>> No comments on AMD specific changes.
>>>>>>>>>>>
>>>>>>>>>>> Thanks,
>>>>>>>>>>> David
>>>>>>>>>>> -----
>>>>>>>>>>>
>>>>>>>>>>> On 5/09/2017 3:43 PM, David Holmes wrote:
>>>>>>>>>>>>
>>>>>>>>>>>>
>>>>>>>>>>>>
>>>>>>>>>>>>
>>>>>>>>>>>> On 5/09/2017 3:29 PM, Rohit Arul Raj wrote:
>>>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>>> Hello David,
>>>>>>>>>>>>>
>>>>>>>>>>>>> On Tue, Sep 5, 2017 at 10:31 AM, David Holmes
>>>>>>>>>>>>> <david.holmes at oracle.com>
>>>>>>>>>>>>> wrote:
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> Hi Rohit,
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> I was unable to apply your patch to latest jdk10/hs/hotspot
>>>>>>>>>>>>>> repo.
>>>>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>>> I checked out the latest jdk10/hs/hotspot [parent:
>>>>>>>>>>>>> 13548:1a9c2e07a826]
>>>>>>>>>>>>> and was able to apply the patch
>>>>>>>>>>>>> [epyc-amd17h-defaults-3Sept.patch]
>>>>>>>>>>>>> without any issues.
>>>>>>>>>>>>> Can you share the error message that you are getting?
>>>>>>>>>>>>
>>>>>>>>>>>>
>>>>>>>>>>>>
>>>>>>>>>>>>
>>>>>>>>>>>>
>>>>>>>>>>>> I was getting this:
>>>>>>>>>>>>
>>>>>>>>>>>> applying hotspot.patch
>>>>>>>>>>>> patching file src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>>> Hunk #1 FAILED at 1108
>>>>>>>>>>>> 1 out of 1 hunks FAILED -- saving rejects to file
>>>>>>>>>>>> src/cpu/x86/vm/vm_version_x86.cpp.rej
>>>>>>>>>>>> patching file src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>>>> Hunk #2 FAILED at 522
>>>>>>>>>>>> 1 out of 2 hunks FAILED -- saving rejects to file
>>>>>>>>>>>> src/cpu/x86/vm/vm_version_x86.hpp.rej
>>>>>>>>>>>> abort: patch failed to apply
>>>>>>>>>>>>
>>>>>>>>>>>> but I started again and this time it applied fine, so not sure
>>>>>>>>>>>> what
>>>>>>>>>>>> was
>>>>>>>>>>>> going on there.
>>>>>>>>>>>>
>>>>>>>>>>>> Cheers,
>>>>>>>>>>>> David
>>>>>>>>>>>>
>>>>>>>>>>>>> Regards,
>>>>>>>>>>>>> Rohit
>>>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> On 4/09/2017 2:42 AM, Rohit Arul Raj wrote:
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> Hello Vladimir,
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> On Sat, Sep 2, 2017 at 11:25 PM, Vladimir Kozlov
>>>>>>>>>>>>>>> <vladimir.kozlov at oracle.com> wrote:
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> Hi Rohit,
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> On 9/2/17 1:16 AM, Rohit Arul Raj wrote:
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>> Hello Vladimir,
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>> Changes look good. Only question I have is about
>>>>>>>>>>>>>>>>>> MaxVectorSize. It is set to 16 only in presence of AVX:
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>> http://hg.openjdk.java.net/jdk10/hs/hotspot/file/
>> 046eab27258f/src/cpu/x86/vm/vm_version_x86.cpp#l945
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>> Does that code work for AMD 17h too?
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>> Thanks for pointing that out. Yes, the code works fine for
>>>>>>>>>>>>>>>>> AMD
>>>>>>>>>>>>>>>>> 17h.
>>>>>>>>>>>>>>>>> So
>>>>>>>>>>>>>>>>> I have removed the surplus check for MaxVectorSize from my
>>>>>>>>>>>>>>>>> patch.
>>>>>>>>>>>>>>>>> I
>>>>>>>>>>>>>>>>> have updated, re-tested and attached the patch.
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> Which check did you remove?
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> My older patch had the below mentioned check which was
>> required
>>>>>>>>>>>>>>> on
>>>>>>>>>>>>>>> JDK9 where the default MaxVectorSize was 64. It has been
>>>>>>>>>>>>>>> handled
>>>>>>>>>>>>>>> better in openJDK10. So this check is not required anymore.
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> +    // Some defaults for AMD family 17h
>>>>>>>>>>>>>>> +    if ( cpu_family() == 0x17 ) {
>>>>>>>>>>>>>>> ...
>>>>>>>>>>>>>>> ...
>>>>>>>>>>>>>>> +      if (MaxVectorSize > 32) {
>>>>>>>>>>>>>>> +        FLAG_SET_DEFAULT(MaxVectorSize, 32);
>>>>>>>>>>>>>>> +      }
>>>>>>>>>>>>>>> ..
>>>>>>>>>>>>>>> ..
>>>>>>>>>>>>>>> +      }
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>> I have one query regarding the setting of UseSHA flag:
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>> http://hg.openjdk.java.net/jdk10/hs/hotspot/file/
>> 046eab27258f/src/cpu/x86/vm/vm_version_x86.cpp#l821
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>> AMD 17h has support for SHA.
>>>>>>>>>>>>>>>>> AMD 15h doesn't have  support for SHA. Still "UseSHA" flag
>>>>>>>>>>>>>>>>> gets
>>>>>>>>>>>>>>>>> enabled for it based on the availability of BMI2 and AVX2.
>> Is
>>>>>>>>>>>>>>>>> there
>>>>>>>>>>>>>>>>> an
>>>>>>>>>>>>>>>>> underlying reason for this? I have handled this in the
>> patch
>>>>>>>>>>>>>>>>> but
>>>>>>>>>>>>>>>>> just
>>>>>>>>>>>>>>>>> wanted to confirm.
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> It was done with next changes which use only AVX2 and BMI2
>>>>>>>>>>>>>>>> instructions
>>>>>>>>>>>>>>>> to
>>>>>>>>>>>>>>>> calculate SHA-256:
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> http://hg.openjdk.java.net/jdk10/hs/hotspot/rev/
>> 6a17c49de974
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> I don't know if AMD 15h supports these instructions and can
>>>>>>>>>>>>>>>> execute
>>>>>>>>>>>>>>>> that
>>>>>>>>>>>>>>>> code. You need to test it.
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> Ok, got it. Since AMD15h has support for AVX2 and BMI2
>>>>>>>>>>>>>>> instructions,
>>>>>>>>>>>>>>> it should work.
>>>>>>>>>>>>>>> Confirmed by running following sanity tests:
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> ./hotspot/test/compiler/intrinsics/sha/sanity/
>> TestSHA1Intrinsics.java
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> ./hotspot/test/compiler/intrinsics/sha/sanity/
>> TestSHA512Intrinsics.java
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> ./hotspot/test/compiler/intrinsics/sha/sanity/
>> TestSHA256Intrinsics.java
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> So I have removed those SHA checks from my patch too.
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> Please find attached updated, re-tested patch.
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> diff --git a/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>>>>>> b/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>>>>>> --- a/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>>>>>> +++ b/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>>>>>> @@ -1109,11 +1109,27 @@
>>>>>>>>>>>>>>>            }
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>        #ifdef COMPILER2
>>>>>>>>>>>>>>> -    if (MaxVectorSize > 16) {
>>>>>>>>>>>>>>> -      // Limit vectors size to 16 bytes on current AMD cpus.
>>>>>>>>>>>>>>> +    if (cpu_family() < 0x17 && MaxVectorSize > 16) {
>>>>>>>>>>>>>>> +      // Limit vectors size to 16 bytes on AMD cpus < 17h.
>>>>>>>>>>>>>>>              FLAG_SET_DEFAULT(MaxVectorSize, 16);
>>>>>>>>>>>>>>>            }
>>>>>>>>>>>>>>>        #endif // COMPILER2
>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>> +    // Some defaults for AMD family 17h
>>>>>>>>>>>>>>> +    if ( cpu_family() == 0x17 ) {
>>>>>>>>>>>>>>> +      // On family 17h processors use XMM and
>>>>>>>>>>>>>>> UnalignedLoadStores
>>>>>>>>>>>>>>> for
>>>>>>>>>>>>>>> Array Copy
>>>>>>>>>>>>>>> +      if (supports_sse2() &&
>>>>>>>>>>>>>>> FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
>>>>>>>>>>>>>>> +        FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
>>>>>>>>>>>>>>> +      }
>>>>>>>>>>>>>>> +      if (supports_sse2() &&
>>>>>>>>>>>>>>> FLAG_IS_DEFAULT(UseUnalignedLoadStores))
>>>>>>>>>>>>>>> {
>>>>>>>>>>>>>>> +        FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
>>>>>>>>>>>>>>> +      }
>>>>>>>>>>>>>>> +#ifdef COMPILER2
>>>>>>>>>>>>>>> +      if (supports_sse4_2() &&
>>>>>>>>>>>>>>> FLAG_IS_DEFAULT(UseFPUForSpilling))
>>>>>>>>>>>>>>> {
>>>>>>>>>>>>>>> +        FLAG_SET_DEFAULT(UseFPUForSpilling, true);
>>>>>>>>>>>>>>> +      }
>>>>>>>>>>>>>>> +#endif
>>>>>>>>>>>>>>> +    }
>>>>>>>>>>>>>>>          }
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>          if( is_intel() ) { // Intel cpus specific settings
>>>>>>>>>>>>>>> diff --git a/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>>>>>>> b/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>>>>>>> --- a/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>>>>>>> +++ b/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>>>>>>> @@ -505,6 +505,14 @@
>>>>>>>>>>>>>>>              result |= CPU_CLMUL;
>>>>>>>>>>>>>>>            if (_cpuid_info.sef_cpuid7_ebx.bits.rtm != 0)
>>>>>>>>>>>>>>>              result |= CPU_RTM;
>>>>>>>>>>>>>>> +    if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
>>>>>>>>>>>>>>> +       result |= CPU_ADX;
>>>>>>>>>>>>>>> +    if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
>>>>>>>>>>>>>>> +      result |= CPU_BMI2;
>>>>>>>>>>>>>>> +    if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
>>>>>>>>>>>>>>> +      result |= CPU_SHA;
>>>>>>>>>>>>>>> +    if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
>>>>>>>>>>>>>>> +      result |= CPU_FMA;
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>            // AMD features.
>>>>>>>>>>>>>>>            if (is_amd()) {
>>>>>>>>>>>>>>> @@ -515,19 +523,13 @@
>>>>>>>>>>>>>>>                result |= CPU_LZCNT;
>>>>>>>>>>>>>>>              if (_cpuid_info.ext_cpuid1_ecx.bits.sse4a != 0)
>>>>>>>>>>>>>>>                result |= CPU_SSE4A;
>>>>>>>>>>>>>>> +      if(_cpuid_info.std_cpuid1_edx.bits.ht != 0)
>>>>>>>>>>>>>>> +        result |= CPU_HT;
>>>>>>>>>>>>>>>            }
>>>>>>>>>>>>>>>            // Intel features.
>>>>>>>>>>>>>>>            if(is_intel()) {
>>>>>>>>>>>>>>> -      if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
>>>>>>>>>>>>>>> -         result |= CPU_ADX;
>>>>>>>>>>>>>>> -      if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
>>>>>>>>>>>>>>> -        result |= CPU_BMI2;
>>>>>>>>>>>>>>> -      if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
>>>>>>>>>>>>>>> -        result |= CPU_SHA;
>>>>>>>>>>>>>>>              if(_cpuid_info.ext_cpuid1_ecx.bits.lzcnt_intel
>> !=
>>>>>>>>>>>>>>> 0)
>>>>>>>>>>>>>>>                result |= CPU_LZCNT;
>>>>>>>>>>>>>>> -      if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
>>>>>>>>>>>>>>> -        result |= CPU_FMA;
>>>>>>>>>>>>>>>              // for Intel, ecx.bits.misalignsse bit (bit 8)
>>>>>>>>>>>>>>> indicates
>>>>>>>>>>>>>>> support for prefetchw
>>>>>>>>>>>>>>>              if (_cpuid_info.ext_cpuid1_ecx.bits.misalignsse
>> !=
>>>>>>>>>>>>>>> 0)
>>>>>>>>>>>>>>> {
>>>>>>>>>>>>>>>                result |= CPU_3DNOW_PREFETCH;
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> Please let me know your comments.
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> Thanks for your time.
>>>>>>>>>>>>>>> Rohit
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>> Thanks for taking time to review the code.
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>> diff --git a/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>>>>>>>> b/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>>>>>>>> --- a/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>>>>>>>> +++ b/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>>>>>>>> @@ -1088,6 +1088,22 @@
>>>>>>>>>>>>>>>>>               }
>>>>>>>>>>>>>>>>>               FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
>>>>>>>>>>>>>>>>>             }
>>>>>>>>>>>>>>>>> +    if (supports_sha()) {
>>>>>>>>>>>>>>>>> +      if (FLAG_IS_DEFAULT(UseSHA)) {
>>>>>>>>>>>>>>>>> +        FLAG_SET_DEFAULT(UseSHA, true);
>>>>>>>>>>>>>>>>> +      }
>>>>>>>>>>>>>>>>> +    } else if (UseSHA || UseSHA1Intrinsics ||
>>>>>>>>>>>>>>>>> UseSHA256Intrinsics
>>>>>>>>>>>>>>>>> ||
>>>>>>>>>>>>>>>>> UseSHA512Intrinsics) {
>>>>>>>>>>>>>>>>> +      if (!FLAG_IS_DEFAULT(UseSHA) ||
>>>>>>>>>>>>>>>>> +          !FLAG_IS_DEFAULT(UseSHA1Intrinsics) ||
>>>>>>>>>>>>>>>>> +          !FLAG_IS_DEFAULT(UseSHA256Intrinsics) ||
>>>>>>>>>>>>>>>>> +          !FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
>>>>>>>>>>>>>>>>> +        warning("SHA instructions are not available on
>> this
>>>>>>>>>>>>>>>>> CPU");
>>>>>>>>>>>>>>>>> +      }
>>>>>>>>>>>>>>>>> +      FLAG_SET_DEFAULT(UseSHA, false);
>>>>>>>>>>>>>>>>> +      FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
>>>>>>>>>>>>>>>>> +      FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
>>>>>>>>>>>>>>>>> +      FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
>>>>>>>>>>>>>>>>> +    }
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>             // some defaults for AMD family 15h
>>>>>>>>>>>>>>>>>             if ( cpu_family() == 0x15 ) {
>>>>>>>>>>>>>>>>> @@ -1109,11 +1125,40 @@
>>>>>>>>>>>>>>>>>             }
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>         #ifdef COMPILER2
>>>>>>>>>>>>>>>>> -    if (MaxVectorSize > 16) {
>>>>>>>>>>>>>>>>> -      // Limit vectors size to 16 bytes on current AMD
>> cpus.
>>>>>>>>>>>>>>>>> +    if (cpu_family() < 0x17 && MaxVectorSize > 16) {
>>>>>>>>>>>>>>>>> +      // Limit vectors size to 16 bytes on AMD cpus < 17h.
>>>>>>>>>>>>>>>>>               FLAG_SET_DEFAULT(MaxVectorSize, 16);
>>>>>>>>>>>>>>>>>             }
>>>>>>>>>>>>>>>>>         #endif // COMPILER2
>>>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>>>> +    // Some defaults for AMD family 17h
>>>>>>>>>>>>>>>>> +    if ( cpu_family() == 0x17 ) {
>>>>>>>>>>>>>>>>> +      // On family 17h processors use XMM and
>>>>>>>>>>>>>>>>> UnalignedLoadStores
>>>>>>>>>>>>>>>>> for
>>>>>>>>>>>>>>>>> Array Copy
>>>>>>>>>>>>>>>>> +      if (supports_sse2() &&
>>>>>>>>>>>>>>>>> FLAG_IS_DEFAULT(UseXMMForArrayCopy))
>>>>>>>>>>>>>>>>> {
>>>>>>>>>>>>>>>>> +        FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
>>>>>>>>>>>>>>>>> +      }
>>>>>>>>>>>>>>>>> +      if (supports_sse2() &&
>>>>>>>>>>>>>>>>> FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
>>>>>>>>>>>>>>>>> +        FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
>>>>>>>>>>>>>>>>> +      }
>>>>>>>>>>>>>>>>> +      if (supports_bmi2() &&
>>>>>>>>>>>>>>>>> FLAG_IS_DEFAULT(UseBMI2Instructions))
>>>>>>>>>>>>>>>>> {
>>>>>>>>>>>>>>>>> +        FLAG_SET_DEFAULT(UseBMI2Instructions, true);
>>>>>>>>>>>>>>>>> +      }
>>>>>>>>>>>>>>>>> +      if (UseSHA) {
>>>>>>>>>>>>>>>>> +        if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
>>>>>>>>>>>>>>>>> +          FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
>>>>>>>>>>>>>>>>> +        } else if (UseSHA512Intrinsics) {
>>>>>>>>>>>>>>>>> +          warning("Intrinsics for SHA-384 and SHA-512
>> crypto
>>>>>>>>>>>>>>>>> hash
>>>>>>>>>>>>>>>>> functions not available on this CPU.");
>>>>>>>>>>>>>>>>> +          FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
>>>>>>>>>>>>>>>>> +        }
>>>>>>>>>>>>>>>>> +      }
>>>>>>>>>>>>>>>>> +#ifdef COMPILER2
>>>>>>>>>>>>>>>>> +      if (supports_sse4_2()) {
>>>>>>>>>>>>>>>>> +        if (FLAG_IS_DEFAULT(UseFPUForSpilling)) {
>>>>>>>>>>>>>>>>> +          FLAG_SET_DEFAULT(UseFPUForSpilling, true);
>>>>>>>>>>>>>>>>> +        }
>>>>>>>>>>>>>>>>> +      }
>>>>>>>>>>>>>>>>> +#endif
>>>>>>>>>>>>>>>>> +    }
>>>>>>>>>>>>>>>>>           }
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>           if( is_intel() ) { // Intel cpus specific settings
>>>>>>>>>>>>>>>>> diff --git a/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>>>>>>>>> b/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>>>>>>>>> --- a/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>>>>>>>>> +++ b/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>>>>>>>>> @@ -505,6 +505,14 @@
>>>>>>>>>>>>>>>>>               result |= CPU_CLMUL;
>>>>>>>>>>>>>>>>>             if (_cpuid_info.sef_cpuid7_ebx.bits.rtm != 0)
>>>>>>>>>>>>>>>>>               result |= CPU_RTM;
>>>>>>>>>>>>>>>>> +    if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
>>>>>>>>>>>>>>>>> +       result |= CPU_ADX;
>>>>>>>>>>>>>>>>> +    if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
>>>>>>>>>>>>>>>>> +      result |= CPU_BMI2;
>>>>>>>>>>>>>>>>> +    if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
>>>>>>>>>>>>>>>>> +      result |= CPU_SHA;
>>>>>>>>>>>>>>>>> +    if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
>>>>>>>>>>>>>>>>> +      result |= CPU_FMA;
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>             // AMD features.
>>>>>>>>>>>>>>>>>             if (is_amd()) {
>>>>>>>>>>>>>>>>> @@ -515,19 +523,13 @@
>>>>>>>>>>>>>>>>>                 result |= CPU_LZCNT;
>>>>>>>>>>>>>>>>>               if (_cpuid_info.ext_cpuid1_ecx.bits.sse4a !=
>> 0)
>>>>>>>>>>>>>>>>>                 result |= CPU_SSE4A;
>>>>>>>>>>>>>>>>> +      if(_cpuid_info.std_cpuid1_edx.bits.ht != 0)
>>>>>>>>>>>>>>>>> +        result |= CPU_HT;
>>>>>>>>>>>>>>>>>             }
>>>>>>>>>>>>>>>>>             // Intel features.
>>>>>>>>>>>>>>>>>             if(is_intel()) {
>>>>>>>>>>>>>>>>> -      if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
>>>>>>>>>>>>>>>>> -         result |= CPU_ADX;
>>>>>>>>>>>>>>>>> -      if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
>>>>>>>>>>>>>>>>> -        result |= CPU_BMI2;
>>>>>>>>>>>>>>>>> -      if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
>>>>>>>>>>>>>>>>> -        result |= CPU_SHA;
>>>>>>>>>>>>>>>>>               if(_cpuid_info.ext_cpuid1_ecx.
>> bits.lzcnt_intel
>>>>>>>>>>>>>>>>> !=
>>>>>>>>>>>>>>>>> 0)
>>>>>>>>>>>>>>>>>                 result |= CPU_LZCNT;
>>>>>>>>>>>>>>>>> -      if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
>>>>>>>>>>>>>>>>> -        result |= CPU_FMA;
>>>>>>>>>>>>>>>>>               // for Intel, ecx.bits.misalignsse bit (bit 8)
>>>>>>>>>>>>>>>>> indicates
>>>>>>>>>>>>>>>>> support for prefetchw
>>>>>>>>>>>>>>>>>               if (_cpuid_info.ext_cpuid1_ecx.
>> bits.misalignsse
>>>>>>>>>>>>>>>>> !=
>>>>>>>>>>>>>>>>> 0) {
>>>>>>>>>>>>>>>>>                 result |= CPU_3DNOW_PREFETCH;
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>> Regards,
>>>>>>>>>>>>>>>>> Rohit
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>> On 9/1/17 8:04 AM, Rohit Arul Raj wrote:
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>> On Fri, Sep 1, 2017 at 10:27 AM, Rohit Arul Raj
>>>>>>>>>>>>>>>>>>> <rohitarulraj at gmail.com>
>>>>>>>>>>>>>>>>>>> wrote:
>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>> On Fri, Sep 1, 2017 at 3:01 AM, David Holmes
>>>>>>>>>>>>>>>>>>>> <david.holmes at oracle.com>
>>>>>>>>>>>>>>>>>>>> wrote:
>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>> Hi Rohit,
>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>> I think the patch needs updating for jdk10 as I already
>>>>>>>>>>>>>>>>>>>>> see
>>>>>>>>>>>>>>>>>>>>> a
>>>>>>>>>>>>>>>>>>>>> lot of
>>>>>>>>>>>>>>>>>>>>> logic
>>>>>>>>>>>>>>>>>>>>> around UseSHA in vm_version_x86.cpp.
>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>> Thanks,
>>>>>>>>>>>>>>>>>>>>> David
>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>> Thanks David, I will update the patch wrt JDK10 source
>>>>>>>>>>>>>>>>>>>> base,
>>>>>>>>>>>>>>>>>>>> test
>>>>>>>>>>>>>>>>>>>> and
>>>>>>>>>>>>>>>>>>>> resubmit for review.
>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>> Regards,
>>>>>>>>>>>>>>>>>>>> Rohit
>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>> Hi All,
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>> I have updated the patch wrt openjdk10/hotspot (parent:
>>>>>>>>>>>>>>>>>>> 13519:71337910df60), did regression testing using jtreg
>>>>>>>>>>>>>>>>>>> ($make
>>>>>>>>>>>>>>>>>>> default) and didn't find any regressions.
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>> Can anyone please volunteer to review this patch  which
>>>>>>>>>>>>>>>>>>> sets
>>>>>>>>>>>>>>>>>>> flag/ISA
>>>>>>>>>>>>>>>>>>> defaults for newer AMD 17h (EPYC) processor?
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>> ************************* Patch
>>>>>>>>>>>>>>>>>>> ****************************
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>> diff --git a/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>>>>>>>>>> b/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>>>>>>>>>> --- a/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>>>>>>>>>> +++ b/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>>>>>>>>>> @@ -1088,6 +1088,22 @@
>>>>>>>>>>>>>>>>>>>                }
>>>>>>>>>>>>>>>>>>>                FLAG_SET_DEFAULT(UseSSE42Intrinsics,
>> false);
>>>>>>>>>>>>>>>>>>>              }
>>>>>>>>>>>>>>>>>>> +    if (supports_sha()) {
>>>>>>>>>>>>>>>>>>> +      if (FLAG_IS_DEFAULT(UseSHA)) {
>>>>>>>>>>>>>>>>>>> +        FLAG_SET_DEFAULT(UseSHA, true);
>>>>>>>>>>>>>>>>>>> +      }
>>>>>>>>>>>>>>>>>>> +    } else if (UseSHA || UseSHA1Intrinsics ||
>>>>>>>>>>>>>>>>>>> UseSHA256Intrinsics
>>>>>>>>>>>>>>>>>>> ||
>>>>>>>>>>>>>>>>>>> UseSHA512Intrinsics) {
>>>>>>>>>>>>>>>>>>> +      if (!FLAG_IS_DEFAULT(UseSHA) ||
>>>>>>>>>>>>>>>>>>> +          !FLAG_IS_DEFAULT(UseSHA1Intrinsics) ||
>>>>>>>>>>>>>>>>>>> +          !FLAG_IS_DEFAULT(UseSHA256Intrinsics) ||
>>>>>>>>>>>>>>>>>>> +          !FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
>>>>>>>>>>>>>>>>>>> +        warning("SHA instructions are not available on
>>>>>>>>>>>>>>>>>>> this
>>>>>>>>>>>>>>>>>>> CPU");
>>>>>>>>>>>>>>>>>>> +      }
>>>>>>>>>>>>>>>>>>> +      FLAG_SET_DEFAULT(UseSHA, false);
>>>>>>>>>>>>>>>>>>> +      FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
>>>>>>>>>>>>>>>>>>> +      FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
>>>>>>>>>>>>>>>>>>> +      FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
>>>>>>>>>>>>>>>>>>> +    }
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>              // some defaults for AMD family 15h
>>>>>>>>>>>>>>>>>>>              if ( cpu_family() == 0x15 ) {
>>>>>>>>>>>>>>>>>>> @@ -1109,11 +1125,43 @@
>>>>>>>>>>>>>>>>>>>              }
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>          #ifdef COMPILER2
>>>>>>>>>>>>>>>>>>> -    if (MaxVectorSize > 16) {
>>>>>>>>>>>>>>>>>>> -      // Limit vectors size to 16 bytes on current AMD
>>>>>>>>>>>>>>>>>>> cpus.
>>>>>>>>>>>>>>>>>>> +    if (cpu_family() < 0x17 && MaxVectorSize > 16) {
>>>>>>>>>>>>>>>>>>> +      // Limit vectors size to 16 bytes on AMD cpus <
>> 17h.
>>>>>>>>>>>>>>>>>>>                FLAG_SET_DEFAULT(MaxVectorSize, 16);
>>>>>>>>>>>>>>>>>>>              }
>>>>>>>>>>>>>>>>>>>          #endif // COMPILER2
>>>>>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>>>>>> +    // Some defaults for AMD family 17h
>>>>>>>>>>>>>>>>>>> +    if ( cpu_family() == 0x17 ) {
>>>>>>>>>>>>>>>>>>> +      // On family 17h processors use XMM and
>>>>>>>>>>>>>>>>>>> UnalignedLoadStores
>>>>>>>>>>>>>>>>>>> for
>>>>>>>>>>>>>>>>>>> Array Copy
>>>>>>>>>>>>>>>>>>> +      if (supports_sse2() &&
>>>>>>>>>>>>>>>>>>> FLAG_IS_DEFAULT(UseXMMForArrayCopy))
>>>>>>>>>>>>>>>>>>> {
>>>>>>>>>>>>>>>>>>> +        UseXMMForArrayCopy = true;
>>>>>>>>>>>>>>>>>>> +      }
>>>>>>>>>>>>>>>>>>> +      if (supports_sse2() &&
>>>>>>>>>>>>>>>>>>> FLAG_IS_DEFAULT(UseUnalignedLoadStores))
>>>>>>>>>>>>>>>>>>> {
>>>>>>>>>>>>>>>>>>> +        UseUnalignedLoadStores = true;
>>>>>>>>>>>>>>>>>>> +      }
>>>>>>>>>>>>>>>>>>> +      if (supports_bmi2() &&
>>>>>>>>>>>>>>>>>>> FLAG_IS_DEFAULT(UseBMI2Instructions)) {
>>>>>>>>>>>>>>>>>>> +        UseBMI2Instructions = true;
>>>>>>>>>>>>>>>>>>> +      }
>>>>>>>>>>>>>>>>>>> +      if (MaxVectorSize > 32) {
>>>>>>>>>>>>>>>>>>> +        FLAG_SET_DEFAULT(MaxVectorSize, 32);
>>>>>>>>>>>>>>>>>>> +      }
>>>>>>>>>>>>>>>>>>> +      if (UseSHA) {
>>>>>>>>>>>>>>>>>>> +        if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
>>>>>>>>>>>>>>>>>>> +          FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
>>>>>>>>>>>>>>>>>>> +        } else if (UseSHA512Intrinsics) {
>>>>>>>>>>>>>>>>>>> +          warning("Intrinsics for SHA-384 and SHA-512
>>>>>>>>>>>>>>>>>>> crypto
>>>>>>>>>>>>>>>>>>> hash
>>>>>>>>>>>>>>>>>>> functions not available on this CPU.");
>>>>>>>>>>>>>>>>>>> +          FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
>>>>>>>>>>>>>>>>>>> +        }
>>>>>>>>>>>>>>>>>>> +      }
>>>>>>>>>>>>>>>>>>> +#ifdef COMPILER2
>>>>>>>>>>>>>>>>>>> +      if (supports_sse4_2()) {
>>>>>>>>>>>>>>>>>>> +        if (FLAG_IS_DEFAULT(UseFPUForSpilling)) {
>>>>>>>>>>>>>>>>>>> +          FLAG_SET_DEFAULT(UseFPUForSpilling, true);
>>>>>>>>>>>>>>>>>>> +        }
>>>>>>>>>>>>>>>>>>> +      }
>>>>>>>>>>>>>>>>>>> +#endif
>>>>>>>>>>>>>>>>>>> +    }
>>>>>>>>>>>>>>>>>>>            }
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>            if( is_intel() ) { // Intel cpus specific
>>>>>>>>>>>>>>>>>>> settings
>>>>>>>>>>>>>>>>>>> diff --git a/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>>>>>>>>>>> b/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>>>>>>>>>>> --- a/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>>>>>>>>>>> +++ b/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>>>>>>>>>>> @@ -505,6 +505,14 @@
>>>>>>>>>>>>>>>>>>>                result |= CPU_CLMUL;
>>>>>>>>>>>>>>>>>>>              if (_cpuid_info.sef_cpuid7_ebx.bits.rtm !=
>> 0)
>>>>>>>>>>>>>>>>>>>                result |= CPU_RTM;
>>>>>>>>>>>>>>>>>>> +    if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
>>>>>>>>>>>>>>>>>>> +       result |= CPU_ADX;
>>>>>>>>>>>>>>>>>>> +    if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
>>>>>>>>>>>>>>>>>>> +      result |= CPU_BMI2;
>>>>>>>>>>>>>>>>>>> +    if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
>>>>>>>>>>>>>>>>>>> +      result |= CPU_SHA;
>>>>>>>>>>>>>>>>>>> +    if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
>>>>>>>>>>>>>>>>>>> +      result |= CPU_FMA;
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>              // AMD features.
>>>>>>>>>>>>>>>>>>>              if (is_amd()) {
>>>>>>>>>>>>>>>>>>> @@ -515,19 +523,13 @@
>>>>>>>>>>>>>>>>>>>                  result |= CPU_LZCNT;
>>>>>>>>>>>>>>>>>>>                if (_cpuid_info.ext_cpuid1_ecx.bits.sse4a
>> !=
>>>>>>>>>>>>>>>>>>> 0)
>>>>>>>>>>>>>>>>>>>                  result |= CPU_SSE4A;
>>>>>>>>>>>>>>>>>>> +      if(_cpuid_info.std_cpuid1_edx.bits.ht != 0)
>>>>>>>>>>>>>>>>>>> +        result |= CPU_HT;
>>>>>>>>>>>>>>>>>>>              }
>>>>>>>>>>>>>>>>>>>              // Intel features.
>>>>>>>>>>>>>>>>>>>              if(is_intel()) {
>>>>>>>>>>>>>>>>>>> -      if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
>>>>>>>>>>>>>>>>>>> -         result |= CPU_ADX;
>>>>>>>>>>>>>>>>>>> -      if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
>>>>>>>>>>>>>>>>>>> -        result |= CPU_BMI2;
>>>>>>>>>>>>>>>>>>> -      if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
>>>>>>>>>>>>>>>>>>> -        result |= CPU_SHA;
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>> if(_cpuid_info.ext_cpuid1_ecx.bits.lzcnt_intel
>>>>>>>>>>>>>>>>>>> != 0)
>>>>>>>>>>>>>>>>>>>                  result |= CPU_LZCNT;
>>>>>>>>>>>>>>>>>>> -      if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
>>>>>>>>>>>>>>>>>>> -        result |= CPU_FMA;
>>>>>>>>>>>>>>>>>>>                // for Intel, ecx.bits.misalignsse bit (bit
>>>>>>>>>>>>>>>>>>> 8)
>>>>>>>>>>>>>>>>>>> indicates
>>>>>>>>>>>>>>>>>>> support for prefetchw
>>>>>>>>>>>>>>>>>>>                if
>>>>>>>>>>>>>>>>>>> (_cpuid_info.ext_cpuid1_ecx.bits.misalignsse
>>>>>>>>>>>>>>>>>>> !=
>>>>>>>>>>>>>>>>>>> 0) {
>>>>>>>>>>>>>>>>>>>                  result |= CPU_3DNOW_PREFETCH;
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>> ******************************
>> ********************************
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>> Thanks,
>>>>>>>>>>>>>>>>>>> Rohit
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>> On 1/09/2017 1:11 AM, Rohit Arul Raj wrote:
>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>> On Thu, Aug 31, 2017 at 5:59 PM, David Holmes
>>>>>>>>>>>>>>>>>>>>>> <david.holmes at oracle.com>
>>>>>>>>>>>>>>>>>>>>>> wrote:
>>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>>> Hi Rohit,
>>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>>> On 31/08/2017 7:03 PM, Rohit Arul Raj wrote:
>>>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>>>> I would like a volunteer to review this patch
>>>>>>>>>>>>>>>>>>>>>>>> (openJDK9)
>>>>>>>>>>>>>>>>>>>>>>>> which
>>>>>>>>>>>>>>>>>>>>>>>> sets
>>>>>>>>>>>>>>>>>>>>>>>> flag/ISA defaults for newer AMD 17h (EPYC) processor
>>>>>>>>>>>>>>>>>>>>>>>> and
>>>>>>>>>>>>>>>>>>>>>>>> help
>>>>>>>>>>>>>>>>>>>>>>>> us
>>>>>>>>>>>>>>>>>>>>>>>> with
>>>>>>>>>>>>>>>>>>>>>>>> the commit process.
>>>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>>>> Webrev:
>>>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>>>> https://www.dropbox.com/sh/
>> 08bsxaxupg8kbam/AADurTXLGIZ6C-tiIAi_Glyka?dl=0
>>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>>> Unfortunately patches can not be accepted from
>> systems
>>>>>>>>>>>>>>>>>>>>>>> outside
>>>>>>>>>>>>>>>>>>>>>>> the
>>>>>>>>>>>>>>>>>>>>>>> OpenJDK
>>>>>>>>>>>>>>>>>>>>>>> infrastructure and ...
>>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>>>> I have also attached the patch (hg diff -g) for
>>>>>>>>>>>>>>>>>>>>>>>> reference.
>>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>>> ... unfortunately patches tend to get stripped by the
>>>>>>>>>>>>>>>>>>>>>>> mail
>>>>>>>>>>>>>>>>>>>>>>> servers.
>>>>>>>>>>>>>>>>>>>>>>> If
>>>>>>>>>>>>>>>>>>>>>>> the
>>>>>>>>>>>>>>>>>>>>>>> patch is small please include it inline. Otherwise
>> you
>>>>>>>>>>>>>>>>>>>>>>> will
>>>>>>>>>>>>>>>>>>>>>>> need
>>>>>>>>>>>>>>>>>>>>>>> to
>>>>>>>>>>>>>>>>>>>>>>> find
>>>>>>>>>>>>>>>>>>>>>>> an
>>>>>>>>>>>>>>>>>>>>>>> OpenJDK Author who can host it for you on
>>>>>>>>>>>>>>>>>>>>>>> cr.openjdk.java.net.
>>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>>>> 3) I have done regression testing using jtreg ($make
>>>>>>>>>>>>>>>>>>>>>>>> default)
>>>>>>>>>>>>>>>>>>>>>>>> and
>>>>>>>>>>>>>>>>>>>>>>>> didn't find any regressions.
>>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>>> Sounds good, but until I see the patch it is hard to
>>>>>>>>>>>>>>>>>>>>>>> comment
>>>>>>>>>>>>>>>>>>>>>>> on
>>>>>>>>>>>>>>>>>>>>>>> testing
>>>>>>>>>>>>>>>>>>>>>>> requirements.
>>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>>> Thanks,
>>>>>>>>>>>>>>>>>>>>>>> David
>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>> Thanks David,
>>>>>>>>>>>>>>>>>>>>>> Yes, it's a small patch.
>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>> diff --git a/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>>>>>>>>>>>>> b/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>>>>>>>>>>>>> --- a/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>>>>>>>>>>>>> +++ b/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>>>>>>>>>>>>> @@ -1051,6 +1051,22 @@
>>>>>>>>>>>>>>>>>>>>>>                 }
>>>>>>>>>>>>>>>>>>>>>>                 FLAG_SET_DEFAULT(UseSSE42Intrinsics,
>>>>>>>>>>>>>>>>>>>>>> false);
>>>>>>>>>>>>>>>>>>>>>>               }
>>>>>>>>>>>>>>>>>>>>>> +    if (supports_sha()) {
>>>>>>>>>>>>>>>>>>>>>> +      if (FLAG_IS_DEFAULT(UseSHA)) {
>>>>>>>>>>>>>>>>>>>>>> +        FLAG_SET_DEFAULT(UseSHA, true);
>>>>>>>>>>>>>>>>>>>>>> +      }
>>>>>>>>>>>>>>>>>>>>>> +    } else if (UseSHA || UseSHA1Intrinsics ||
>>>>>>>>>>>>>>>>>>>>>> UseSHA256Intrinsics
>>>>>>>>>>>>>>>>>>>>>> ||
>>>>>>>>>>>>>>>>>>>>>> UseSHA512Intrinsics) {
>>>>>>>>>>>>>>>>>>>>>> +      if (!FLAG_IS_DEFAULT(UseSHA) ||
>>>>>>>>>>>>>>>>>>>>>> +          !FLAG_IS_DEFAULT(UseSHA1Intrinsics) ||
>>>>>>>>>>>>>>>>>>>>>> +          !FLAG_IS_DEFAULT(UseSHA256Intrinsics) ||
>>>>>>>>>>>>>>>>>>>>>> +          !FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
>>>>>>>>>>>>>>>>>>>>>> +        warning("SHA instructions are not available
>> on
>>>>>>>>>>>>>>>>>>>>>> this
>>>>>>>>>>>>>>>>>>>>>> CPU");
>>>>>>>>>>>>>>>>>>>>>> +      }
>>>>>>>>>>>>>>>>>>>>>> +      FLAG_SET_DEFAULT(UseSHA, false);
>>>>>>>>>>>>>>>>>>>>>> +      FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
>>>>>>>>>>>>>>>>>>>>>> +      FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
>>>>>>>>>>>>>>>>>>>>>> +      FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
>>>>>>>>>>>>>>>>>>>>>> +    }
>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>>               // some defaults for AMD family 15h
>>>>>>>>>>>>>>>>>>>>>>               if ( cpu_family() == 0x15 ) {
>>>>>>>>>>>>>>>>>>>>>> @@ -1072,11 +1088,43 @@
>>>>>>>>>>>>>>>>>>>>>>               }
>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>>           #ifdef COMPILER2
>>>>>>>>>>>>>>>>>>>>>> -    if (MaxVectorSize > 16) {
>>>>>>>>>>>>>>>>>>>>>> -      // Limit vectors size to 16 bytes on current
>> AMD
>>>>>>>>>>>>>>>>>>>>>> cpus.
>>>>>>>>>>>>>>>>>>>>>> +    if (cpu_family() < 0x17 && MaxVectorSize > 16) {
>>>>>>>>>>>>>>>>>>>>>> +      // Limit vectors size to 16 bytes on AMD cpus <
>>>>>>>>>>>>>>>>>>>>>> 17h.
>>>>>>>>>>>>>>>>>>>>>>                 FLAG_SET_DEFAULT(MaxVectorSize, 16);
>>>>>>>>>>>>>>>>>>>>>>               }
>>>>>>>>>>>>>>>>>>>>>>           #endif // COMPILER2
>>>>>>>>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>>>>>>>>> +    // Some defaults for AMD family 17h
>>>>>>>>>>>>>>>>>>>>>> +    if ( cpu_family() == 0x17 ) {
>>>>>>>>>>>>>>>>>>>>>> +      // On family 17h processors use XMM and
>>>>>>>>>>>>>>>>>>>>>> UnalignedLoadStores
>>>>>>>>>>>>>>>>>>>>>> for
>>>>>>>>>>>>>>>>>>>>>> Array Copy
>>>>>>>>>>>>>>>>>>>>>> +      if (supports_sse2() &&
>>>>>>>>>>>>>>>>>>>>>> FLAG_IS_DEFAULT(UseXMMForArrayCopy))
>>>>>>>>>>>>>>>>>>>>>> {
>>>>>>>>>>>>>>>>>>>>>> +        UseXMMForArrayCopy = true;
>>>>>>>>>>>>>>>>>>>>>> +      }
>>>>>>>>>>>>>>>>>>>>>> +      if (supports_sse2() &&
>>>>>>>>>>>>>>>>>>>>>> FLAG_IS_DEFAULT(UseUnalignedLoadStores))
>>>>>>>>>>>>>>>>>>>>>> {
>>>>>>>>>>>>>>>>>>>>>> +        UseUnalignedLoadStores = true;
>>>>>>>>>>>>>>>>>>>>>> +      }
>>>>>>>>>>>>>>>>>>>>>> +      if (supports_bmi2() &&
>>>>>>>>>>>>>>>>>>>>>> FLAG_IS_DEFAULT(UseBMI2Instructions))
>>>>>>>>>>>>>>>>>>>>>> {
>>>>>>>>>>>>>>>>>>>>>> +        UseBMI2Instructions = true;
>>>>>>>>>>>>>>>>>>>>>> +      }
>>>>>>>>>>>>>>>>>>>>>> +      if (MaxVectorSize > 32) {
>>>>>>>>>>>>>>>>>>>>>> +        FLAG_SET_DEFAULT(MaxVectorSize, 32);
>>>>>>>>>>>>>>>>>>>>>> +      }
>>>>>>>>>>>>>>>>>>>>>> +      if (UseSHA) {
>>>>>>>>>>>>>>>>>>>>>> +        if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
>>>>>>>>>>>>>>>>>>>>>> +          FLAG_SET_DEFAULT(UseSHA512Intrinsics,
>> false);
>>>>>>>>>>>>>>>>>>>>>> +        } else if (UseSHA512Intrinsics) {
>>>>>>>>>>>>>>>>>>>>>> +          warning("Intrinsics for SHA-384 and SHA-512
>>>>>>>>>>>>>>>>>>>>>> crypto
>>>>>>>>>>>>>>>>>>>>>> hash
>>>>>>>>>>>>>>>>>>>>>> functions not available on this CPU.");
>>>>>>>>>>>>>>>>>>>>>> +          FLAG_SET_DEFAULT(UseSHA512Intrinsics,
>> false);
>>>>>>>>>>>>>>>>>>>>>> +        }
>>>>>>>>>>>>>>>>>>>>>> +      }
>>>>>>>>>>>>>>>>>>>>>> +#ifdef COMPILER2
>>>>>>>>>>>>>>>>>>>>>> +      if (supports_sse4_2()) {
>>>>>>>>>>>>>>>>>>>>>> +        if (FLAG_IS_DEFAULT(UseFPUForSpilling)) {
>>>>>>>>>>>>>>>>>>>>>> +          FLAG_SET_DEFAULT(UseFPUForSpilling, true);
>>>>>>>>>>>>>>>>>>>>>> +        }
>>>>>>>>>>>>>>>>>>>>>> +      }
>>>>>>>>>>>>>>>>>>>>>> +#endif
>>>>>>>>>>>>>>>>>>>>>> +    }
>>>>>>>>>>>>>>>>>>>>>>             }
>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>>             if( is_intel() ) { // Intel cpus specific
>>>>>>>>>>>>>>>>>>>>>> settings
>>>>>>>>>>>>>>>>>>>>>> diff --git a/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>>>>>>>>>>>>>> b/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>>>>>>>>>>>>>> --- a/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>>>>>>>>>>>>>> +++ b/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>>>>>>>>>>>>>> @@ -513,6 +513,16 @@
>>>>>>>>>>>>>>>>>>>>>>                   result |= CPU_LZCNT;
>>>>>>>>>>>>>>>>>>>>>>                 if (_cpuid_info.ext_cpuid1_ecx.
>> bits.sse4a
>>>>>>>>>>>>>>>>>>>>>> !=
>>>>>>>>>>>>>>>>>>>>>> 0)
>>>>>>>>>>>>>>>>>>>>>>                   result |= CPU_SSE4A;
>>>>>>>>>>>>>>>>>>>>>> +      if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
>>>>>>>>>>>>>>>>>>>>>> +        result |= CPU_BMI2;
>>>>>>>>>>>>>>>>>>>>>> +      if(_cpuid_info.std_cpuid1_edx.bits.ht != 0)
>>>>>>>>>>>>>>>>>>>>>> +        result |= CPU_HT;
>>>>>>>>>>>>>>>>>>>>>> +      if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
>>>>>>>>>>>>>>>>>>>>>> +        result |= CPU_ADX;
>>>>>>>>>>>>>>>>>>>>>> +      if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
>>>>>>>>>>>>>>>>>>>>>> +        result |= CPU_SHA;
>>>>>>>>>>>>>>>>>>>>>> +      if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
>>>>>>>>>>>>>>>>>>>>>> +        result |= CPU_FMA;
>>>>>>>>>>>>>>>>>>>>>>               }
>>>>>>>>>>>>>>>>>>>>>>               // Intel features.
>>>>>>>>>>>>>>>>>>>>>>               if(is_intel()) {
>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>> Regards,
>>>>>>>>>>>>>>>>>>>>>> Rohit
>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>
>>


More information about the hotspot-dev mailing list