RFR: Newer AMD 17h (EPYC) Processor family defaults

Wed Sep 20 16:37:07 UTC 2017

On 9/14/17 11:40 AM, Rohit Arul Raj wrote:
> Hello Vladimir,
> 
>> CPUID check for 0x8000001E should be explicit. Otherwise the code will be
>> executed for all above 0x80000008.
>>
>> +    __ cmpl(rax, 0x80000008);     // Is cpuid(0x80000009 and above)
>> supported?
>> +    __ jccb(Assembler::belowEqual, ext_cpuid8);
>> +    __ cmpl(rax, 0x8000001E);     // Is cpuid(0x8000001E) supported?
>> +    __ jccb(Assembler::notEqual, ext_cpuid8);
>> +    //
>> +    // Extended cpuid(0x8000001E)
>> +    //
>>
> 
> AMD17h has CPUID 0x8000001F too, so rax will not equal 0x8000001E: the
> notEqual jump would be taken and the 0x8000001E block would be skipped.

You are right.

> I have modified the last statement as below:
> 
> +    __ cmpl(rax, 0x80000008);     // Is cpuid(0x80000009 and above) supported?
> +    __ jccb(Assembler::belowEqual, ext_cpuid8);
> +    __ cmpl(rax, 0x8000001E);     // Is cpuid(0x8000001E) supported?
> +    __ jccb(Assembler::below, ext_cpuid8);
> 
> Is this OK?

Yes.

> 
> After applying the above changes, I got a "Short forward jump exceeds
> 8-bit offset" error while building OpenJDK.
> 
> #
> # A fatal error has been detected by the Java Runtime Environment:
> #
> #  Internal Error (macroAssembler_x86.hpp:116), pid=7786, tid=7787
> #  guarantee(this->is8bit(imm8)) failed: Short forward jump exceeds 8-bit offset
> 
> So I have replaced the short jump with near jump while checking for
> CPUID 0x80000005.
> 
>       __ cmpl(rax, 0x80000000);     // Is cpuid(0x80000001) supported?
>       __ jcc(Assembler::belowEqual, done);
>       __ cmpl(rax, 0x80000004);     // Is cpuid(0x80000005) supported?
> -    __ jccb(Assembler::belowEqual, ext_cpuid1);
> +   __ jcc(Assembler::belowEqual, ext_cpuid1);

Good. To be safe, you may also need to increase the size of the stub buffer to 1100; it is currently:

static const int stub_size = 1000;

Thanks,
Vladimir

> 
> I have attached the updated, re-tested patch.
> 
> diff --git a/src/cpu/x86/vm/vm_version_x86.cpp
> b/src/cpu/x86/vm/vm_version_x86.cpp
> --- a/src/cpu/x86/vm/vm_version_x86.cpp
> +++ b/src/cpu/x86/vm/vm_version_x86.cpp
> @@ -70,7 +70,7 @@
>       bool use_evex = FLAG_IS_DEFAULT(UseAVX) || (UseAVX > 2);
> 
>       Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4;
> -    Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7,
> done, wrapup;
> +    Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7,
> ext_cpuid8, done, wrapup;
>       Label legacy_setup, save_restore_except, legacy_save_restore,
> start_simd_check;
> 
>       StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub");
> @@ -267,14 +267,30 @@
>       __ cmpl(rax, 0x80000000);     // Is cpuid(0x80000001) supported?
>       __ jcc(Assembler::belowEqual, done);
>       __ cmpl(rax, 0x80000004);     // Is cpuid(0x80000005) supported?
> -    __ jccb(Assembler::belowEqual, ext_cpuid1);
> +    __ jcc(Assembler::belowEqual, ext_cpuid1);
>       __ cmpl(rax, 0x80000006);     // Is cpuid(0x80000007) supported?
>       __ jccb(Assembler::belowEqual, ext_cpuid5);
>       __ cmpl(rax, 0x80000007);     // Is cpuid(0x80000008) supported?
>       __ jccb(Assembler::belowEqual, ext_cpuid7);
> +    __ cmpl(rax, 0x80000008);     // Is cpuid(0x80000009 and above) supported?
> +    __ jccb(Assembler::belowEqual, ext_cpuid8);
> +    __ cmpl(rax, 0x8000001E);     // Is cpuid(0x8000001E) supported?
> +    __ jccb(Assembler::below, ext_cpuid8);
> +    //
> +    // Extended cpuid(0x8000001E)
> +    //
> +    __ movl(rax, 0x8000001E);
> +    __ cpuid();
> +    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1E_offset())));
> +    __ movl(Address(rsi, 0), rax);
> +    __ movl(Address(rsi, 4), rbx);
> +    __ movl(Address(rsi, 8), rcx);
> +    __ movl(Address(rsi,12), rdx);
> +
>       //
>       // Extended cpuid(0x80000008)
>       //
> +    __ bind(ext_cpuid8);
>       __ movl(rax, 0x80000008);
>       __ cpuid();
>       __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid8_offset())));
> @@ -1109,11 +1125,27 @@
>       }
> 
>   #ifdef COMPILER2
> -    if (MaxVectorSize > 16) {
> -      // Limit vectors size to 16 bytes on current AMD cpus.
> +    if (cpu_family() < 0x17 && MaxVectorSize > 16) {
> +      // Limit vectors size to 16 bytes on AMD cpus < 17h.
>         FLAG_SET_DEFAULT(MaxVectorSize, 16);
>       }
>   #endif // COMPILER2
> +
> +    // Some defaults for AMD family 17h
> +    if ( cpu_family() == 0x17 ) {
> +      // On family 17h processors use XMM and UnalignedLoadStores for
> Array Copy
> +      if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
> +        FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
> +      }
> +      if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
> +        FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
> +      }
> +#ifdef COMPILER2
> +      if (supports_sse4_2() && FLAG_IS_DEFAULT(UseFPUForSpilling)) {
> +        FLAG_SET_DEFAULT(UseFPUForSpilling, true);
> +      }
> +#endif
> +    }
>     }
> 
>     if( is_intel() ) { // Intel cpus specific settings
> diff --git a/src/cpu/x86/vm/vm_version_x86.hpp
> b/src/cpu/x86/vm/vm_version_x86.hpp
> --- a/src/cpu/x86/vm/vm_version_x86.hpp
> +++ b/src/cpu/x86/vm/vm_version_x86.hpp
> @@ -228,6 +228,15 @@
>       } bits;
>     };
> 
> +  union ExtCpuid1EEbx {
> +    uint32_t value;
> +    struct {
> +      uint32_t                  : 8,
> +               threads_per_core : 8,
> +                                : 16;
> +    } bits;
> +  };
> +
>     union XemXcr0Eax {
>       uint32_t value;
>       struct {
> @@ -398,6 +407,12 @@
>       ExtCpuid8Ecx ext_cpuid8_ecx;
>       uint32_t     ext_cpuid8_edx; // reserved
> 
> +    // cpuid function 0x8000001E // AMD 17h
> +    uint32_t      ext_cpuid1E_eax;
> +    ExtCpuid1EEbx ext_cpuid1E_ebx; // threads per core (AMD17h)
> +    uint32_t      ext_cpuid1E_ecx;
> +    uint32_t      ext_cpuid1E_edx; // unused currently
> +
>       // extended control register XCR0 (the XFEATURE_ENABLED_MASK register)
>       XemXcr0Eax   xem_xcr0_eax;
>       uint32_t     xem_xcr0_edx; // reserved
> @@ -505,6 +520,14 @@
>         result |= CPU_CLMUL;
>       if (_cpuid_info.sef_cpuid7_ebx.bits.rtm != 0)
>         result |= CPU_RTM;
> +    if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
> +       result |= CPU_ADX;
> +    if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
> +      result |= CPU_BMI2;
> +    if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
> +      result |= CPU_SHA;
> +    if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
> +      result |= CPU_FMA;
> 
>       // AMD features.
>       if (is_amd()) {
> @@ -518,16 +541,8 @@
>       }
>       // Intel features.
>       if(is_intel()) {
> -      if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
> -         result |= CPU_ADX;
> -      if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
> -        result |= CPU_BMI2;
> -      if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
> -        result |= CPU_SHA;
>         if(_cpuid_info.ext_cpuid1_ecx.bits.lzcnt_intel != 0)
>           result |= CPU_LZCNT;
> -      if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
> -        result |= CPU_FMA;
>         // for Intel, ecx.bits.misalignsse bit (bit 8) indicates
> support for prefetchw
>         if (_cpuid_info.ext_cpuid1_ecx.bits.misalignsse != 0) {
>           result |= CPU_3DNOW_PREFETCH;
> @@ -590,6 +605,7 @@
>     static ByteSize ext_cpuid5_offset() { return
> byte_offset_of(CpuidInfo, ext_cpuid5_eax); }
>     static ByteSize ext_cpuid7_offset() { return
> byte_offset_of(CpuidInfo, ext_cpuid7_eax); }
>     static ByteSize ext_cpuid8_offset() { return
> byte_offset_of(CpuidInfo, ext_cpuid8_eax); }
> +  static ByteSize ext_cpuid1E_offset() { return
> byte_offset_of(CpuidInfo, ext_cpuid1E_eax); }
>     static ByteSize tpl_cpuidB0_offset() { return
> byte_offset_of(CpuidInfo, tpl_cpuidB0_eax); }
>     static ByteSize tpl_cpuidB1_offset() { return
> byte_offset_of(CpuidInfo, tpl_cpuidB1_eax); }
>     static ByteSize tpl_cpuidB2_offset() { return
> byte_offset_of(CpuidInfo, tpl_cpuidB2_eax); }
> @@ -673,8 +689,12 @@
>       if (is_intel() && supports_processor_topology()) {
>         result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
>       } else if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) {
> -      result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
> -               cores_per_cpu();
> +      if (cpu_family() >= 0x17) {
> +        result = _cpuid_info.ext_cpuid1E_ebx.bits.threads_per_core + 1;
> +      } else {
> +        result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
> +                 cores_per_cpu();
> +      }
>       }
>       return (result == 0 ? 1 : result);
>     }
> 
> Please let me know your comments.
> Thanks for your review.
> 
> Regards,
> Rohit
> 
>>
>>
>> On 9/11/17 9:52 PM, Rohit Arul Raj wrote:
>>>
>>> Hello David,
>>>
>>>>>
>>>>>
>>>>> 1. ExtCpuid1EEx
>>>>>
>>>>> Should this be ExtCpuid1EEbx? (I see the naming here is somewhat
>>>>> inconsistent - and potentially confusing: I would have preferred to see
>>>>> things like ExtCpuid_1E_Ebx, to make it clear.)
>>>>
>>>>
>>>> Yes, I can change it accordingly.
>>>>
>>>
>>> I have attached the updated, re-tested patch as per your comments above.
>>>
>>> diff --git a/src/cpu/x86/vm/vm_version_x86.cpp
>>> b/src/cpu/x86/vm/vm_version_x86.cpp
>>> --- a/src/cpu/x86/vm/vm_version_x86.cpp
>>> +++ b/src/cpu/x86/vm/vm_version_x86.cpp
>>> @@ -70,7 +70,7 @@
>>>        bool use_evex = FLAG_IS_DEFAULT(UseAVX) || (UseAVX > 2);
>>>
>>>        Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4;
>>> -    Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7,
>>> done, wrapup;
>>> +    Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7,
>>> ext_cpuid8, done, wrapup;
>>>        Label legacy_setup, save_restore_except, legacy_save_restore,
>>> start_simd_check;
>>>
>>>        StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub");
>>> @@ -272,9 +272,23 @@
>>>        __ jccb(Assembler::belowEqual, ext_cpuid5);
>>>        __ cmpl(rax, 0x80000007);     // Is cpuid(0x80000008) supported?
>>>        __ jccb(Assembler::belowEqual, ext_cpuid7);
>>> +    __ cmpl(rax, 0x80000008);     // Is cpuid(0x8000001E) supported?
>>> +    __ jccb(Assembler::belowEqual, ext_cpuid8);
>>> +    //
>>> +    // Extended cpuid(0x8000001E)
>>> +    //
>>> +    __ movl(rax, 0x8000001E);
>>> +    __ cpuid();
>>> +    __ lea(rsi, Address(rbp,
>>> in_bytes(VM_Version::ext_cpuid_1E_offset())));
>>> +    __ movl(Address(rsi, 0), rax);
>>> +    __ movl(Address(rsi, 4), rbx);
>>> +    __ movl(Address(rsi, 8), rcx);
>>> +    __ movl(Address(rsi,12), rdx);
>>> +
>>>        //
>>>        // Extended cpuid(0x80000008)
>>>        //
>>> +    __ bind(ext_cpuid8);
>>>        __ movl(rax, 0x80000008);
>>>        __ cpuid();
>>>        __ lea(rsi, Address(rbp,
>>> in_bytes(VM_Version::ext_cpuid8_offset())));
>>> @@ -1109,11 +1123,27 @@
>>>        }
>>>
>>>    #ifdef COMPILER2
>>> -    if (MaxVectorSize > 16) {
>>> -      // Limit vectors size to 16 bytes on current AMD cpus.
>>> +    if (cpu_family() < 0x17 && MaxVectorSize > 16) {
>>> +      // Limit vectors size to 16 bytes on AMD cpus < 17h.
>>>          FLAG_SET_DEFAULT(MaxVectorSize, 16);
>>>        }
>>>    #endif // COMPILER2
>>> +
>>> +    // Some defaults for AMD family 17h
>>> +    if ( cpu_family() == 0x17 ) {
>>> +      // On family 17h processors use XMM and UnalignedLoadStores for
>>> Array Copy
>>> +      if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
>>> +        FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
>>> +      }
>>> +      if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
>>> +        FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
>>> +      }
>>> +#ifdef COMPILER2
>>> +      if (supports_sse4_2() && FLAG_IS_DEFAULT(UseFPUForSpilling)) {
>>> +        FLAG_SET_DEFAULT(UseFPUForSpilling, true);
>>> +      }
>>> +#endif
>>> +    }
>>>      }
>>>
>>>      if( is_intel() ) { // Intel cpus specific settings
>>> diff --git a/src/cpu/x86/vm/vm_version_x86.hpp
>>> b/src/cpu/x86/vm/vm_version_x86.hpp
>>> --- a/src/cpu/x86/vm/vm_version_x86.hpp
>>> +++ b/src/cpu/x86/vm/vm_version_x86.hpp
>>> @@ -228,6 +228,15 @@
>>>        } bits;
>>>      };
>>>
>>> +  union ExtCpuid_1E_Ebx {
>>> +    uint32_t value;
>>> +    struct {
>>> +      uint32_t                  : 8,
>>> +               threads_per_core : 8,
>>> +                                : 16;
>>> +    } bits;
>>> +  };
>>> +
>>>      union XemXcr0Eax {
>>>        uint32_t value;
>>>        struct {
>>> @@ -398,6 +407,12 @@
>>>        ExtCpuid8Ecx ext_cpuid8_ecx;
>>>        uint32_t     ext_cpuid8_edx; // reserved
>>>
>>> +    // cpuid function 0x8000001E // AMD 17h
>>> +    uint32_t        ext_cpuid_1E_eax;
>>> +    ExtCpuid_1E_Ebx ext_cpuid_1E_ebx; // threads per core (AMD17h)
>>> +    uint32_t        ext_cpuid_1E_ecx;
>>> +    uint32_t        ext_cpuid_1E_edx; // unused currently
>>> +
>>>        // extended control register XCR0 (the XFEATURE_ENABLED_MASK
>>> register)
>>>        XemXcr0Eax   xem_xcr0_eax;
>>>        uint32_t     xem_xcr0_edx; // reserved
>>> @@ -505,6 +520,14 @@
>>>          result |= CPU_CLMUL;
>>>        if (_cpuid_info.sef_cpuid7_ebx.bits.rtm != 0)
>>>          result |= CPU_RTM;
>>> +    if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
>>> +       result |= CPU_ADX;
>>> +    if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
>>> +      result |= CPU_BMI2;
>>> +    if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
>>> +      result |= CPU_SHA;
>>> +    if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
>>> +      result |= CPU_FMA;
>>>
>>>        // AMD features.
>>>        if (is_amd()) {
>>> @@ -518,16 +541,8 @@
>>>        }
>>>        // Intel features.
>>>        if(is_intel()) {
>>> -      if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
>>> -         result |= CPU_ADX;
>>> -      if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
>>> -        result |= CPU_BMI2;
>>> -      if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
>>> -        result |= CPU_SHA;
>>>          if(_cpuid_info.ext_cpuid1_ecx.bits.lzcnt_intel != 0)
>>>            result |= CPU_LZCNT;
>>> -      if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
>>> -        result |= CPU_FMA;
>>>          // for Intel, ecx.bits.misalignsse bit (bit 8) indicates
>>> support for prefetchw
>>>          if (_cpuid_info.ext_cpuid1_ecx.bits.misalignsse != 0) {
>>>            result |= CPU_3DNOW_PREFETCH;
>>> @@ -590,6 +605,7 @@
>>>      static ByteSize ext_cpuid5_offset() { return
>>> byte_offset_of(CpuidInfo, ext_cpuid5_eax); }
>>>      static ByteSize ext_cpuid7_offset() { return
>>> byte_offset_of(CpuidInfo, ext_cpuid7_eax); }
>>>      static ByteSize ext_cpuid8_offset() { return
>>> byte_offset_of(CpuidInfo, ext_cpuid8_eax); }
>>> +  static ByteSize ext_cpuid_1E_offset() { return
>>> byte_offset_of(CpuidInfo, ext_cpuid_1E_eax); }
>>>      static ByteSize tpl_cpuidB0_offset() { return
>>> byte_offset_of(CpuidInfo, tpl_cpuidB0_eax); }
>>>      static ByteSize tpl_cpuidB1_offset() { return
>>> byte_offset_of(CpuidInfo, tpl_cpuidB1_eax); }
>>>      static ByteSize tpl_cpuidB2_offset() { return
>>> byte_offset_of(CpuidInfo, tpl_cpuidB2_eax); }
>>> @@ -673,8 +689,11 @@
>>>        if (is_intel() && supports_processor_topology()) {
>>>          result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
>>>        } else if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) {
>>> -      result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
>>> -               cores_per_cpu();
>>> +      if (cpu_family() >= 0x17)
>>> +        result = _cpuid_info.ext_cpuid_1E_ebx.bits.threads_per_core + 1;
>>> +      else
>>> +        result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
>>> +                 cores_per_cpu();
>>>        }
>>>        return (result == 0 ? 1 : result);
>>>      }
>>>
>>>
>>> Please let me know your comments
>>>
>>> Thanks for your time.
>>>
>>> Regards,
>>> Rohit
>>>
>>>
>>>>> Thanks,
>>>>> David
>>>>> -----
>>>>>
>>>>>
>>>>>> Reference:
>>>>>>
>>>>>> https://support.amd.com/TechDocs/54945_PPR_Family_17h_Models_00h-0Fh.pdf
>>>>>> [Pg 82]
>>>>>>
>>>>>>        CPUID_Fn8000001E_EBX [Core Identifiers] (CoreId)
>>>>>>          15:8 ThreadsPerCore: threads per core. Read-only. Reset: XXh.
>>>>>> The number of threads per core is ThreadsPerCore+1.
>>>>>>
>>>>>> diff --git a/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>> b/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>> --- a/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>> +++ b/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>> @@ -70,7 +70,7 @@
>>>>>>         bool use_evex = FLAG_IS_DEFAULT(UseAVX) || (UseAVX > 2);
>>>>>>
>>>>>>         Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4;
>>>>>> -    Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7,
>>>>>> done, wrapup;
>>>>>> +    Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7,
>>>>>> ext_cpuid8, done, wrapup;
>>>>>>         Label legacy_setup, save_restore_except, legacy_save_restore,
>>>>>> start_simd_check;
>>>>>>
>>>>>>         StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub");
>>>>>> @@ -272,9 +272,23 @@
>>>>>>         __ jccb(Assembler::belowEqual, ext_cpuid5);
>>>>>>         __ cmpl(rax, 0x80000007);     // Is cpuid(0x80000008) supported?
>>>>>>         __ jccb(Assembler::belowEqual, ext_cpuid7);
>>>>>> +    __ cmpl(rax, 0x80000008);     // Is cpuid(0x8000001E) supported?
>>>>>> +    __ jccb(Assembler::belowEqual, ext_cpuid8);
>>>>>> +    //
>>>>>> +    // Extended cpuid(0x8000001E)
>>>>>> +    //
>>>>>> +    __ movl(rax, 0x8000001E);
>>>>>> +    __ cpuid();
>>>>>> +    __ lea(rsi, Address(rbp,
>>>>>> in_bytes(VM_Version::ext_cpuid1E_offset())));
>>>>>> +    __ movl(Address(rsi, 0), rax);
>>>>>> +    __ movl(Address(rsi, 4), rbx);
>>>>>> +    __ movl(Address(rsi, 8), rcx);
>>>>>> +    __ movl(Address(rsi,12), rdx);
>>>>>> +
>>>>>>         //
>>>>>>         // Extended cpuid(0x80000008)
>>>>>>         //
>>>>>> +    __ bind(ext_cpuid8);
>>>>>>         __ movl(rax, 0x80000008);
>>>>>>         __ cpuid();
>>>>>>         __ lea(rsi, Address(rbp,
>>>>>> in_bytes(VM_Version::ext_cpuid8_offset())));
>>>>>> @@ -1109,11 +1123,27 @@
>>>>>>         }
>>>>>>
>>>>>>     #ifdef COMPILER2
>>>>>> -    if (MaxVectorSize > 16) {
>>>>>> -      // Limit vectors size to 16 bytes on current AMD cpus.
>>>>>> +    if (cpu_family() < 0x17 && MaxVectorSize > 16) {
>>>>>> +      // Limit vectors size to 16 bytes on AMD cpus < 17h.
>>>>>>           FLAG_SET_DEFAULT(MaxVectorSize, 16);
>>>>>>         }
>>>>>>     #endif // COMPILER2
>>>>>> +
>>>>>> +    // Some defaults for AMD family 17h
>>>>>> +    if ( cpu_family() == 0x17 ) {
>>>>>> +      // On family 17h processors use XMM and UnalignedLoadStores for
>>>>>> Array Copy
>>>>>> +      if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
>>>>>> +        FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
>>>>>> +      }
>>>>>> +      if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores))
>>>>>> {
>>>>>> +        FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
>>>>>> +      }
>>>>>> +#ifdef COMPILER2
>>>>>> +      if (supports_sse4_2() && FLAG_IS_DEFAULT(UseFPUForSpilling)) {
>>>>>> +        FLAG_SET_DEFAULT(UseFPUForSpilling, true);
>>>>>> +      }
>>>>>> +#endif
>>>>>> +    }
>>>>>>       }
>>>>>>
>>>>>>       if( is_intel() ) { // Intel cpus specific settings
>>>>>> diff --git a/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>> b/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>> --- a/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>> +++ b/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>> @@ -228,6 +228,15 @@
>>>>>>         } bits;
>>>>>>       };
>>>>>>
>>>>>> +  union ExtCpuid1EEx {
>>>>>> +    uint32_t value;
>>>>>> +    struct {
>>>>>> +      uint32_t                  : 8,
>>>>>> +               threads_per_core : 8,
>>>>>> +                                : 16;
>>>>>> +    } bits;
>>>>>> +  };
>>>>>> +
>>>>>>       union XemXcr0Eax {
>>>>>>         uint32_t value;
>>>>>>         struct {
>>>>>> @@ -398,6 +407,12 @@
>>>>>>         ExtCpuid8Ecx ext_cpuid8_ecx;
>>>>>>         uint32_t     ext_cpuid8_edx; // reserved
>>>>>>
>>>>>> +    // cpuid function 0x8000001E // AMD 17h
>>>>>> +    uint32_t     ext_cpuid1E_eax;
>>>>>> +    ExtCpuid1EEx ext_cpuid1E_ebx; // threads per core (AMD17h)
>>>>>> +    uint32_t     ext_cpuid1E_ecx;
>>>>>> +    uint32_t     ext_cpuid1E_edx; // unused currently
>>>>>> +
>>>>>>         // extended control register XCR0 (the XFEATURE_ENABLED_MASK
>>>>>> register)
>>>>>>         XemXcr0Eax   xem_xcr0_eax;
>>>>>>         uint32_t     xem_xcr0_edx; // reserved
>>>>>> @@ -505,6 +520,14 @@
>>>>>>           result |= CPU_CLMUL;
>>>>>>         if (_cpuid_info.sef_cpuid7_ebx.bits.rtm != 0)
>>>>>>           result |= CPU_RTM;
>>>>>> +    if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
>>>>>> +       result |= CPU_ADX;
>>>>>> +    if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
>>>>>> +      result |= CPU_BMI2;
>>>>>> +    if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
>>>>>> +      result |= CPU_SHA;
>>>>>> +    if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
>>>>>> +      result |= CPU_FMA;
>>>>>>
>>>>>>         // AMD features.
>>>>>>         if (is_amd()) {
>>>>>> @@ -518,16 +541,8 @@
>>>>>>         }
>>>>>>         // Intel features.
>>>>>>         if(is_intel()) {
>>>>>> -      if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
>>>>>> -         result |= CPU_ADX;
>>>>>> -      if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
>>>>>> -        result |= CPU_BMI2;
>>>>>> -      if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
>>>>>> -        result |= CPU_SHA;
>>>>>>           if(_cpuid_info.ext_cpuid1_ecx.bits.lzcnt_intel != 0)
>>>>>>             result |= CPU_LZCNT;
>>>>>> -      if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
>>>>>> -        result |= CPU_FMA;
>>>>>>           // for Intel, ecx.bits.misalignsse bit (bit 8) indicates
>>>>>> support for prefetchw
>>>>>>           if (_cpuid_info.ext_cpuid1_ecx.bits.misalignsse != 0) {
>>>>>>             result |= CPU_3DNOW_PREFETCH;
>>>>>> @@ -590,6 +605,7 @@
>>>>>>       static ByteSize ext_cpuid5_offset() { return
>>>>>> byte_offset_of(CpuidInfo, ext_cpuid5_eax); }
>>>>>>       static ByteSize ext_cpuid7_offset() { return
>>>>>> byte_offset_of(CpuidInfo, ext_cpuid7_eax); }
>>>>>>       static ByteSize ext_cpuid8_offset() { return
>>>>>> byte_offset_of(CpuidInfo, ext_cpuid8_eax); }
>>>>>> +  static ByteSize ext_cpuid1E_offset() { return
>>>>>> byte_offset_of(CpuidInfo, ext_cpuid1E_eax); }
>>>>>>       static ByteSize tpl_cpuidB0_offset() { return
>>>>>> byte_offset_of(CpuidInfo, tpl_cpuidB0_eax); }
>>>>>>       static ByteSize tpl_cpuidB1_offset() { return
>>>>>> byte_offset_of(CpuidInfo, tpl_cpuidB1_eax); }
>>>>>>       static ByteSize tpl_cpuidB2_offset() { return
>>>>>> byte_offset_of(CpuidInfo, tpl_cpuidB2_eax); }
>>>>>> @@ -673,8 +689,11 @@
>>>>>>         if (is_intel() && supports_processor_topology()) {
>>>>>>           result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
>>>>>>         } else if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) {
>>>>>> -      result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
>>>>>> -               cores_per_cpu();
>>>>>> +      if (cpu_family() >= 0x17)
>>>>>> +        result = _cpuid_info.ext_cpuid1E_ebx.bits.threads_per_core +
>>>>>> 1;
>>>>>> +      else
>>>>>> +        result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
>>>>>> +                 cores_per_cpu();
>>>>>>         }
>>>>>>         return (result == 0 ? 1 : result);
>>>>>>       }
>>>>>>
>>>>>> I have attached the patch for review.
>>>>>> Please let me know your comments.
>>>>>>
>>>>>> Thanks,
>>>>>> Rohit
>>>>>>
>>>>>>> Thanks,
>>>>>>> Vladimir
>>>>>>>
>>>>>>>
>>>>>>>>
>>>>>>>> src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>
>>>>>>>> No comments on AMD specific changes.
>>>>>>>>
>>>>>>>> Thanks,
>>>>>>>> David
>>>>>>>> -----
>>>>>>>>
>>>>>>>> On 5/09/2017 3:43 PM, David Holmes wrote:
>>>>>>>>>
>>>>>>>>>
>>>>>>>>>
>>>>>>>>> On 5/09/2017 3:29 PM, Rohit Arul Raj wrote:
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>> Hello David,
>>>>>>>>>>
>>>>>>>>>> On Tue, Sep 5, 2017 at 10:31 AM, David Holmes
>>>>>>>>>> <david.holmes at oracle.com>
>>>>>>>>>> wrote:
>>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>> Hi Rohit,
>>>>>>>>>>>
>>>>>>>>>>> I was unable to apply your patch to latest jdk10/hs/hotspot repo.
>>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>> I checked out the latest jdk10/hs/hotspot [parent:
>>>>>>>>>> 13548:1a9c2e07a826]
>>>>>>>>>> and was able to apply the patch [epyc-amd17h-defaults-3Sept.patch]
>>>>>>>>>> without any issues.
>>>>>>>>>> Can you share the error message that you are getting?
>>>>>>>>>
>>>>>>>>>
>>>>>>>>>
>>>>>>>>>
>>>>>>>>> I was getting this:
>>>>>>>>>
>>>>>>>>> applying hotspot.patch
>>>>>>>>> patching file src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>> Hunk #1 FAILED at 1108
>>>>>>>>> 1 out of 1 hunks FAILED -- saving rejects to file
>>>>>>>>> src/cpu/x86/vm/vm_version_x86.cpp.rej
>>>>>>>>> patching file src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>> Hunk #2 FAILED at 522
>>>>>>>>> 1 out of 2 hunks FAILED -- saving rejects to file
>>>>>>>>> src/cpu/x86/vm/vm_version_x86.hpp.rej
>>>>>>>>> abort: patch failed to apply
>>>>>>>>>
>>>>>>>>> but I started again and this time it applied fine, so I am not sure
>>>>>>>>> what was going on there.
>>>>>>>>>
>>>>>>>>> Cheers,
>>>>>>>>> David
>>>>>>>>>
>>>>>>>>>> Regards,
>>>>>>>>>> Rohit
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>> On 4/09/2017 2:42 AM, Rohit Arul Raj wrote:
>>>>>>>>>>>>
>>>>>>>>>>>>
>>>>>>>>>>>>
>>>>>>>>>>>>
>>>>>>>>>>>> Hello Vladimir,
>>>>>>>>>>>>
>>>>>>>>>>>> On Sat, Sep 2, 2017 at 11:25 PM, Vladimir Kozlov
>>>>>>>>>>>> <vladimir.kozlov at oracle.com> wrote:
>>>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>>> Hi Rohit,
>>>>>>>>>>>>>
>>>>>>>>>>>>> On 9/2/17 1:16 AM, Rohit Arul Raj wrote:
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> Hello Vladimir,
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> Changes look good. Only question I have is about MaxVectorSize.
>>>>>>>>>>>>>>> It is set > 16 only in presence of AVX:
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> http://hg.openjdk.java.net/jdk10/hs/hotspot/file/046eab27258f/src/cpu/x86/vm/vm_version_x86.cpp#l945
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> Does that code work for AMD 17h too?
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> Thanks for pointing that out. Yes, the code works fine for AMD 17h.
>>>>>>>>>>>>>> So I have removed the surplus check for MaxVectorSize from my patch.
>>>>>>>>>>>>>> I have updated, re-tested and attached the patch.
>>>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>>> Which check did you remove?
>>>>>>>>>>>>>
>>>>>>>>>>>>
>>>>>>>>>>>> My older patch had the below mentioned check which was required
>>>>>>>>>>>> on
>>>>>>>>>>>> JDK9 where the default MaxVectorSize was 64. It has been handled
>>>>>>>>>>>> better in openJDK10. So this check is not required anymore.
>>>>>>>>>>>>
>>>>>>>>>>>> +    // Some defaults for AMD family 17h
>>>>>>>>>>>> +    if ( cpu_family() == 0x17 ) {
>>>>>>>>>>>> ...
>>>>>>>>>>>> ...
>>>>>>>>>>>> +      if (MaxVectorSize > 32) {
>>>>>>>>>>>> +        FLAG_SET_DEFAULT(MaxVectorSize, 32);
>>>>>>>>>>>> +      }
>>>>>>>>>>>> ..
>>>>>>>>>>>> ..
>>>>>>>>>>>> +      }
>>>>>>>>>>>>
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> I have one query regarding the setting of UseSHA flag:
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> http://hg.openjdk.java.net/jdk10/hs/hotspot/file/046eab27258f/src/cpu/x86/vm/vm_version_x86.cpp#l821
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> AMD 17h has support for SHA.
>>>>>>>>>>>>>> AMD 15h doesn't have support for SHA. Still, the "UseSHA" flag gets
>>>>>>>>>>>>>> enabled for it based on the availability of BMI2 and AVX2. Is there
>>>>>>>>>>>>>> an underlying reason for this? I have handled this in the patch but
>>>>>>>>>>>>>> just wanted to confirm.
>>>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>>> It was done by the following change, which uses only AVX2 and BMI2
>>>>>>>>>>>>> instructions to calculate SHA-256:
>>>>>>>>>>>>>
>>>>>>>>>>>>> http://hg.openjdk.java.net/jdk10/hs/hotspot/rev/6a17c49de974
>>>>>>>>>>>>>
>>>>>>>>>>>>> I don't know if AMD 15h supports these instructions and can execute
>>>>>>>>>>>>> that code. You need to test it.
>>>>>>>>>>>>>
>>>>>>>>>>>>
>>>>>>>>>>>> Ok, got it. Since AMD15h has support for AVX2 and BMI2
>>>>>>>>>>>> instructions,
>>>>>>>>>>>> it should work.
>>>>>>>>>>>> Confirmed by running following sanity tests:
>>>>>>>>>>>>
>>>>>>>>>>>>
>>>>>>>>>>>> ./hotspot/test/compiler/intrinsics/sha/sanity/TestSHA1Intrinsics.java
>>>>>>>>>>>>
>>>>>>>>>>>>
>>>>>>>>>>>>
>>>>>>>>>>>> ./hotspot/test/compiler/intrinsics/sha/sanity/TestSHA512Intrinsics.java
>>>>>>>>>>>>
>>>>>>>>>>>>
>>>>>>>>>>>>
>>>>>>>>>>>> ./hotspot/test/compiler/intrinsics/sha/sanity/TestSHA256Intrinsics.java
>>>>>>>>>>>>
>>>>>>>>>>>> So I have removed those SHA checks from my patch too.
>>>>>>>>>>>>
>>>>>>>>>>>> Please find attached updated, re-tested patch.
>>>>>>>>>>>>
>>>>>>>>>>>> diff --git a/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>>> b/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>>> --- a/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>>> +++ b/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>>> @@ -1109,11 +1109,27 @@
>>>>>>>>>>>>           }
>>>>>>>>>>>>
>>>>>>>>>>>>       #ifdef COMPILER2
>>>>>>>>>>>> -    if (MaxVectorSize > 16) {
>>>>>>>>>>>> -      // Limit vectors size to 16 bytes on current AMD cpus.
>>>>>>>>>>>> +    if (cpu_family() < 0x17 && MaxVectorSize > 16) {
>>>>>>>>>>>> +      // Limit vectors size to 16 bytes on AMD cpus < 17h.
>>>>>>>>>>>>             FLAG_SET_DEFAULT(MaxVectorSize, 16);
>>>>>>>>>>>>           }
>>>>>>>>>>>>       #endif // COMPILER2
>>>>>>>>>>>> +
>>>>>>>>>>>> +    // Some defaults for AMD family 17h
>>>>>>>>>>>> +    if ( cpu_family() == 0x17 ) {
>>>>>>>>>>>> +      // On family 17h processors use XMM and
>>>>>>>>>>>> UnalignedLoadStores
>>>>>>>>>>>> for
>>>>>>>>>>>> Array Copy
>>>>>>>>>>>> +      if (supports_sse2() &&
>>>>>>>>>>>> FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
>>>>>>>>>>>> +        FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
>>>>>>>>>>>> +      }
>>>>>>>>>>>> +      if (supports_sse2() &&
>>>>>>>>>>>> FLAG_IS_DEFAULT(UseUnalignedLoadStores))
>>>>>>>>>>>> {
>>>>>>>>>>>> +        FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
>>>>>>>>>>>> +      }
>>>>>>>>>>>> +#ifdef COMPILER2
>>>>>>>>>>>> +      if (supports_sse4_2() &&
>>>>>>>>>>>> FLAG_IS_DEFAULT(UseFPUForSpilling))
>>>>>>>>>>>> {
>>>>>>>>>>>> +        FLAG_SET_DEFAULT(UseFPUForSpilling, true);
>>>>>>>>>>>> +      }
>>>>>>>>>>>> +#endif
>>>>>>>>>>>> +    }
>>>>>>>>>>>>         }
>>>>>>>>>>>>
>>>>>>>>>>>>         if( is_intel() ) { // Intel cpus specific settings
>>>>>>>>>>>> diff --git a/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>>>> b/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>>>> --- a/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>>>> +++ b/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>>>> @@ -505,6 +505,14 @@
>>>>>>>>>>>>             result |= CPU_CLMUL;
>>>>>>>>>>>>           if (_cpuid_info.sef_cpuid7_ebx.bits.rtm != 0)
>>>>>>>>>>>>             result |= CPU_RTM;
>>>>>>>>>>>> +    if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
>>>>>>>>>>>> +       result |= CPU_ADX;
>>>>>>>>>>>> +    if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
>>>>>>>>>>>> +      result |= CPU_BMI2;
>>>>>>>>>>>> +    if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
>>>>>>>>>>>> +      result |= CPU_SHA;
>>>>>>>>>>>> +    if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
>>>>>>>>>>>> +      result |= CPU_FMA;
>>>>>>>>>>>>
>>>>>>>>>>>>           // AMD features.
>>>>>>>>>>>>           if (is_amd()) {
>>>>>>>>>>>> @@ -515,19 +523,13 @@
>>>>>>>>>>>>               result |= CPU_LZCNT;
>>>>>>>>>>>>             if (_cpuid_info.ext_cpuid1_ecx.bits.sse4a != 0)
>>>>>>>>>>>>               result |= CPU_SSE4A;
>>>>>>>>>>>> +      if(_cpuid_info.std_cpuid1_edx.bits.ht != 0)
>>>>>>>>>>>> +        result |= CPU_HT;
>>>>>>>>>>>>           }
>>>>>>>>>>>>           // Intel features.
>>>>>>>>>>>>           if(is_intel()) {
>>>>>>>>>>>> -      if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
>>>>>>>>>>>> -         result |= CPU_ADX;
>>>>>>>>>>>> -      if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
>>>>>>>>>>>> -        result |= CPU_BMI2;
>>>>>>>>>>>> -      if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
>>>>>>>>>>>> -        result |= CPU_SHA;
>>>>>>>>>>>>             if(_cpuid_info.ext_cpuid1_ecx.bits.lzcnt_intel != 0)
>>>>>>>>>>>>               result |= CPU_LZCNT;
>>>>>>>>>>>> -      if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
>>>>>>>>>>>> -        result |= CPU_FMA;
>>>>>>>>>>>>             // for Intel, ecx.bits.misalignsse bit (bit 8)
>>>>>>>>>>>> indicates
>>>>>>>>>>>> support for prefetchw
>>>>>>>>>>>>             if (_cpuid_info.ext_cpuid1_ecx.bits.misalignsse != 0)
>>>>>>>>>>>> {
>>>>>>>>>>>>               result |= CPU_3DNOW_PREFETCH;
>>>>>>>>>>>>
>>>>>>>>>>>> Please let me know your comments.
>>>>>>>>>>>>
>>>>>>>>>>>> Thanks for your time.
>>>>>>>>>>>> Rohit
>>>>>>>>>>>>
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> Thanks for taking time to review the code.
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> diff --git a/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>>>>> b/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>>>>> --- a/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>>>>> +++ b/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>>>>> @@ -1088,6 +1088,22 @@
>>>>>>>>>>>>>>              }
>>>>>>>>>>>>>>              FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
>>>>>>>>>>>>>>            }
>>>>>>>>>>>>>> +    if (supports_sha()) {
>>>>>>>>>>>>>> +      if (FLAG_IS_DEFAULT(UseSHA)) {
>>>>>>>>>>>>>> +        FLAG_SET_DEFAULT(UseSHA, true);
>>>>>>>>>>>>>> +      }
>>>>>>>>>>>>>> +    } else if (UseSHA || UseSHA1Intrinsics ||
>>>>>>>>>>>>>> UseSHA256Intrinsics
>>>>>>>>>>>>>> ||
>>>>>>>>>>>>>> UseSHA512Intrinsics) {
>>>>>>>>>>>>>> +      if (!FLAG_IS_DEFAULT(UseSHA) ||
>>>>>>>>>>>>>> +          !FLAG_IS_DEFAULT(UseSHA1Intrinsics) ||
>>>>>>>>>>>>>> +          !FLAG_IS_DEFAULT(UseSHA256Intrinsics) ||
>>>>>>>>>>>>>> +          !FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
>>>>>>>>>>>>>> +        warning("SHA instructions are not available on this
>>>>>>>>>>>>>> CPU");
>>>>>>>>>>>>>> +      }
>>>>>>>>>>>>>> +      FLAG_SET_DEFAULT(UseSHA, false);
>>>>>>>>>>>>>> +      FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
>>>>>>>>>>>>>> +      FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
>>>>>>>>>>>>>> +      FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
>>>>>>>>>>>>>> +    }
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>            // some defaults for AMD family 15h
>>>>>>>>>>>>>>            if ( cpu_family() == 0x15 ) {
>>>>>>>>>>>>>> @@ -1109,11 +1125,40 @@
>>>>>>>>>>>>>>            }
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>        #ifdef COMPILER2
>>>>>>>>>>>>>> -    if (MaxVectorSize > 16) {
>>>>>>>>>>>>>> -      // Limit vectors size to 16 bytes on current AMD cpus.
>>>>>>>>>>>>>> +    if (cpu_family() < 0x17 && MaxVectorSize > 16) {
>>>>>>>>>>>>>> +      // Limit vectors size to 16 bytes on AMD cpus < 17h.
>>>>>>>>>>>>>>              FLAG_SET_DEFAULT(MaxVectorSize, 16);
>>>>>>>>>>>>>>            }
>>>>>>>>>>>>>>        #endif // COMPILER2
>>>>>>>>>>>>>> +
>>>>>>>>>>>>>> +    // Some defaults for AMD family 17h
>>>>>>>>>>>>>> +    if ( cpu_family() == 0x17 ) {
>>>>>>>>>>>>>> +      // On family 17h processors use XMM and
>>>>>>>>>>>>>> UnalignedLoadStores
>>>>>>>>>>>>>> for
>>>>>>>>>>>>>> Array Copy
>>>>>>>>>>>>>> +      if (supports_sse2() &&
>>>>>>>>>>>>>> FLAG_IS_DEFAULT(UseXMMForArrayCopy))
>>>>>>>>>>>>>> {
>>>>>>>>>>>>>> +        FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
>>>>>>>>>>>>>> +      }
>>>>>>>>>>>>>> +      if (supports_sse2() &&
>>>>>>>>>>>>>> FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
>>>>>>>>>>>>>> +        FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
>>>>>>>>>>>>>> +      }
>>>>>>>>>>>>>> +      if (supports_bmi2() &&
>>>>>>>>>>>>>> FLAG_IS_DEFAULT(UseBMI2Instructions))
>>>>>>>>>>>>>> {
>>>>>>>>>>>>>> +        FLAG_SET_DEFAULT(UseBMI2Instructions, true);
>>>>>>>>>>>>>> +      }
>>>>>>>>>>>>>> +      if (UseSHA) {
>>>>>>>>>>>>>> +        if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
>>>>>>>>>>>>>> +          FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
>>>>>>>>>>>>>> +        } else if (UseSHA512Intrinsics) {
>>>>>>>>>>>>>> +          warning("Intrinsics for SHA-384 and SHA-512 crypto
>>>>>>>>>>>>>> hash
>>>>>>>>>>>>>> functions not available on this CPU.");
>>>>>>>>>>>>>> +          FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
>>>>>>>>>>>>>> +        }
>>>>>>>>>>>>>> +      }
>>>>>>>>>>>>>> +#ifdef COMPILER2
>>>>>>>>>>>>>> +      if (supports_sse4_2()) {
>>>>>>>>>>>>>> +        if (FLAG_IS_DEFAULT(UseFPUForSpilling)) {
>>>>>>>>>>>>>> +          FLAG_SET_DEFAULT(UseFPUForSpilling, true);
>>>>>>>>>>>>>> +        }
>>>>>>>>>>>>>> +      }
>>>>>>>>>>>>>> +#endif
>>>>>>>>>>>>>> +    }
>>>>>>>>>>>>>>          }
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>          if( is_intel() ) { // Intel cpus specific settings
>>>>>>>>>>>>>> diff --git a/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>>>>>> b/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>>>>>> --- a/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>>>>>> +++ b/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>>>>>> @@ -505,6 +505,14 @@
>>>>>>>>>>>>>>              result |= CPU_CLMUL;
>>>>>>>>>>>>>>            if (_cpuid_info.sef_cpuid7_ebx.bits.rtm != 0)
>>>>>>>>>>>>>>              result |= CPU_RTM;
>>>>>>>>>>>>>> +    if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
>>>>>>>>>>>>>> +       result |= CPU_ADX;
>>>>>>>>>>>>>> +    if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
>>>>>>>>>>>>>> +      result |= CPU_BMI2;
>>>>>>>>>>>>>> +    if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
>>>>>>>>>>>>>> +      result |= CPU_SHA;
>>>>>>>>>>>>>> +    if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
>>>>>>>>>>>>>> +      result |= CPU_FMA;
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>            // AMD features.
>>>>>>>>>>>>>>            if (is_amd()) {
>>>>>>>>>>>>>> @@ -515,19 +523,13 @@
>>>>>>>>>>>>>>                result |= CPU_LZCNT;
>>>>>>>>>>>>>>              if (_cpuid_info.ext_cpuid1_ecx.bits.sse4a != 0)
>>>>>>>>>>>>>>                result |= CPU_SSE4A;
>>>>>>>>>>>>>> +      if(_cpuid_info.std_cpuid1_edx.bits.ht != 0)
>>>>>>>>>>>>>> +        result |= CPU_HT;
>>>>>>>>>>>>>>            }
>>>>>>>>>>>>>>            // Intel features.
>>>>>>>>>>>>>>            if(is_intel()) {
>>>>>>>>>>>>>> -      if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
>>>>>>>>>>>>>> -         result |= CPU_ADX;
>>>>>>>>>>>>>> -      if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
>>>>>>>>>>>>>> -        result |= CPU_BMI2;
>>>>>>>>>>>>>> -      if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
>>>>>>>>>>>>>> -        result |= CPU_SHA;
>>>>>>>>>>>>>>              if(_cpuid_info.ext_cpuid1_ecx.bits.lzcnt_intel !=
>>>>>>>>>>>>>> 0)
>>>>>>>>>>>>>>                result |= CPU_LZCNT;
>>>>>>>>>>>>>> -      if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
>>>>>>>>>>>>>> -        result |= CPU_FMA;
>>>>>>>>>>>>>>              // for Intel, ecx.bits.misalignsse bit (bit 8)
>>>>>>>>>>>>>> indicates
>>>>>>>>>>>>>> support for prefetchw
>>>>>>>>>>>>>>              if (_cpuid_info.ext_cpuid1_ecx.bits.misalignsse !=
>>>>>>>>>>>>>> 0) {
>>>>>>>>>>>>>>                result |= CPU_3DNOW_PREFETCH;
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> Regards,
>>>>>>>>>>>>>> Rohit
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> On 9/1/17 8:04 AM, Rohit Arul Raj wrote:
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> On Fri, Sep 1, 2017 at 10:27 AM, Rohit Arul Raj
>>>>>>>>>>>>>>>> <rohitarulraj at gmail.com>
>>>>>>>>>>>>>>>> wrote:
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>> On Fri, Sep 1, 2017 at 3:01 AM, David Holmes
>>>>>>>>>>>>>>>>> <david.holmes at oracle.com>
>>>>>>>>>>>>>>>>> wrote:
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>> Hi Rohit,
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>> I think the patch needs updating for jdk10 as I already see
>>>>>>>>>>>>>>>>>> a
>>>>>>>>>>>>>>>>>> lot of
>>>>>>>>>>>>>>>>>> logic
>>>>>>>>>>>>>>>>>> around UseSHA in vm_version_x86.cpp.
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>> Thanks,
>>>>>>>>>>>>>>>>>> David
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>> Thanks David, I will update the patch wrt JDK10 source base,
>>>>>>>>>>>>>>>>> test
>>>>>>>>>>>>>>>>> and
>>>>>>>>>>>>>>>>> resubmit for review.
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>> Regards,
>>>>>>>>>>>>>>>>> Rohit
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> Hi All,
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> I have updated the patch wrt openjdk10/hotspot (parent:
>>>>>>>>>>>>>>>> 13519:71337910df60), did regression testing using jtreg
>>>>>>>>>>>>>>>> ($make
>>>>>>>>>>>>>>>> default) and didn't find any regressions.
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> Can anyone please volunteer to review this patch which sets
>>>>>>>>>>>>>>>> flag/ISA
>>>>>>>>>>>>>>>> defaults for newer AMD 17h (EPYC) processor?
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> ************************* Patch ****************************
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> diff --git a/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>>>>>>> b/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>>>>>>> --- a/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>>>>>>> +++ b/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>>>>>>> @@ -1088,6 +1088,22 @@
>>>>>>>>>>>>>>>>               }
>>>>>>>>>>>>>>>>               FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
>>>>>>>>>>>>>>>>             }
>>>>>>>>>>>>>>>> +    if (supports_sha()) {
>>>>>>>>>>>>>>>> +      if (FLAG_IS_DEFAULT(UseSHA)) {
>>>>>>>>>>>>>>>> +        FLAG_SET_DEFAULT(UseSHA, true);
>>>>>>>>>>>>>>>> +      }
>>>>>>>>>>>>>>>> +    } else if (UseSHA || UseSHA1Intrinsics ||
>>>>>>>>>>>>>>>> UseSHA256Intrinsics
>>>>>>>>>>>>>>>> ||
>>>>>>>>>>>>>>>> UseSHA512Intrinsics) {
>>>>>>>>>>>>>>>> +      if (!FLAG_IS_DEFAULT(UseSHA) ||
>>>>>>>>>>>>>>>> +          !FLAG_IS_DEFAULT(UseSHA1Intrinsics) ||
>>>>>>>>>>>>>>>> +          !FLAG_IS_DEFAULT(UseSHA256Intrinsics) ||
>>>>>>>>>>>>>>>> +          !FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
>>>>>>>>>>>>>>>> +        warning("SHA instructions are not available on this
>>>>>>>>>>>>>>>> CPU");
>>>>>>>>>>>>>>>> +      }
>>>>>>>>>>>>>>>> +      FLAG_SET_DEFAULT(UseSHA, false);
>>>>>>>>>>>>>>>> +      FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
>>>>>>>>>>>>>>>> +      FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
>>>>>>>>>>>>>>>> +      FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
>>>>>>>>>>>>>>>> +    }
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>             // some defaults for AMD family 15h
>>>>>>>>>>>>>>>>             if ( cpu_family() == 0x15 ) {
>>>>>>>>>>>>>>>> @@ -1109,11 +1125,43 @@
>>>>>>>>>>>>>>>>             }
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>         #ifdef COMPILER2
>>>>>>>>>>>>>>>> -    if (MaxVectorSize > 16) {
>>>>>>>>>>>>>>>> -      // Limit vectors size to 16 bytes on current AMD cpus.
>>>>>>>>>>>>>>>> +    if (cpu_family() < 0x17 && MaxVectorSize > 16) {
>>>>>>>>>>>>>>>> +      // Limit vectors size to 16 bytes on AMD cpus < 17h.
>>>>>>>>>>>>>>>>               FLAG_SET_DEFAULT(MaxVectorSize, 16);
>>>>>>>>>>>>>>>>             }
>>>>>>>>>>>>>>>>         #endif // COMPILER2
>>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>>> +    // Some defaults for AMD family 17h
>>>>>>>>>>>>>>>> +    if ( cpu_family() == 0x17 ) {
>>>>>>>>>>>>>>>> +      // On family 17h processors use XMM and
>>>>>>>>>>>>>>>> UnalignedLoadStores
>>>>>>>>>>>>>>>> for
>>>>>>>>>>>>>>>> Array Copy
>>>>>>>>>>>>>>>> +      if (supports_sse2() &&
>>>>>>>>>>>>>>>> FLAG_IS_DEFAULT(UseXMMForArrayCopy))
>>>>>>>>>>>>>>>> {
>>>>>>>>>>>>>>>> +        UseXMMForArrayCopy = true;
>>>>>>>>>>>>>>>> +      }
>>>>>>>>>>>>>>>> +      if (supports_sse2() &&
>>>>>>>>>>>>>>>> FLAG_IS_DEFAULT(UseUnalignedLoadStores))
>>>>>>>>>>>>>>>> {
>>>>>>>>>>>>>>>> +        UseUnalignedLoadStores = true;
>>>>>>>>>>>>>>>> +      }
>>>>>>>>>>>>>>>> +      if (supports_bmi2() &&
>>>>>>>>>>>>>>>> FLAG_IS_DEFAULT(UseBMI2Instructions)) {
>>>>>>>>>>>>>>>> +        UseBMI2Instructions = true;
>>>>>>>>>>>>>>>> +      }
>>>>>>>>>>>>>>>> +      if (MaxVectorSize > 32) {
>>>>>>>>>>>>>>>> +        FLAG_SET_DEFAULT(MaxVectorSize, 32);
>>>>>>>>>>>>>>>> +      }
>>>>>>>>>>>>>>>> +      if (UseSHA) {
>>>>>>>>>>>>>>>> +        if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
>>>>>>>>>>>>>>>> +          FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
>>>>>>>>>>>>>>>> +        } else if (UseSHA512Intrinsics) {
>>>>>>>>>>>>>>>> +          warning("Intrinsics for SHA-384 and SHA-512 crypto
>>>>>>>>>>>>>>>> hash
>>>>>>>>>>>>>>>> functions not available on this CPU.");
>>>>>>>>>>>>>>>> +          FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
>>>>>>>>>>>>>>>> +        }
>>>>>>>>>>>>>>>> +      }
>>>>>>>>>>>>>>>> +#ifdef COMPILER2
>>>>>>>>>>>>>>>> +      if (supports_sse4_2()) {
>>>>>>>>>>>>>>>> +        if (FLAG_IS_DEFAULT(UseFPUForSpilling)) {
>>>>>>>>>>>>>>>> +          FLAG_SET_DEFAULT(UseFPUForSpilling, true);
>>>>>>>>>>>>>>>> +        }
>>>>>>>>>>>>>>>> +      }
>>>>>>>>>>>>>>>> +#endif
>>>>>>>>>>>>>>>> +    }
>>>>>>>>>>>>>>>>           }
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>           if( is_intel() ) { // Intel cpus specific settings
>>>>>>>>>>>>>>>> diff --git a/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>>>>>>>> b/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>>>>>>>> --- a/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>>>>>>>> +++ b/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>>>>>>>> @@ -505,6 +505,14 @@
>>>>>>>>>>>>>>>>               result |= CPU_CLMUL;
>>>>>>>>>>>>>>>>             if (_cpuid_info.sef_cpuid7_ebx.bits.rtm != 0)
>>>>>>>>>>>>>>>>               result |= CPU_RTM;
>>>>>>>>>>>>>>>> +    if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
>>>>>>>>>>>>>>>> +       result |= CPU_ADX;
>>>>>>>>>>>>>>>> +    if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
>>>>>>>>>>>>>>>> +      result |= CPU_BMI2;
>>>>>>>>>>>>>>>> +    if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
>>>>>>>>>>>>>>>> +      result |= CPU_SHA;
>>>>>>>>>>>>>>>> +    if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
>>>>>>>>>>>>>>>> +      result |= CPU_FMA;
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>             // AMD features.
>>>>>>>>>>>>>>>>             if (is_amd()) {
>>>>>>>>>>>>>>>> @@ -515,19 +523,13 @@
>>>>>>>>>>>>>>>>                 result |= CPU_LZCNT;
>>>>>>>>>>>>>>>>               if (_cpuid_info.ext_cpuid1_ecx.bits.sse4a != 0)
>>>>>>>>>>>>>>>>                 result |= CPU_SSE4A;
>>>>>>>>>>>>>>>> +      if(_cpuid_info.std_cpuid1_edx.bits.ht != 0)
>>>>>>>>>>>>>>>> +        result |= CPU_HT;
>>>>>>>>>>>>>>>>             }
>>>>>>>>>>>>>>>>             // Intel features.
>>>>>>>>>>>>>>>>             if(is_intel()) {
>>>>>>>>>>>>>>>> -      if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
>>>>>>>>>>>>>>>> -         result |= CPU_ADX;
>>>>>>>>>>>>>>>> -      if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
>>>>>>>>>>>>>>>> -        result |= CPU_BMI2;
>>>>>>>>>>>>>>>> -      if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
>>>>>>>>>>>>>>>> -        result |= CPU_SHA;
>>>>>>>>>>>>>>>>               if(_cpuid_info.ext_cpuid1_ecx.bits.lzcnt_intel
>>>>>>>>>>>>>>>> != 0)
>>>>>>>>>>>>>>>>                 result |= CPU_LZCNT;
>>>>>>>>>>>>>>>> -      if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
>>>>>>>>>>>>>>>> -        result |= CPU_FMA;
>>>>>>>>>>>>>>>>               // for Intel, ecx.bits.misalignsse bit (bit 8)
>>>>>>>>>>>>>>>> indicates
>>>>>>>>>>>>>>>> support for prefetchw
>>>>>>>>>>>>>>>>               if (_cpuid_info.ext_cpuid1_ecx.bits.misalignsse
>>>>>>>>>>>>>>>> !=
>>>>>>>>>>>>>>>> 0) {
>>>>>>>>>>>>>>>>                 result |= CPU_3DNOW_PREFETCH;
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> **************************************************************
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> Thanks,
>>>>>>>>>>>>>>>> Rohit
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>> On 1/09/2017 1:11 AM, Rohit Arul Raj wrote:
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>> On Thu, Aug 31, 2017 at 5:59 PM, David Holmes
>>>>>>>>>>>>>>>>>>> <david.holmes at oracle.com>
>>>>>>>>>>>>>>>>>>> wrote:
>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>> Hi Rohit,
>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>> On 31/08/2017 7:03 PM, Rohit Arul Raj wrote:
>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>> I would like a volunteer to review this patch
>>>>>>>>>>>>>>>>>>>>> (openJDK9)
>>>>>>>>>>>>>>>>>>>>> which
>>>>>>>>>>>>>>>>>>>>> sets
>>>>>>>>>>>>>>>>>>>>> flag/ISA defaults for newer AMD 17h (EPYC) processor and
>>>>>>>>>>>>>>>>>>>>> help
>>>>>>>>>>>>>>>>>>>>> us
>>>>>>>>>>>>>>>>>>>>> with
>>>>>>>>>>>>>>>>>>>>> the commit process.
>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>> Webrev:
>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>> https://www.dropbox.com/sh/08bsxaxupg8kbam/AADurTXLGIZ6C-tiIAi_Glyka?dl=0
>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>> Unfortunately patches can not be accepted from systems
>>>>>>>>>>>>>>>>>>>> outside
>>>>>>>>>>>>>>>>>>>> the
>>>>>>>>>>>>>>>>>>>> OpenJDK
>>>>>>>>>>>>>>>>>>>> infrastructure and ...
>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>> I have also attached the patch (hg diff -g) for
>>>>>>>>>>>>>>>>>>>>> reference.
>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>> ... unfortunately patches tend to get stripped by the
>>>>>>>>>>>>>>>>>>>> mail
>>>>>>>>>>>>>>>>>>>> servers.
>>>>>>>>>>>>>>>>>>>> If
>>>>>>>>>>>>>>>>>>>> the
>>>>>>>>>>>>>>>>>>>> patch is small please include it inline. Otherwise you
>>>>>>>>>>>>>>>>>>>> will
>>>>>>>>>>>>>>>>>>>> need
>>>>>>>>>>>>>>>>>>>> to
>>>>>>>>>>>>>>>>>>>> find
>>>>>>>>>>>>>>>>>>>> an
>>>>>>>>>>>>>>>>>>>> OpenJDK Author who can host it for you on
>>>>>>>>>>>>>>>>>>>> cr.openjdk.java.net.
>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>> 3) I have done regression testing using jtreg ($make
>>>>>>>>>>>>>>>>>>>>> default)
>>>>>>>>>>>>>>>>>>>>> and
>>>>>>>>>>>>>>>>>>>>> didn't find any regressions.
>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>> Sounds good, but until I see the patch it is hard to
>>>>>>>>>>>>>>>>>>>> comment
>>>>>>>>>>>>>>>>>>>> on
>>>>>>>>>>>>>>>>>>>> testing
>>>>>>>>>>>>>>>>>>>> requirements.
>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>> Thanks,
>>>>>>>>>>>>>>>>>>>> David
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>> Thanks David,
>>>>>>>>>>>>>>>>>>> Yes, it's a small patch.
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>> diff --git a/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>>>>>>>>>> b/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>>>>>>>>>> --- a/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>>>>>>>>>> +++ b/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>>>>>>>>>> @@ -1051,6 +1051,22 @@
>>>>>>>>>>>>>>>>>>>                }
>>>>>>>>>>>>>>>>>>>                FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
>>>>>>>>>>>>>>>>>>>              }
>>>>>>>>>>>>>>>>>>> +    if (supports_sha()) {
>>>>>>>>>>>>>>>>>>> +      if (FLAG_IS_DEFAULT(UseSHA)) {
>>>>>>>>>>>>>>>>>>> +        FLAG_SET_DEFAULT(UseSHA, true);
>>>>>>>>>>>>>>>>>>> +      }
>>>>>>>>>>>>>>>>>>> +    } else if (UseSHA || UseSHA1Intrinsics ||
>>>>>>>>>>>>>>>>>>> UseSHA256Intrinsics
>>>>>>>>>>>>>>>>>>> ||
>>>>>>>>>>>>>>>>>>> UseSHA512Intrinsics) {
>>>>>>>>>>>>>>>>>>> +      if (!FLAG_IS_DEFAULT(UseSHA) ||
>>>>>>>>>>>>>>>>>>> +          !FLAG_IS_DEFAULT(UseSHA1Intrinsics) ||
>>>>>>>>>>>>>>>>>>> +          !FLAG_IS_DEFAULT(UseSHA256Intrinsics) ||
>>>>>>>>>>>>>>>>>>> +          !FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
>>>>>>>>>>>>>>>>>>> +        warning("SHA instructions are not available on
>>>>>>>>>>>>>>>>>>> this
>>>>>>>>>>>>>>>>>>> CPU");
>>>>>>>>>>>>>>>>>>> +      }
>>>>>>>>>>>>>>>>>>> +      FLAG_SET_DEFAULT(UseSHA, false);
>>>>>>>>>>>>>>>>>>> +      FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
>>>>>>>>>>>>>>>>>>> +      FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
>>>>>>>>>>>>>>>>>>> +      FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
>>>>>>>>>>>>>>>>>>> +    }
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>              // some defaults for AMD family 15h
>>>>>>>>>>>>>>>>>>>              if ( cpu_family() == 0x15 ) {
>>>>>>>>>>>>>>>>>>> @@ -1072,11 +1088,43 @@
>>>>>>>>>>>>>>>>>>>              }
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>          #ifdef COMPILER2
>>>>>>>>>>>>>>>>>>> -    if (MaxVectorSize > 16) {
>>>>>>>>>>>>>>>>>>> -      // Limit vectors size to 16 bytes on current AMD
>>>>>>>>>>>>>>>>>>> cpus.
>>>>>>>>>>>>>>>>>>> +    if (cpu_family() < 0x17 && MaxVectorSize > 16) {
>>>>>>>>>>>>>>>>>>> +      // Limit vectors size to 16 bytes on AMD cpus <
>>>>>>>>>>>>>>>>>>> 17h.
>>>>>>>>>>>>>>>>>>>                FLAG_SET_DEFAULT(MaxVectorSize, 16);
>>>>>>>>>>>>>>>>>>>              }
>>>>>>>>>>>>>>>>>>>          #endif // COMPILER2
>>>>>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>>>>>> +    // Some defaults for AMD family 17h
>>>>>>>>>>>>>>>>>>> +    if ( cpu_family() == 0x17 ) {
>>>>>>>>>>>>>>>>>>> +      // On family 17h processors use XMM and
>>>>>>>>>>>>>>>>>>> UnalignedLoadStores
>>>>>>>>>>>>>>>>>>> for
>>>>>>>>>>>>>>>>>>> Array Copy
>>>>>>>>>>>>>>>>>>> +      if (supports_sse2() &&
>>>>>>>>>>>>>>>>>>> FLAG_IS_DEFAULT(UseXMMForArrayCopy))
>>>>>>>>>>>>>>>>>>> {
>>>>>>>>>>>>>>>>>>> +        UseXMMForArrayCopy = true;
>>>>>>>>>>>>>>>>>>> +      }
>>>>>>>>>>>>>>>>>>> +      if (supports_sse2() &&
>>>>>>>>>>>>>>>>>>> FLAG_IS_DEFAULT(UseUnalignedLoadStores))
>>>>>>>>>>>>>>>>>>> {
>>>>>>>>>>>>>>>>>>> +        UseUnalignedLoadStores = true;
>>>>>>>>>>>>>>>>>>> +      }
>>>>>>>>>>>>>>>>>>> +      if (supports_bmi2() &&
>>>>>>>>>>>>>>>>>>> FLAG_IS_DEFAULT(UseBMI2Instructions))
>>>>>>>>>>>>>>>>>>> {
>>>>>>>>>>>>>>>>>>> +        UseBMI2Instructions = true;
>>>>>>>>>>>>>>>>>>> +      }
>>>>>>>>>>>>>>>>>>> +      if (MaxVectorSize > 32) {
>>>>>>>>>>>>>>>>>>> +        FLAG_SET_DEFAULT(MaxVectorSize, 32);
>>>>>>>>>>>>>>>>>>> +      }
>>>>>>>>>>>>>>>>>>> +      if (UseSHA) {
>>>>>>>>>>>>>>>>>>> +        if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
>>>>>>>>>>>>>>>>>>> +          FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
>>>>>>>>>>>>>>>>>>> +        } else if (UseSHA512Intrinsics) {
>>>>>>>>>>>>>>>>>>> +          warning("Intrinsics for SHA-384 and SHA-512
>>>>>>>>>>>>>>>>>>> crypto
>>>>>>>>>>>>>>>>>>> hash
>>>>>>>>>>>>>>>>>>> functions not available on this CPU.");
>>>>>>>>>>>>>>>>>>> +          FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
>>>>>>>>>>>>>>>>>>> +        }
>>>>>>>>>>>>>>>>>>> +      }
>>>>>>>>>>>>>>>>>>> +#ifdef COMPILER2
>>>>>>>>>>>>>>>>>>> +      if (supports_sse4_2()) {
>>>>>>>>>>>>>>>>>>> +        if (FLAG_IS_DEFAULT(UseFPUForSpilling)) {
>>>>>>>>>>>>>>>>>>> +          FLAG_SET_DEFAULT(UseFPUForSpilling, true);
>>>>>>>>>>>>>>>>>>> +        }
>>>>>>>>>>>>>>>>>>> +      }
>>>>>>>>>>>>>>>>>>> +#endif
>>>>>>>>>>>>>>>>>>> +    }
>>>>>>>>>>>>>>>>>>>            }
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>            if( is_intel() ) { // Intel cpus specific
>>>>>>>>>>>>>>>>>>> settings
>>>>>>>>>>>>>>>>>>> diff --git a/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>>>>>>>>>>> b/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>>>>>>>>>>> --- a/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>>>>>>>>>>> +++ b/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>>>>>>>>>>> @@ -513,6 +513,16 @@
>>>>>>>>>>>>>>>>>>>                  result |= CPU_LZCNT;
>>>>>>>>>>>>>>>>>>>                if (_cpuid_info.ext_cpuid1_ecx.bits.sse4a !=
>>>>>>>>>>>>>>>>>>> 0)
>>>>>>>>>>>>>>>>>>>                  result |= CPU_SSE4A;
>>>>>>>>>>>>>>>>>>> +      if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
>>>>>>>>>>>>>>>>>>> +        result |= CPU_BMI2;
>>>>>>>>>>>>>>>>>>> +      if(_cpuid_info.std_cpuid1_edx.bits.ht != 0)
>>>>>>>>>>>>>>>>>>> +        result |= CPU_HT;
>>>>>>>>>>>>>>>>>>> +      if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
>>>>>>>>>>>>>>>>>>> +        result |= CPU_ADX;
>>>>>>>>>>>>>>>>>>> +      if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
>>>>>>>>>>>>>>>>>>> +        result |= CPU_SHA;
>>>>>>>>>>>>>>>>>>> +      if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
>>>>>>>>>>>>>>>>>>> +        result |= CPU_FMA;
>>>>>>>>>>>>>>>>>>>              }
>>>>>>>>>>>>>>>>>>>              // Intel features.
>>>>>>>>>>>>>>>>>>>              if(is_intel()) {
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>> Regards,
>>>>>>>>>>>>>>>>>>> Rohit
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>
>>>> >