RFR: Newer AMD 17h (EPYC) Processor family defaults

Rohit Arul Raj rohitarulraj at gmail.com
Mon Sep 11 09:08:48 UTC 2017


Hello David,

Thanks for the review.

>
> This detail is getting beyond my knowledge I'm afraid. I have two queries:
>
> 1. ExtCpuid1EEx
>
> Should this be ExtCpuid1EEbx? (I see the naming here is somewhat
> inconsistent - and potentially confusing: I would have preferred to see
> things like ExtCpuid_1E_Ebx, to make it clear.)

Yes, I can change it accordingly.

> 2. You fixed the calculation of threads_per_core by reading it directly, but
> I have to wonder which part of the old calculation:
>
> 695         result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
> 696                  cores_per_cpu();
>
> was actually going wrong in the case of the 17h?

Currently in OpenJDK, we compute the number of threads per core from the
following CPUID bits:
        threads_per_core = CPUID_Fn00000001_EBX [16:23] /
                           CPUID_Fn80000008_ECX [0:7] (cores_per_cpu).

This works on AMD 15h because:
CPUID Fn0000_0001_EBX [16:23] : gives the number of logical cores.
CPUID Fn8000_0008_ECX [0:7]   : gives the number of physical cores.
http://support.amd.com/TechDocs/55072_AMD_Family_15h_Models_70h-7Fh_BKDG.pdf
[Pg 54: 2.4.11.1 Multi-Core Support]

For AMD 17h (EPYC), the same CPUID fields yield 1 thread per core, because
both fields now count threads, so their quotient is always 1:
CPUID Fn0000_0001_EBX [16:23] : gives the number of logical cores.
CPUID Fn8000_0008_ECX [0:7]   : gives the number of threads in the package.
https://support.amd.com/TechDocs/54945_PPR_Family_17h_Models_00h-0Fh.pdf [Pg 74]

Hence the change is required for the 17h family of processors.

I will update the changes and submit the patch again.

Thanks,
Rohit

> Thanks,
> David
> -----
>
>
>> Reference:
>> https://support.amd.com/TechDocs/54945_PPR_Family_17h_Models_00h-0Fh.pdf
>> [Pg 82]
>>
>>      CPUID_Fn8000001E_EBX [Core Identifiers] (CoreId)
>>        15:8 ThreadsPerCore: threads per core. Read-only. Reset: XXh.
>> The number of threads per core is ThreadsPerCore+1.
>>
>> diff --git a/src/cpu/x86/vm/vm_version_x86.cpp
>> b/src/cpu/x86/vm/vm_version_x86.cpp
>> --- a/src/cpu/x86/vm/vm_version_x86.cpp
>> +++ b/src/cpu/x86/vm/vm_version_x86.cpp
>> @@ -70,7 +70,7 @@
>>       bool use_evex = FLAG_IS_DEFAULT(UseAVX) || (UseAVX > 2);
>>
>>       Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4;
>> -    Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7,
>> done, wrapup;
>> +    Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7,
>> ext_cpuid8, done, wrapup;
>>       Label legacy_setup, save_restore_except, legacy_save_restore,
>> start_simd_check;
>>
>>       StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub");
>> @@ -272,9 +272,23 @@
>>       __ jccb(Assembler::belowEqual, ext_cpuid5);
>>       __ cmpl(rax, 0x80000007);     // Is cpuid(0x80000008) supported?
>>       __ jccb(Assembler::belowEqual, ext_cpuid7);
>> +    __ cmpl(rax, 0x80000008);     // Is cpuid(0x8000001E) supported?
>> +    __ jccb(Assembler::belowEqual, ext_cpuid8);
>> +    //
>> +    // Extended cpuid(0x8000001E)
>> +    //
>> +    __ movl(rax, 0x8000001E);
>> +    __ cpuid();
>> +    __ lea(rsi, Address(rbp,
>> in_bytes(VM_Version::ext_cpuid1E_offset())));
>> +    __ movl(Address(rsi, 0), rax);
>> +    __ movl(Address(rsi, 4), rbx);
>> +    __ movl(Address(rsi, 8), rcx);
>> +    __ movl(Address(rsi,12), rdx);
>> +
>>       //
>>       // Extended cpuid(0x80000008)
>>       //
>> +    __ bind(ext_cpuid8);
>>       __ movl(rax, 0x80000008);
>>       __ cpuid();
>>       __ lea(rsi, Address(rbp,
>> in_bytes(VM_Version::ext_cpuid8_offset())));
>> @@ -1109,11 +1123,27 @@
>>       }
>>
>>   #ifdef COMPILER2
>> -    if (MaxVectorSize > 16) {
>> -      // Limit vectors size to 16 bytes on current AMD cpus.
>> +    if (cpu_family() < 0x17 && MaxVectorSize > 16) {
>> +      // Limit vectors size to 16 bytes on AMD cpus < 17h.
>>         FLAG_SET_DEFAULT(MaxVectorSize, 16);
>>       }
>>   #endif // COMPILER2
>> +
>> +    // Some defaults for AMD family 17h
>> +    if ( cpu_family() == 0x17 ) {
>> +      // On family 17h processors use XMM and UnalignedLoadStores for
>> Array Copy
>> +      if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
>> +        FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
>> +      }
>> +      if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
>> +        FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
>> +      }
>> +#ifdef COMPILER2
>> +      if (supports_sse4_2() && FLAG_IS_DEFAULT(UseFPUForSpilling)) {
>> +        FLAG_SET_DEFAULT(UseFPUForSpilling, true);
>> +      }
>> +#endif
>> +    }
>>     }
>>
>>     if( is_intel() ) { // Intel cpus specific settings
>> diff --git a/src/cpu/x86/vm/vm_version_x86.hpp
>> b/src/cpu/x86/vm/vm_version_x86.hpp
>> --- a/src/cpu/x86/vm/vm_version_x86.hpp
>> +++ b/src/cpu/x86/vm/vm_version_x86.hpp
>> @@ -228,6 +228,15 @@
>>       } bits;
>>     };
>>
>> +  union ExtCpuid1EEx {
>> +    uint32_t value;
>> +    struct {
>> +      uint32_t                  : 8,
>> +               threads_per_core : 8,
>> +                                : 16;
>> +    } bits;
>> +  };
>> +
>>     union XemXcr0Eax {
>>       uint32_t value;
>>       struct {
>> @@ -398,6 +407,12 @@
>>       ExtCpuid8Ecx ext_cpuid8_ecx;
>>       uint32_t     ext_cpuid8_edx; // reserved
>>
>> +    // cpuid function 0x8000001E // AMD 17h
>> +    uint32_t     ext_cpuid1E_eax;
>> +    ExtCpuid1EEx ext_cpuid1E_ebx; // threads per core (AMD17h)
>> +    uint32_t     ext_cpuid1E_ecx;
>> +    uint32_t     ext_cpuid1E_edx; // unused currently
>> +
>>       // extended control register XCR0 (the XFEATURE_ENABLED_MASK
>> register)
>>       XemXcr0Eax   xem_xcr0_eax;
>>       uint32_t     xem_xcr0_edx; // reserved
>> @@ -505,6 +520,14 @@
>>         result |= CPU_CLMUL;
>>       if (_cpuid_info.sef_cpuid7_ebx.bits.rtm != 0)
>>         result |= CPU_RTM;
>> +    if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
>> +       result |= CPU_ADX;
>> +    if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
>> +      result |= CPU_BMI2;
>> +    if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
>> +      result |= CPU_SHA;
>> +    if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
>> +      result |= CPU_FMA;
>>
>>       // AMD features.
>>       if (is_amd()) {
>> @@ -518,16 +541,8 @@
>>       }
>>       // Intel features.
>>       if(is_intel()) {
>> -      if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
>> -         result |= CPU_ADX;
>> -      if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
>> -        result |= CPU_BMI2;
>> -      if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
>> -        result |= CPU_SHA;
>>         if(_cpuid_info.ext_cpuid1_ecx.bits.lzcnt_intel != 0)
>>           result |= CPU_LZCNT;
>> -      if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
>> -        result |= CPU_FMA;
>>         // for Intel, ecx.bits.misalignsse bit (bit 8) indicates
>> support for prefetchw
>>         if (_cpuid_info.ext_cpuid1_ecx.bits.misalignsse != 0) {
>>           result |= CPU_3DNOW_PREFETCH;
>> @@ -590,6 +605,7 @@
>>     static ByteSize ext_cpuid5_offset() { return
>> byte_offset_of(CpuidInfo, ext_cpuid5_eax); }
>>     static ByteSize ext_cpuid7_offset() { return
>> byte_offset_of(CpuidInfo, ext_cpuid7_eax); }
>>     static ByteSize ext_cpuid8_offset() { return
>> byte_offset_of(CpuidInfo, ext_cpuid8_eax); }
>> +  static ByteSize ext_cpuid1E_offset() { return
>> byte_offset_of(CpuidInfo, ext_cpuid1E_eax); }
>>     static ByteSize tpl_cpuidB0_offset() { return
>> byte_offset_of(CpuidInfo, tpl_cpuidB0_eax); }
>>     static ByteSize tpl_cpuidB1_offset() { return
>> byte_offset_of(CpuidInfo, tpl_cpuidB1_eax); }
>>     static ByteSize tpl_cpuidB2_offset() { return
>> byte_offset_of(CpuidInfo, tpl_cpuidB2_eax); }
>> @@ -673,8 +689,11 @@
>>       if (is_intel() && supports_processor_topology()) {
>>         result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
>>       } else if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) {
>> -      result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
>> -               cores_per_cpu();
>> +      if (cpu_family() >= 0x17)
>> +        result = _cpuid_info.ext_cpuid1E_ebx.bits.threads_per_core + 1;
>> +      else
>> +        result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
>> +                 cores_per_cpu();
>>       }
>>       return (result == 0 ? 1 : result);
>>     }
>>
>> I have attached the patch for review.
>> Please let me know your comments.
>>
>> Thanks,
>> Rohit
>>
>>> Thanks,
>>> Vladimir
>>>
>>>
>>>>
>>>> src/cpu/x86/vm/vm_version_x86.cpp
>>>>
>>>> No comments on AMD specific changes.
>>>>
>>>> Thanks,
>>>> David
>>>> -----
>>>>
>>>> On 5/09/2017 3:43 PM, David Holmes wrote:
>>>>>
>>>>>
>>>>> On 5/09/2017 3:29 PM, Rohit Arul Raj wrote:
>>>>>>
>>>>>>
>>>>>> Hello David,
>>>>>>
>>>>>> On Tue, Sep 5, 2017 at 10:31 AM, David Holmes
>>>>>> <david.holmes at oracle.com>
>>>>>> wrote:
>>>>>>>
>>>>>>>
>>>>>>> Hi Rohit,
>>>>>>>
>>>>>>> I was unable to apply your patch to latest jdk10/hs/hotspot repo.
>>>>>>>
>>>>>>
>>>>>> I checked out the latest jdk10/hs/hotspot [parent: 13548:1a9c2e07a826]
>>>>>> and was able to apply the patch [epyc-amd17h-defaults-3Sept.patch]
>>>>>> without any issues.
>>>>>> Can you share the error message that you are getting?
>>>>>
>>>>>
>>>>>
>>>>> I was getting this:
>>>>>
>>>>> applying hotspot.patch
>>>>> patching file src/cpu/x86/vm/vm_version_x86.cpp
>>>>> Hunk #1 FAILED at 1108
>>>>> 1 out of 1 hunks FAILED -- saving rejects to file
>>>>> src/cpu/x86/vm/vm_version_x86.cpp.rej
>>>>> patching file src/cpu/x86/vm/vm_version_x86.hpp
>>>>> Hunk #2 FAILED at 522
>>>>> 1 out of 2 hunks FAILED -- saving rejects to file
>>>>> src/cpu/x86/vm/vm_version_x86.hpp.rej
>>>>> abort: patch failed to apply
>>>>>
>>>>> but I started again and this time it applied fine, so not sure what was
>>>>> going on there.
>>>>>
>>>>> Cheers,
>>>>> David
>>>>>
>>>>>> Regards,
>>>>>> Rohit
>>>>>>
>>>>>>
>>>>>>>
>>>>>>>
>>>>>>> On 4/09/2017 2:42 AM, Rohit Arul Raj wrote:
>>>>>>>>
>>>>>>>>
>>>>>>>>
>>>>>>>> Hello Vladimir,
>>>>>>>>
>>>>>>>> On Sat, Sep 2, 2017 at 11:25 PM, Vladimir Kozlov
>>>>>>>> <vladimir.kozlov at oracle.com> wrote:
>>>>>>>>>
>>>>>>>>>
>>>>>>>>>
>>>>>>>>> Hi Rohit,
>>>>>>>>>
>>>>>>>>> On 9/2/17 1:16 AM, Rohit Arul Raj wrote:
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>> Hello Vladimir,
>>>>>>>>>>
>>>>>>>>>>> Changes look good. Only question I have is about MaxVectorSize.
>>>>>>>>>>> It
>>>>>>>>>>> is
>>>>>>>>>>> set
>>>>>>>>>>>>
>>>>>>>>>>>>
>>>>>>>>>>>>
>>>>>>>>>>>>
>>>>>>>>>>> 16 only in presence of AVX:
>>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>> http://hg.openjdk.java.net/jdk10/hs/hotspot/file/046eab27258f/src/cpu/x86/vm/vm_version_x86.cpp#l945
>>>>>>>>>>>
>>>>>>>>>>> Does that code works for AMD 17h too?
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>> Thanks for pointing that out. Yes, the code works fine for AMD
>>>>>>>>>> 17h.
>>>>>>>>>> So
>>>>>>>>>> I have removed the surplus check for MaxVectorSize from my patch.
>>>>>>>>>> I
>>>>>>>>>> have updated, re-tested and attached the patch.
>>>>>>>>>
>>>>>>>>>
>>>>>>>>>
>>>>>>>>>
>>>>>>>>>
>>>>>>>>> Which check you removed?
>>>>>>>>>
>>>>>>>>
>>>>>>>> My older patch had the below mentioned check which was required on
>>>>>>>> JDK9 where the default MaxVectorSize was 64. It has been handled
>>>>>>>> better in openJDK10. So this check is not required anymore.
>>>>>>>>
>>>>>>>> +    // Some defaults for AMD family 17h
>>>>>>>> +    if ( cpu_family() == 0x17 ) {
>>>>>>>> ...
>>>>>>>> ...
>>>>>>>> +      if (MaxVectorSize > 32) {
>>>>>>>> +        FLAG_SET_DEFAULT(MaxVectorSize, 32);
>>>>>>>> +      }
>>>>>>>> ..
>>>>>>>> ..
>>>>>>>> +      }
>>>>>>>>
>>>>>>>>>>
>>>>>>>>>> I have one query regarding the setting of UseSHA flag:
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>> http://hg.openjdk.java.net/jdk10/hs/hotspot/file/046eab27258f/src/cpu/x86/vm/vm_version_x86.cpp#l821
>>>>>>>>>>
>>>>>>>>>> AMD 17h has support for SHA.
>>>>>>>>>> AMD 15h doesn't have  support for SHA. Still "UseSHA" flag gets
>>>>>>>>>> enabled for it based on the availability of BMI2 and AVX2. Is
>>>>>>>>>> there
>>>>>>>>>> an
>>>>>>>>>> underlying reason for this? I have handled this in the patch but
>>>>>>>>>> just
>>>>>>>>>> wanted to confirm.
>>>>>>>>>
>>>>>>>>>
>>>>>>>>>
>>>>>>>>>
>>>>>>>>>
>>>>>>>>> It was done with next changes which use only AVX2 and BMI2
>>>>>>>>> instructions
>>>>>>>>> to
>>>>>>>>> calculate SHA-256:
>>>>>>>>>
>>>>>>>>> http://hg.openjdk.java.net/jdk10/hs/hotspot/rev/6a17c49de974
>>>>>>>>>
>>>>>>>>> I don't know if AMD 15h supports these instructions and can execute
>>>>>>>>> that
>>>>>>>>> code. You need to test it.
>>>>>>>>>
>>>>>>>>
>>>>>>>> Ok, got it. Since AMD15h has support for AVX2 and BMI2 instructions,
>>>>>>>> it should work.
>>>>>>>> Confirmed by running following sanity tests:
>>>>>>>>
>>>>>>>> ./hotspot/test/compiler/intrinsics/sha/sanity/TestSHA1Intrinsics.java
>>>>>>>>
>>>>>>>>
>>>>>>>> ./hotspot/test/compiler/intrinsics/sha/sanity/TestSHA512Intrinsics.java
>>>>>>>>
>>>>>>>>
>>>>>>>> ./hotspot/test/compiler/intrinsics/sha/sanity/TestSHA256Intrinsics.java
>>>>>>>>
>>>>>>>> So I have removed those SHA checks from my patch too.
>>>>>>>>
>>>>>>>> Please find attached updated, re-tested patch.
>>>>>>>>
>>>>>>>> diff --git a/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>> b/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>> --- a/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>> +++ b/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>> @@ -1109,11 +1109,27 @@
>>>>>>>>         }
>>>>>>>>
>>>>>>>>     #ifdef COMPILER2
>>>>>>>> -    if (MaxVectorSize > 16) {
>>>>>>>> -      // Limit vectors size to 16 bytes on current AMD cpus.
>>>>>>>> +    if (cpu_family() < 0x17 && MaxVectorSize > 16) {
>>>>>>>> +      // Limit vectors size to 16 bytes on AMD cpus < 17h.
>>>>>>>>           FLAG_SET_DEFAULT(MaxVectorSize, 16);
>>>>>>>>         }
>>>>>>>>     #endif // COMPILER2
>>>>>>>> +
>>>>>>>> +    // Some defaults for AMD family 17h
>>>>>>>> +    if ( cpu_family() == 0x17 ) {
>>>>>>>> +      // On family 17h processors use XMM and UnalignedLoadStores
>>>>>>>> for
>>>>>>>> Array Copy
>>>>>>>> +      if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
>>>>>>>> +        FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
>>>>>>>> +      }
>>>>>>>> +      if (supports_sse2() &&
>>>>>>>> FLAG_IS_DEFAULT(UseUnalignedLoadStores))
>>>>>>>> {
>>>>>>>> +        FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
>>>>>>>> +      }
>>>>>>>> +#ifdef COMPILER2
>>>>>>>> +      if (supports_sse4_2() && FLAG_IS_DEFAULT(UseFPUForSpilling))
>>>>>>>> {
>>>>>>>> +        FLAG_SET_DEFAULT(UseFPUForSpilling, true);
>>>>>>>> +      }
>>>>>>>> +#endif
>>>>>>>> +    }
>>>>>>>>       }
>>>>>>>>
>>>>>>>>       if( is_intel() ) { // Intel cpus specific settings
>>>>>>>> diff --git a/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>> b/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>> --- a/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>> +++ b/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>> @@ -505,6 +505,14 @@
>>>>>>>>           result |= CPU_CLMUL;
>>>>>>>>         if (_cpuid_info.sef_cpuid7_ebx.bits.rtm != 0)
>>>>>>>>           result |= CPU_RTM;
>>>>>>>> +    if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
>>>>>>>> +       result |= CPU_ADX;
>>>>>>>> +    if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
>>>>>>>> +      result |= CPU_BMI2;
>>>>>>>> +    if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
>>>>>>>> +      result |= CPU_SHA;
>>>>>>>> +    if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
>>>>>>>> +      result |= CPU_FMA;
>>>>>>>>
>>>>>>>>         // AMD features.
>>>>>>>>         if (is_amd()) {
>>>>>>>> @@ -515,19 +523,13 @@
>>>>>>>>             result |= CPU_LZCNT;
>>>>>>>>           if (_cpuid_info.ext_cpuid1_ecx.bits.sse4a != 0)
>>>>>>>>             result |= CPU_SSE4A;
>>>>>>>> +      if(_cpuid_info.std_cpuid1_edx.bits.ht != 0)
>>>>>>>> +        result |= CPU_HT;
>>>>>>>>         }
>>>>>>>>         // Intel features.
>>>>>>>>         if(is_intel()) {
>>>>>>>> -      if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
>>>>>>>> -         result |= CPU_ADX;
>>>>>>>> -      if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
>>>>>>>> -        result |= CPU_BMI2;
>>>>>>>> -      if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
>>>>>>>> -        result |= CPU_SHA;
>>>>>>>>           if(_cpuid_info.ext_cpuid1_ecx.bits.lzcnt_intel != 0)
>>>>>>>>             result |= CPU_LZCNT;
>>>>>>>> -      if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
>>>>>>>> -        result |= CPU_FMA;
>>>>>>>>           // for Intel, ecx.bits.misalignsse bit (bit 8) indicates
>>>>>>>> support for prefetchw
>>>>>>>>           if (_cpuid_info.ext_cpuid1_ecx.bits.misalignsse != 0) {
>>>>>>>>             result |= CPU_3DNOW_PREFETCH;
>>>>>>>>
>>>>>>>> Please let me know your comments.
>>>>>>>>
>>>>>>>> Thanks for your time.
>>>>>>>> Rohit
>>>>>>>>
>>>>>>>>>>
>>>>>>>>>> Thanks for taking time to review the code.
>>>>>>>>>>
>>>>>>>>>> diff --git a/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>> b/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>> --- a/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>> +++ b/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>> @@ -1088,6 +1088,22 @@
>>>>>>>>>>            }
>>>>>>>>>>            FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
>>>>>>>>>>          }
>>>>>>>>>> +    if (supports_sha()) {
>>>>>>>>>> +      if (FLAG_IS_DEFAULT(UseSHA)) {
>>>>>>>>>> +        FLAG_SET_DEFAULT(UseSHA, true);
>>>>>>>>>> +      }
>>>>>>>>>> +    } else if (UseSHA || UseSHA1Intrinsics || UseSHA256Intrinsics
>>>>>>>>>> ||
>>>>>>>>>> UseSHA512Intrinsics) {
>>>>>>>>>> +      if (!FLAG_IS_DEFAULT(UseSHA) ||
>>>>>>>>>> +          !FLAG_IS_DEFAULT(UseSHA1Intrinsics) ||
>>>>>>>>>> +          !FLAG_IS_DEFAULT(UseSHA256Intrinsics) ||
>>>>>>>>>> +          !FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
>>>>>>>>>> +        warning("SHA instructions are not available on this
>>>>>>>>>> CPU");
>>>>>>>>>> +      }
>>>>>>>>>> +      FLAG_SET_DEFAULT(UseSHA, false);
>>>>>>>>>> +      FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
>>>>>>>>>> +      FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
>>>>>>>>>> +      FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
>>>>>>>>>> +    }
>>>>>>>>>>
>>>>>>>>>>          // some defaults for AMD family 15h
>>>>>>>>>>          if ( cpu_family() == 0x15 ) {
>>>>>>>>>> @@ -1109,11 +1125,40 @@
>>>>>>>>>>          }
>>>>>>>>>>
>>>>>>>>>>      #ifdef COMPILER2
>>>>>>>>>> -    if (MaxVectorSize > 16) {
>>>>>>>>>> -      // Limit vectors size to 16 bytes on current AMD cpus.
>>>>>>>>>> +    if (cpu_family() < 0x17 && MaxVectorSize > 16) {
>>>>>>>>>> +      // Limit vectors size to 16 bytes on AMD cpus < 17h.
>>>>>>>>>>            FLAG_SET_DEFAULT(MaxVectorSize, 16);
>>>>>>>>>>          }
>>>>>>>>>>      #endif // COMPILER2
>>>>>>>>>> +
>>>>>>>>>> +    // Some defaults for AMD family 17h
>>>>>>>>>> +    if ( cpu_family() == 0x17 ) {
>>>>>>>>>> +      // On family 17h processors use XMM and UnalignedLoadStores
>>>>>>>>>> for
>>>>>>>>>> Array Copy
>>>>>>>>>> +      if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy))
>>>>>>>>>> {
>>>>>>>>>> +        FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
>>>>>>>>>> +      }
>>>>>>>>>> +      if (supports_sse2() &&
>>>>>>>>>> FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
>>>>>>>>>> +        FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
>>>>>>>>>> +      }
>>>>>>>>>> +      if (supports_bmi2() &&
>>>>>>>>>> FLAG_IS_DEFAULT(UseBMI2Instructions))
>>>>>>>>>> {
>>>>>>>>>> +        FLAG_SET_DEFAULT(UseBMI2Instructions, true);
>>>>>>>>>> +      }
>>>>>>>>>> +      if (UseSHA) {
>>>>>>>>>> +        if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
>>>>>>>>>> +          FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
>>>>>>>>>> +        } else if (UseSHA512Intrinsics) {
>>>>>>>>>> +          warning("Intrinsics for SHA-384 and SHA-512 crypto hash
>>>>>>>>>> functions not available on this CPU.");
>>>>>>>>>> +          FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
>>>>>>>>>> +        }
>>>>>>>>>> +      }
>>>>>>>>>> +#ifdef COMPILER2
>>>>>>>>>> +      if (supports_sse4_2()) {
>>>>>>>>>> +        if (FLAG_IS_DEFAULT(UseFPUForSpilling)) {
>>>>>>>>>> +          FLAG_SET_DEFAULT(UseFPUForSpilling, true);
>>>>>>>>>> +        }
>>>>>>>>>> +      }
>>>>>>>>>> +#endif
>>>>>>>>>> +    }
>>>>>>>>>>        }
>>>>>>>>>>
>>>>>>>>>>        if( is_intel() ) { // Intel cpus specific settings
>>>>>>>>>> diff --git a/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>> b/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>> --- a/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>> +++ b/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>> @@ -505,6 +505,14 @@
>>>>>>>>>>            result |= CPU_CLMUL;
>>>>>>>>>>          if (_cpuid_info.sef_cpuid7_ebx.bits.rtm != 0)
>>>>>>>>>>            result |= CPU_RTM;
>>>>>>>>>> +    if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
>>>>>>>>>> +       result |= CPU_ADX;
>>>>>>>>>> +    if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
>>>>>>>>>> +      result |= CPU_BMI2;
>>>>>>>>>> +    if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
>>>>>>>>>> +      result |= CPU_SHA;
>>>>>>>>>> +    if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
>>>>>>>>>> +      result |= CPU_FMA;
>>>>>>>>>>
>>>>>>>>>>          // AMD features.
>>>>>>>>>>          if (is_amd()) {
>>>>>>>>>> @@ -515,19 +523,13 @@
>>>>>>>>>>              result |= CPU_LZCNT;
>>>>>>>>>>            if (_cpuid_info.ext_cpuid1_ecx.bits.sse4a != 0)
>>>>>>>>>>              result |= CPU_SSE4A;
>>>>>>>>>> +      if(_cpuid_info.std_cpuid1_edx.bits.ht != 0)
>>>>>>>>>> +        result |= CPU_HT;
>>>>>>>>>>          }
>>>>>>>>>>          // Intel features.
>>>>>>>>>>          if(is_intel()) {
>>>>>>>>>> -      if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
>>>>>>>>>> -         result |= CPU_ADX;
>>>>>>>>>> -      if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
>>>>>>>>>> -        result |= CPU_BMI2;
>>>>>>>>>> -      if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
>>>>>>>>>> -        result |= CPU_SHA;
>>>>>>>>>>            if(_cpuid_info.ext_cpuid1_ecx.bits.lzcnt_intel != 0)
>>>>>>>>>>              result |= CPU_LZCNT;
>>>>>>>>>> -      if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
>>>>>>>>>> -        result |= CPU_FMA;
>>>>>>>>>>            // for Intel, ecx.bits.misalignsse bit (bit 8)
>>>>>>>>>> indicates
>>>>>>>>>> support for prefetchw
>>>>>>>>>>            if (_cpuid_info.ext_cpuid1_ecx.bits.misalignsse != 0) {
>>>>>>>>>>              result |= CPU_3DNOW_PREFETCH;
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>> Regards,
>>>>>>>>>> Rohit
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>>> On 9/1/17 8:04 AM, Rohit Arul Raj wrote:
>>>>>>>>>>>>
>>>>>>>>>>>>
>>>>>>>>>>>>
>>>>>>>>>>>>
>>>>>>>>>>>>
>>>>>>>>>>>> On Fri, Sep 1, 2017 at 10:27 AM, Rohit Arul Raj
>>>>>>>>>>>> <rohitarulraj at gmail.com>
>>>>>>>>>>>> wrote:
>>>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>>> On Fri, Sep 1, 2017 at 3:01 AM, David Holmes
>>>>>>>>>>>>> <david.holmes at oracle.com>
>>>>>>>>>>>>> wrote:
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> Hi Rohit,
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> I think the patch needs updating for jdk10 as I already see a
>>>>>>>>>>>>>> lot of
>>>>>>>>>>>>>> logic
>>>>>>>>>>>>>> around UseSHA in vm_version_x86.cpp.
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> Thanks,
>>>>>>>>>>>>>> David
>>>>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>>> Thanks David, I will update the patch wrt JDK10 source base,
>>>>>>>>>>>>> test
>>>>>>>>>>>>> and
>>>>>>>>>>>>> resubmit for review.
>>>>>>>>>>>>>
>>>>>>>>>>>>> Regards,
>>>>>>>>>>>>> Rohit
>>>>>>>>>>>>>
>>>>>>>>>>>>
>>>>>>>>>>>> Hi All,
>>>>>>>>>>>>
>>>>>>>>>>>> I have updated the patch wrt openjdk10/hotspot (parent:
>>>>>>>>>>>> 13519:71337910df60), did regression testing using jtreg ($make
>>>>>>>>>>>> default) and didnt find any regressions.
>>>>>>>>>>>>
>>>>>>>>>>>> Can anyone please volunteer to review this patch  which sets
>>>>>>>>>>>> flag/ISA
>>>>>>>>>>>> defaults for newer AMD 17h (EPYC) processor?
>>>>>>>>>>>>
>>>>>>>>>>>> ************************* Patch ****************************
>>>>>>>>>>>>
>>>>>>>>>>>> diff --git a/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>>> b/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>>> --- a/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>>> +++ b/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>>> @@ -1088,6 +1088,22 @@
>>>>>>>>>>>>             }
>>>>>>>>>>>>             FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
>>>>>>>>>>>>           }
>>>>>>>>>>>> +    if (supports_sha()) {
>>>>>>>>>>>> +      if (FLAG_IS_DEFAULT(UseSHA)) {
>>>>>>>>>>>> +        FLAG_SET_DEFAULT(UseSHA, true);
>>>>>>>>>>>> +      }
>>>>>>>>>>>> +    } else if (UseSHA || UseSHA1Intrinsics ||
>>>>>>>>>>>> UseSHA256Intrinsics
>>>>>>>>>>>> ||
>>>>>>>>>>>> UseSHA512Intrinsics) {
>>>>>>>>>>>> +      if (!FLAG_IS_DEFAULT(UseSHA) ||
>>>>>>>>>>>> +          !FLAG_IS_DEFAULT(UseSHA1Intrinsics) ||
>>>>>>>>>>>> +          !FLAG_IS_DEFAULT(UseSHA256Intrinsics) ||
>>>>>>>>>>>> +          !FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
>>>>>>>>>>>> +        warning("SHA instructions are not available on this
>>>>>>>>>>>> CPU");
>>>>>>>>>>>> +      }
>>>>>>>>>>>> +      FLAG_SET_DEFAULT(UseSHA, false);
>>>>>>>>>>>> +      FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
>>>>>>>>>>>> +      FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
>>>>>>>>>>>> +      FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
>>>>>>>>>>>> +    }
>>>>>>>>>>>>
>>>>>>>>>>>>           // some defaults for AMD family 15h
>>>>>>>>>>>>           if ( cpu_family() == 0x15 ) {
>>>>>>>>>>>> @@ -1109,11 +1125,43 @@
>>>>>>>>>>>>           }
>>>>>>>>>>>>
>>>>>>>>>>>>       #ifdef COMPILER2
>>>>>>>>>>>> -    if (MaxVectorSize > 16) {
>>>>>>>>>>>> -      // Limit vectors size to 16 bytes on current AMD cpus.
>>>>>>>>>>>> +    if (cpu_family() < 0x17 && MaxVectorSize > 16) {
>>>>>>>>>>>> +      // Limit vectors size to 16 bytes on AMD cpus < 17h.
>>>>>>>>>>>>             FLAG_SET_DEFAULT(MaxVectorSize, 16);
>>>>>>>>>>>>           }
>>>>>>>>>>>>       #endif // COMPILER2
>>>>>>>>>>>> +
>>>>>>>>>>>> +    // Some defaults for AMD family 17h
>>>>>>>>>>>> +    if ( cpu_family() == 0x17 ) {
>>>>>>>>>>>> +      // On family 17h processors use XMM and
>>>>>>>>>>>> UnalignedLoadStores
>>>>>>>>>>>> for
>>>>>>>>>>>> Array Copy
>>>>>>>>>>>> +      if (supports_sse2() &&
>>>>>>>>>>>> FLAG_IS_DEFAULT(UseXMMForArrayCopy))
>>>>>>>>>>>> {
>>>>>>>>>>>> +        UseXMMForArrayCopy = true;
>>>>>>>>>>>> +      }
>>>>>>>>>>>> +      if (supports_sse2() &&
>>>>>>>>>>>> FLAG_IS_DEFAULT(UseUnalignedLoadStores))
>>>>>>>>>>>> {
>>>>>>>>>>>> +        UseUnalignedLoadStores = true;
>>>>>>>>>>>> +      }
>>>>>>>>>>>> +      if (supports_bmi2() &&
>>>>>>>>>>>> FLAG_IS_DEFAULT(UseBMI2Instructions)) {
>>>>>>>>>>>> +        UseBMI2Instructions = true;
>>>>>>>>>>>> +      }
>>>>>>>>>>>> +      if (MaxVectorSize > 32) {
>>>>>>>>>>>> +        FLAG_SET_DEFAULT(MaxVectorSize, 32);
>>>>>>>>>>>> +      }
>>>>>>>>>>>> +      if (UseSHA) {
>>>>>>>>>>>> +        if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
>>>>>>>>>>>> +          FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
>>>>>>>>>>>> +        } else if (UseSHA512Intrinsics) {
>>>>>>>>>>>> +          warning("Intrinsics for SHA-384 and SHA-512 crypto
>>>>>>>>>>>> hash
>>>>>>>>>>>> functions not available on this CPU.");
>>>>>>>>>>>> +          FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
>>>>>>>>>>>> +        }
>>>>>>>>>>>> +      }
>>>>>>>>>>>> +#ifdef COMPILER2
>>>>>>>>>>>> +      if (supports_sse4_2()) {
>>>>>>>>>>>> +        if (FLAG_IS_DEFAULT(UseFPUForSpilling)) {
>>>>>>>>>>>> +          FLAG_SET_DEFAULT(UseFPUForSpilling, true);
>>>>>>>>>>>> +        }
>>>>>>>>>>>> +      }
>>>>>>>>>>>> +#endif
>>>>>>>>>>>> +    }
>>>>>>>>>>>>         }
>>>>>>>>>>>>
>>>>>>>>>>>>         if( is_intel() ) { // Intel cpus specific settings
>>>>>>>>>>>> diff --git a/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>>>> b/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>>>> --- a/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>>>> +++ b/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>>>> @@ -505,6 +505,14 @@
>>>>>>>>>>>>             result |= CPU_CLMUL;
>>>>>>>>>>>>           if (_cpuid_info.sef_cpuid7_ebx.bits.rtm != 0)
>>>>>>>>>>>>             result |= CPU_RTM;
>>>>>>>>>>>> +    if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
>>>>>>>>>>>> +       result |= CPU_ADX;
>>>>>>>>>>>> +    if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
>>>>>>>>>>>> +      result |= CPU_BMI2;
>>>>>>>>>>>> +    if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
>>>>>>>>>>>> +      result |= CPU_SHA;
>>>>>>>>>>>> +    if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
>>>>>>>>>>>> +      result |= CPU_FMA;
>>>>>>>>>>>>
>>>>>>>>>>>>           // AMD features.
>>>>>>>>>>>>           if (is_amd()) {
>>>>>>>>>>>> @@ -515,19 +523,13 @@
>>>>>>>>>>>>               result |= CPU_LZCNT;
>>>>>>>>>>>>             if (_cpuid_info.ext_cpuid1_ecx.bits.sse4a != 0)
>>>>>>>>>>>>               result |= CPU_SSE4A;
>>>>>>>>>>>> +      if(_cpuid_info.std_cpuid1_edx.bits.ht != 0)
>>>>>>>>>>>> +        result |= CPU_HT;
>>>>>>>>>>>>           }
>>>>>>>>>>>>           // Intel features.
>>>>>>>>>>>>           if(is_intel()) {
>>>>>>>>>>>> -      if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
>>>>>>>>>>>> -         result |= CPU_ADX;
>>>>>>>>>>>> -      if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
>>>>>>>>>>>> -        result |= CPU_BMI2;
>>>>>>>>>>>> -      if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
>>>>>>>>>>>> -        result |= CPU_SHA;
>>>>>>>>>>>>             if(_cpuid_info.ext_cpuid1_ecx.bits.lzcnt_intel != 0)
>>>>>>>>>>>>               result |= CPU_LZCNT;
>>>>>>>>>>>> -      if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
>>>>>>>>>>>> -        result |= CPU_FMA;
>>>>>>>>>>>>             // for Intel, ecx.bits.misalignsse bit (bit 8)
>>>>>>>>>>>> indicates
>>>>>>>>>>>> support for prefetchw
>>>>>>>>>>>>             if (_cpuid_info.ext_cpuid1_ecx.bits.misalignsse !=
>>>>>>>>>>>> 0) {
>>>>>>>>>>>>               result |= CPU_3DNOW_PREFETCH;
>>>>>>>>>>>>
>>>>>>>>>>>> **************************************************************
>>>>>>>>>>>>
>>>>>>>>>>>> Thanks,
>>>>>>>>>>>> Rohit
>>>>>>>>>>>>
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> On 1/09/2017 1:11 AM, Rohit Arul Raj wrote:
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> On Thu, Aug 31, 2017 at 5:59 PM, David Holmes
>>>>>>>>>>>>>>> <david.holmes at oracle.com>
>>>>>>>>>>>>>>> wrote:
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> Hi Rohit,
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> On 31/08/2017 7:03 PM, Rohit Arul Raj wrote:
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>> I would like an volunteer to review this patch (openJDK9)
>>>>>>>>>>>>>>>>> which
>>>>>>>>>>>>>>>>> sets
>>>>>>>>>>>>>>>>> flag/ISA defaults for newer AMD 17h (EPYC) processor and
>>>>>>>>>>>>>>>>> help
>>>>>>>>>>>>>>>>> us
>>>>>>>>>>>>>>>>> with
>>>>>>>>>>>>>>>>> the commit process.
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>> Webrev:
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>> https://www.dropbox.com/sh/08bsxaxupg8kbam/AADurTXLGIZ6C-tiIAi_Glyka?dl=0
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> Unfortunately patches can not be accepted from systems
>>>>>>>>>>>>>>>> outside
>>>>>>>>>>>>>>>> the
>>>>>>>>>>>>>>>> OpenJDK
>>>>>>>>>>>>>>>> infrastructure and ...
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>> I have also attached the patch (hg diff -g) for reference.
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> ... unfortunately patches tend to get stripped by the mail
>>>>>>>>>>>>>>>> servers.
>>>>>>>>>>>>>>>> If
>>>>>>>>>>>>>>>> the
>>>>>>>>>>>>>>>> patch is small please include it inline. Otherwise you will
>>>>>>>>>>>>>>>> need
>>>>>>>>>>>>>>>> to
>>>>>>>>>>>>>>>> find
>>>>>>>>>>>>>>>> an
>>>>>>>>>>>>>>>> OpenJDK Author who can host it for you on
>>>>>>>>>>>>>>>> cr.openjdk.java.net.
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>> 3) I have done regression testing using jtreg ($make
>>>>>>>>>>>>>>>>> default)
>>>>>>>>>>>>>>>>> and
>>>>>>>>>>>>>>>>> didn't find any regressions.
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> Sounds good, but until I see the patch it is hard to comment
>>>>>>>>>>>>>>>> on
>>>>>>>>>>>>>>>> testing
>>>>>>>>>>>>>>>> requirements.
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> Thanks,
>>>>>>>>>>>>>>>> David
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> Thanks David,
>>>>>>>>>>>>>>> Yes, it's a small patch.
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> diff --git a/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>>>>>> b/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>>>>>> --- a/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>>>>>> +++ b/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>>>>>> @@ -1051,6 +1051,22 @@
>>>>>>>>>>>>>>>              }
>>>>>>>>>>>>>>>              FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
>>>>>>>>>>>>>>>            }
>>>>>>>>>>>>>>> +    if (supports_sha()) {
>>>>>>>>>>>>>>> +      if (FLAG_IS_DEFAULT(UseSHA)) {
>>>>>>>>>>>>>>> +        FLAG_SET_DEFAULT(UseSHA, true);
>>>>>>>>>>>>>>> +      }
>>>>>>>>>>>>>>> +    } else if (UseSHA || UseSHA1Intrinsics ||
>>>>>>>>>>>>>>> UseSHA256Intrinsics
>>>>>>>>>>>>>>> ||
>>>>>>>>>>>>>>> UseSHA512Intrinsics) {
>>>>>>>>>>>>>>> +      if (!FLAG_IS_DEFAULT(UseSHA) ||
>>>>>>>>>>>>>>> +          !FLAG_IS_DEFAULT(UseSHA1Intrinsics) ||
>>>>>>>>>>>>>>> +          !FLAG_IS_DEFAULT(UseSHA256Intrinsics) ||
>>>>>>>>>>>>>>> +          !FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
>>>>>>>>>>>>>>> +        warning("SHA instructions are not available on this
>>>>>>>>>>>>>>> CPU");
>>>>>>>>>>>>>>> +      }
>>>>>>>>>>>>>>> +      FLAG_SET_DEFAULT(UseSHA, false);
>>>>>>>>>>>>>>> +      FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
>>>>>>>>>>>>>>> +      FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
>>>>>>>>>>>>>>> +      FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
>>>>>>>>>>>>>>> +    }
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>            // some defaults for AMD family 15h
>>>>>>>>>>>>>>>            if ( cpu_family() == 0x15 ) {
>>>>>>>>>>>>>>> @@ -1072,11 +1088,43 @@
>>>>>>>>>>>>>>>            }
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>        #ifdef COMPILER2
>>>>>>>>>>>>>>> -    if (MaxVectorSize > 16) {
>>>>>>>>>>>>>>> -      // Limit vectors size to 16 bytes on current AMD cpus.
>>>>>>>>>>>>>>> +    if (cpu_family() < 0x17 && MaxVectorSize > 16) {
>>>>>>>>>>>>>>> +      // Limit vectors size to 16 bytes on AMD cpus < 17h.
>>>>>>>>>>>>>>>              FLAG_SET_DEFAULT(MaxVectorSize, 16);
>>>>>>>>>>>>>>>            }
>>>>>>>>>>>>>>>        #endif // COMPILER2
>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>> +    // Some defaults for AMD family 17h
>>>>>>>>>>>>>>> +    if ( cpu_family() == 0x17 ) {
>>>>>>>>>>>>>>> +      // On family 17h processors use XMM and
>>>>>>>>>>>>>>> UnalignedLoadStores
>>>>>>>>>>>>>>> for
>>>>>>>>>>>>>>> Array Copy
>>>>>>>>>>>>>>> +      if (supports_sse2() &&
>>>>>>>>>>>>>>> FLAG_IS_DEFAULT(UseXMMForArrayCopy))
>>>>>>>>>>>>>>> {
>>>>>>>>>>>>>>> +        UseXMMForArrayCopy = true;
>>>>>>>>>>>>>>> +      }
>>>>>>>>>>>>>>> +      if (supports_sse2() &&
>>>>>>>>>>>>>>> FLAG_IS_DEFAULT(UseUnalignedLoadStores))
>>>>>>>>>>>>>>> {
>>>>>>>>>>>>>>> +        UseUnalignedLoadStores = true;
>>>>>>>>>>>>>>> +      }
>>>>>>>>>>>>>>> +      if (supports_bmi2() &&
>>>>>>>>>>>>>>> FLAG_IS_DEFAULT(UseBMI2Instructions))
>>>>>>>>>>>>>>> {
>>>>>>>>>>>>>>> +        UseBMI2Instructions = true;
>>>>>>>>>>>>>>> +      }
>>>>>>>>>>>>>>> +      if (MaxVectorSize > 32) {
>>>>>>>>>>>>>>> +        FLAG_SET_DEFAULT(MaxVectorSize, 32);
>>>>>>>>>>>>>>> +      }
>>>>>>>>>>>>>>> +      if (UseSHA) {
>>>>>>>>>>>>>>> +        if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
>>>>>>>>>>>>>>> +          FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
>>>>>>>>>>>>>>> +        } else if (UseSHA512Intrinsics) {
>>>>>>>>>>>>>>> +          warning("Intrinsics for SHA-384 and SHA-512 crypto
>>>>>>>>>>>>>>> hash
>>>>>>>>>>>>>>> functions not available on this CPU.");
>>>>>>>>>>>>>>> +          FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
>>>>>>>>>>>>>>> +        }
>>>>>>>>>>>>>>> +      }
>>>>>>>>>>>>>>> +#ifdef COMPILER2
>>>>>>>>>>>>>>> +      if (supports_sse4_2()) {
>>>>>>>>>>>>>>> +        if (FLAG_IS_DEFAULT(UseFPUForSpilling)) {
>>>>>>>>>>>>>>> +          FLAG_SET_DEFAULT(UseFPUForSpilling, true);
>>>>>>>>>>>>>>> +        }
>>>>>>>>>>>>>>> +      }
>>>>>>>>>>>>>>> +#endif
>>>>>>>>>>>>>>> +    }
>>>>>>>>>>>>>>>          }
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>          if( is_intel() ) { // Intel cpus specific settings
>>>>>>>>>>>>>>> diff --git a/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>>>>>>> b/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>>>>>>> --- a/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>>>>>>> +++ b/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>>>>>>> @@ -513,6 +513,16 @@
>>>>>>>>>>>>>>>                result |= CPU_LZCNT;
>>>>>>>>>>>>>>>              if (_cpuid_info.ext_cpuid1_ecx.bits.sse4a != 0)
>>>>>>>>>>>>>>>                result |= CPU_SSE4A;
>>>>>>>>>>>>>>> +      if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
>>>>>>>>>>>>>>> +        result |= CPU_BMI2;
>>>>>>>>>>>>>>> +      if(_cpuid_info.std_cpuid1_edx.bits.ht != 0)
>>>>>>>>>>>>>>> +        result |= CPU_HT;
>>>>>>>>>>>>>>> +      if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
>>>>>>>>>>>>>>> +        result |= CPU_ADX;
>>>>>>>>>>>>>>> +      if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
>>>>>>>>>>>>>>> +        result |= CPU_SHA;
>>>>>>>>>>>>>>> +      if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
>>>>>>>>>>>>>>> +        result |= CPU_FMA;
>>>>>>>>>>>>>>>            }
>>>>>>>>>>>>>>>            // Intel features.
>>>>>>>>>>>>>>>            if(is_intel()) {
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> Regards,
>>>>>>>>>>>>>>> Rohit
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>
>>>>>>>
>>>
>


More information about the hotspot-dev mailing list