RFR: Newer AMD 17h (EPYC) Processor family defaults

Vladimir Kozlov vladimir.kozlov at oracle.com
Fri Sep 1 16:43:54 UTC 2017


Hi Rohit,

Changes look good. Only question I have is about MaxVectorSize. It is set > 16 only in presence of AVX:

http://hg.openjdk.java.net/jdk10/hs/hotspot/file/046eab27258f/src/cpu/x86/vm/vm_version_x86.cpp#l945

Does that code works for AMD 17h too?

Thanks,
Vladimir

On 9/1/17 8:04 AM, Rohit Arul Raj wrote:
> On Fri, Sep 1, 2017 at 10:27 AM, Rohit Arul Raj <rohitarulraj at gmail.com> wrote:
>> On Fri, Sep 1, 2017 at 3:01 AM, David Holmes <david.holmes at oracle.com> wrote:
>>> Hi Rohit,
>>>
>>> I think the patch needs updating for jdk10 as I already see a lot of logic
>>> around UseSHA in vm_version_x86.cpp.
>>>
>>> Thanks,
>>> David
>>>
>>
>> Thanks David, I will update the patch wrt JDK10 source base, test and
>> resubmit for review.
>>
>> Regards,
>> Rohit
>>
> 
> Hi All,
> 
> I have updated the patch wrt openjdk10/hotspot (parent:
> 13519:71337910df60), did regression testing using jtreg ($make
> default) and didnt find any regressions.
> 
> Can anyone please volunteer to review this patch  which sets flag/ISA
> defaults for newer AMD 17h (EPYC) processor?
> 
> ************************* Patch ****************************
> 
> diff --git a/src/cpu/x86/vm/vm_version_x86.cpp
> b/src/cpu/x86/vm/vm_version_x86.cpp
> --- a/src/cpu/x86/vm/vm_version_x86.cpp
> +++ b/src/cpu/x86/vm/vm_version_x86.cpp
> @@ -1088,6 +1088,22 @@
>         }
>         FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
>       }
> +    if (supports_sha()) {
> +      if (FLAG_IS_DEFAULT(UseSHA)) {
> +        FLAG_SET_DEFAULT(UseSHA, true);
> +      }
> +    } else if (UseSHA || UseSHA1Intrinsics || UseSHA256Intrinsics ||
> UseSHA512Intrinsics) {
> +      if (!FLAG_IS_DEFAULT(UseSHA) ||
> +          !FLAG_IS_DEFAULT(UseSHA1Intrinsics) ||
> +          !FLAG_IS_DEFAULT(UseSHA256Intrinsics) ||
> +          !FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
> +        warning("SHA instructions are not available on this CPU");
> +      }
> +      FLAG_SET_DEFAULT(UseSHA, false);
> +      FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
> +      FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
> +      FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
> +    }
> 
>       // some defaults for AMD family 15h
>       if ( cpu_family() == 0x15 ) {
> @@ -1109,11 +1125,43 @@
>       }
> 
>   #ifdef COMPILER2
> -    if (MaxVectorSize > 16) {
> -      // Limit vectors size to 16 bytes on current AMD cpus.
> +    if (cpu_family() < 0x17 && MaxVectorSize > 16) {
> +      // Limit vectors size to 16 bytes on AMD cpus < 17h.
>         FLAG_SET_DEFAULT(MaxVectorSize, 16);
>       }
>   #endif // COMPILER2
> +
> +    // Some defaults for AMD family 17h
> +    if ( cpu_family() == 0x17 ) {
> +      // On family 17h processors use XMM and UnalignedLoadStores for
> Array Copy
> +      if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
> +        UseXMMForArrayCopy = true;
> +      }
> +      if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
> +        UseUnalignedLoadStores = true;
> +      }
> +      if (supports_bmi2() && FLAG_IS_DEFAULT(UseBMI2Instructions)) {
> +        UseBMI2Instructions = true;
> +      }
> +      if (MaxVectorSize > 32) {
> +        FLAG_SET_DEFAULT(MaxVectorSize, 32);
> +      }
> +      if (UseSHA) {
> +        if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
> +          FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
> +        } else if (UseSHA512Intrinsics) {
> +          warning("Intrinsics for SHA-384 and SHA-512 crypto hash
> functions not available on this CPU.");
> +          FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
> +        }
> +      }
> +#ifdef COMPILER2
> +      if (supports_sse4_2()) {
> +        if (FLAG_IS_DEFAULT(UseFPUForSpilling)) {
> +          FLAG_SET_DEFAULT(UseFPUForSpilling, true);
> +        }
> +      }
> +#endif
> +    }
>     }
> 
>     if( is_intel() ) { // Intel cpus specific settings
> diff --git a/src/cpu/x86/vm/vm_version_x86.hpp
> b/src/cpu/x86/vm/vm_version_x86.hpp
> --- a/src/cpu/x86/vm/vm_version_x86.hpp
> +++ b/src/cpu/x86/vm/vm_version_x86.hpp
> @@ -505,6 +505,14 @@
>         result |= CPU_CLMUL;
>       if (_cpuid_info.sef_cpuid7_ebx.bits.rtm != 0)
>         result |= CPU_RTM;
> +    if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
> +       result |= CPU_ADX;
> +    if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
> +      result |= CPU_BMI2;
> +    if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
> +      result |= CPU_SHA;
> +    if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
> +      result |= CPU_FMA;
> 
>       // AMD features.
>       if (is_amd()) {
> @@ -515,19 +523,13 @@
>           result |= CPU_LZCNT;
>         if (_cpuid_info.ext_cpuid1_ecx.bits.sse4a != 0)
>           result |= CPU_SSE4A;
> +      if(_cpuid_info.std_cpuid1_edx.bits.ht != 0)
> +        result |= CPU_HT;
>       }
>       // Intel features.
>       if(is_intel()) {
> -      if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
> -         result |= CPU_ADX;
> -      if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
> -        result |= CPU_BMI2;
> -      if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
> -        result |= CPU_SHA;
>         if(_cpuid_info.ext_cpuid1_ecx.bits.lzcnt_intel != 0)
>           result |= CPU_LZCNT;
> -      if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
> -        result |= CPU_FMA;
>         // for Intel, ecx.bits.misalignsse bit (bit 8) indicates
> support for prefetchw
>         if (_cpuid_info.ext_cpuid1_ecx.bits.misalignsse != 0) {
>           result |= CPU_3DNOW_PREFETCH;
> 
> **************************************************************
> 
> Thanks,
> Rohit
> 
>>>
>>> On 1/09/2017 1:11 AM, Rohit Arul Raj wrote:
>>>>
>>>> On Thu, Aug 31, 2017 at 5:59 PM, David Holmes <david.holmes at oracle.com>
>>>> wrote:
>>>>>
>>>>> Hi Rohit,
>>>>>
>>>>> On 31/08/2017 7:03 PM, Rohit Arul Raj wrote:
>>>>>>
>>>>>>
>>>>>> I would like an volunteer to review this patch (openJDK9) which sets
>>>>>> flag/ISA defaults for newer AMD 17h (EPYC) processor and help us with
>>>>>> the commit process.
>>>>>>
>>>>>> Webrev:
>>>>>>
>>>>>> https://www.dropbox.com/sh/08bsxaxupg8kbam/AADurTXLGIZ6C-tiIAi_Glyka?dl=0
>>>>>
>>>>>
>>>>>
>>>>> Unfortunately patches can not be accepted from systems outside the
>>>>> OpenJDK
>>>>> infrastructure and ...
>>>>>
>>>>>> I have also attached the patch (hg diff -g) for reference.
>>>>>
>>>>>
>>>>>
>>>>> ... unfortunately patches tend to get stripped by the mail servers. If
>>>>> the
>>>>> patch is small please include it inline. Otherwise you will need to find
>>>>> an
>>>>> OpenJDK Author who can host it for you on cr.openjdk.java.net.
>>>>>
>>>>
>>>>>> 3) I have done regression testing using jtreg ($make default) and
>>>>>> didnt find any regressions.
>>>>>
>>>>>
>>>>>
>>>>> Sounds good, but until I see the patch it is hard to comment on testing
>>>>> requirements.
>>>>>
>>>>> Thanks,
>>>>> David
>>>>
>>>>
>>>> Thanks David,
>>>> Yes, it's a small patch.
>>>>
>>>> diff --git a/src/cpu/x86/vm/vm_version_x86.cpp
>>>> b/src/cpu/x86/vm/vm_version_x86.cpp
>>>> --- a/src/cpu/x86/vm/vm_version_x86.cpp
>>>> +++ b/src/cpu/x86/vm/vm_version_x86.cpp
>>>> @@ -1051,6 +1051,22 @@
>>>>          }
>>>>          FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
>>>>        }
>>>> +    if (supports_sha()) {
>>>> +      if (FLAG_IS_DEFAULT(UseSHA)) {
>>>> +        FLAG_SET_DEFAULT(UseSHA, true);
>>>> +      }
>>>> +    } else if (UseSHA || UseSHA1Intrinsics || UseSHA256Intrinsics ||
>>>> UseSHA512Intrinsics) {
>>>> +      if (!FLAG_IS_DEFAULT(UseSHA) ||
>>>> +          !FLAG_IS_DEFAULT(UseSHA1Intrinsics) ||
>>>> +          !FLAG_IS_DEFAULT(UseSHA256Intrinsics) ||
>>>> +          !FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
>>>> +        warning("SHA instructions are not available on this CPU");
>>>> +      }
>>>> +      FLAG_SET_DEFAULT(UseSHA, false);
>>>> +      FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
>>>> +      FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
>>>> +      FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
>>>> +    }
>>>>
>>>>        // some defaults for AMD family 15h
>>>>        if ( cpu_family() == 0x15 ) {
>>>> @@ -1072,11 +1088,43 @@
>>>>        }
>>>>
>>>>    #ifdef COMPILER2
>>>> -    if (MaxVectorSize > 16) {
>>>> -      // Limit vectors size to 16 bytes on current AMD cpus.
>>>> +    if (cpu_family() < 0x17 && MaxVectorSize > 16) {
>>>> +      // Limit vectors size to 16 bytes on AMD cpus < 17h.
>>>>          FLAG_SET_DEFAULT(MaxVectorSize, 16);
>>>>        }
>>>>    #endif // COMPILER2
>>>> +
>>>> +    // Some defaults for AMD family 17h
>>>> +    if ( cpu_family() == 0x17 ) {
>>>> +      // On family 17h processors use XMM and UnalignedLoadStores for
>>>> Array Copy
>>>> +      if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
>>>> +        UseXMMForArrayCopy = true;
>>>> +      }
>>>> +      if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
>>>> +        UseUnalignedLoadStores = true;
>>>> +      }
>>>> +      if (supports_bmi2() && FLAG_IS_DEFAULT(UseBMI2Instructions)) {
>>>> +        UseBMI2Instructions = true;
>>>> +      }
>>>> +      if (MaxVectorSize > 32) {
>>>> +        FLAG_SET_DEFAULT(MaxVectorSize, 32);
>>>> +      }
>>>> +      if (UseSHA) {
>>>> +        if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
>>>> +          FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
>>>> +        } else if (UseSHA512Intrinsics) {
>>>> +          warning("Intrinsics for SHA-384 and SHA-512 crypto hash
>>>> functions not available on this CPU.");
>>>> +          FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
>>>> +        }
>>>> +      }
>>>> +#ifdef COMPILER2
>>>> +      if (supports_sse4_2()) {
>>>> +        if (FLAG_IS_DEFAULT(UseFPUForSpilling)) {
>>>> +          FLAG_SET_DEFAULT(UseFPUForSpilling, true);
>>>> +        }
>>>> +      }
>>>> +#endif
>>>> +    }
>>>>      }
>>>>
>>>>      if( is_intel() ) { // Intel cpus specific settings
>>>> diff --git a/src/cpu/x86/vm/vm_version_x86.hpp
>>>> b/src/cpu/x86/vm/vm_version_x86.hpp
>>>> --- a/src/cpu/x86/vm/vm_version_x86.hpp
>>>> +++ b/src/cpu/x86/vm/vm_version_x86.hpp
>>>> @@ -513,6 +513,16 @@
>>>>            result |= CPU_LZCNT;
>>>>          if (_cpuid_info.ext_cpuid1_ecx.bits.sse4a != 0)
>>>>            result |= CPU_SSE4A;
>>>> +      if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
>>>> +        result |= CPU_BMI2;
>>>> +      if(_cpuid_info.std_cpuid1_edx.bits.ht != 0)
>>>> +        result |= CPU_HT;
>>>> +      if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
>>>> +        result |= CPU_ADX;
>>>> +      if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
>>>> +        result |= CPU_SHA;
>>>> +      if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
>>>> +        result |= CPU_FMA;
>>>>        }
>>>>        // Intel features.
>>>>        if(is_intel()) {
>>>>
>>>> Regards,
>>>> Rohit
>>>>
>>>


More information about the hotspot-dev mailing list