RFR: Newer AMD 17h (EPYC) Processor family defaults

Fri Sep 1 15:04:04 UTC 2017

On Fri, Sep 1, 2017 at 10:27 AM, Rohit Arul Raj <rohitarulraj at gmail.com> wrote:
> On Fri, Sep 1, 2017 at 3:01 AM, David Holmes <david.holmes at oracle.com> wrote:
>> Hi Rohit,
>>
>> I think the patch needs updating for jdk10 as I already see a lot of logic
>> around UseSHA in vm_version_x86.cpp.
>>
>> Thanks,
>> David
>>
>
> Thanks David, I will update the patch wrt JDK10 source base, test and
> resubmit for review.
>
> Regards,
> Rohit
>

Hi All,

I have updated the patch wrt openjdk10/hotspot (parent:
13519:71337910df60), did regression testing using jtreg ($make
default) and didnt find any regressions.

Can anyone please volunteer to review this patch  which sets flag/ISA
defaults for newer AMD 17h (EPYC) processor?

************************* Patch ****************************

diff --git a/src/cpu/x86/vm/vm_version_x86.cpp
b/src/cpu/x86/vm/vm_version_x86.cpp
--- a/src/cpu/x86/vm/vm_version_x86.cpp
+++ b/src/cpu/x86/vm/vm_version_x86.cpp
@@ -1088,6 +1088,22 @@
       }
       FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
     }
+    if (supports_sha()) {
+      if (FLAG_IS_DEFAULT(UseSHA)) {
+        FLAG_SET_DEFAULT(UseSHA, true);
+      }
+    } else if (UseSHA || UseSHA1Intrinsics || UseSHA256Intrinsics ||
UseSHA512Intrinsics) {
+      if (!FLAG_IS_DEFAULT(UseSHA) ||
+          !FLAG_IS_DEFAULT(UseSHA1Intrinsics) ||
+          !FLAG_IS_DEFAULT(UseSHA256Intrinsics) ||
+          !FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
+        warning("SHA instructions are not available on this CPU");
+      }
+      FLAG_SET_DEFAULT(UseSHA, false);
+      FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
+      FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
+      FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
+    }

     // some defaults for AMD family 15h
     if ( cpu_family() == 0x15 ) {
@@ -1109,11 +1125,43 @@
     }

 #ifdef COMPILER2
-    if (MaxVectorSize > 16) {
-      // Limit vectors size to 16 bytes on current AMD cpus.
+    if (cpu_family() < 0x17 && MaxVectorSize > 16) {
+      // Limit vectors size to 16 bytes on AMD cpus < 17h.
       FLAG_SET_DEFAULT(MaxVectorSize, 16);
     }
 #endif // COMPILER2
+
+    // Some defaults for AMD family 17h
+    if ( cpu_family() == 0x17 ) {
+      // On family 17h processors use XMM and UnalignedLoadStores for
Array Copy
+      if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
+        UseXMMForArrayCopy = true;
+      }
+      if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
+        UseUnalignedLoadStores = true;
+      }
+      if (supports_bmi2() && FLAG_IS_DEFAULT(UseBMI2Instructions)) {
+        UseBMI2Instructions = true;
+      }
+      if (MaxVectorSize > 32) {
+        FLAG_SET_DEFAULT(MaxVectorSize, 32);
+      }
+      if (UseSHA) {
+        if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
+          FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
+        } else if (UseSHA512Intrinsics) {
+          warning("Intrinsics for SHA-384 and SHA-512 crypto hash
functions not available on this CPU.");
+          FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
+        }
+      }
+#ifdef COMPILER2
+      if (supports_sse4_2()) {
+        if (FLAG_IS_DEFAULT(UseFPUForSpilling)) {
+          FLAG_SET_DEFAULT(UseFPUForSpilling, true);
+        }
+      }
+#endif
+    }
   }

   if( is_intel() ) { // Intel cpus specific settings
diff --git a/src/cpu/x86/vm/vm_version_x86.hpp
b/src/cpu/x86/vm/vm_version_x86.hpp
--- a/src/cpu/x86/vm/vm_version_x86.hpp
+++ b/src/cpu/x86/vm/vm_version_x86.hpp
@@ -505,6 +505,14 @@
       result |= CPU_CLMUL;
     if (_cpuid_info.sef_cpuid7_ebx.bits.rtm != 0)
       result |= CPU_RTM;
+    if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
+       result |= CPU_ADX;
+    if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
+      result |= CPU_BMI2;
+    if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
+      result |= CPU_SHA;
+    if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
+      result |= CPU_FMA;

     // AMD features.
     if (is_amd()) {
@@ -515,19 +523,13 @@
         result |= CPU_LZCNT;
       if (_cpuid_info.ext_cpuid1_ecx.bits.sse4a != 0)
         result |= CPU_SSE4A;
+      if(_cpuid_info.std_cpuid1_edx.bits.ht != 0)
+        result |= CPU_HT;
     }
     // Intel features.
     if(is_intel()) {
-      if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
-         result |= CPU_ADX;
-      if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
-        result |= CPU_BMI2;
-      if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
-        result |= CPU_SHA;
       if(_cpuid_info.ext_cpuid1_ecx.bits.lzcnt_intel != 0)
         result |= CPU_LZCNT;
-      if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
-        result |= CPU_FMA;
       // for Intel, ecx.bits.misalignsse bit (bit 8) indicates
support for prefetchw
       if (_cpuid_info.ext_cpuid1_ecx.bits.misalignsse != 0) {
         result |= CPU_3DNOW_PREFETCH;

**************************************************************

Thanks,
Rohit

>>
>> On 1/09/2017 1:11 AM, Rohit Arul Raj wrote:
>>>
>>> On Thu, Aug 31, 2017 at 5:59 PM, David Holmes <david.holmes at oracle.com>
>>> wrote:
>>>>
>>>> Hi Rohit,
>>>>
>>>> On 31/08/2017 7:03 PM, Rohit Arul Raj wrote:
>>>>>
>>>>>
>>>>> I would like an volunteer to review this patch (openJDK9) which sets
>>>>> flag/ISA defaults for newer AMD 17h (EPYC) processor and help us with
>>>>> the commit process.
>>>>>
>>>>> Webrev:
>>>>>
>>>>> https://www.dropbox.com/sh/08bsxaxupg8kbam/AADurTXLGIZ6C-tiIAi_Glyka?dl=0
>>>>
>>>>
>>>>
>>>> Unfortunately patches can not be accepted from systems outside the
>>>> OpenJDK
>>>> infrastructure and ...
>>>>
>>>>> I have also attached the patch (hg diff -g) for reference.
>>>>
>>>>
>>>>
>>>> ... unfortunately patches tend to get stripped by the mail servers. If
>>>> the
>>>> patch is small please include it inline. Otherwise you will need to find
>>>> an
>>>> OpenJDK Author who can host it for you on cr.openjdk.java.net.
>>>>
>>>
>>>>> 3) I have done regression testing using jtreg ($make default) and
>>>>> didnt find any regressions.
>>>>
>>>>
>>>>
>>>> Sounds good, but until I see the patch it is hard to comment on testing
>>>> requirements.
>>>>
>>>> Thanks,
>>>> David
>>>
>>>
>>> Thanks David,
>>> Yes, it's a small patch.
>>>
>>> diff --git a/src/cpu/x86/vm/vm_version_x86.cpp
>>> b/src/cpu/x86/vm/vm_version_x86.cpp
>>> --- a/src/cpu/x86/vm/vm_version_x86.cpp
>>> +++ b/src/cpu/x86/vm/vm_version_x86.cpp
>>> @@ -1051,6 +1051,22 @@
>>>         }
>>>         FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
>>>       }
>>> +    if (supports_sha()) {
>>> +      if (FLAG_IS_DEFAULT(UseSHA)) {
>>> +        FLAG_SET_DEFAULT(UseSHA, true);
>>> +      }
>>> +    } else if (UseSHA || UseSHA1Intrinsics || UseSHA256Intrinsics ||
>>> UseSHA512Intrinsics) {
>>> +      if (!FLAG_IS_DEFAULT(UseSHA) ||
>>> +          !FLAG_IS_DEFAULT(UseSHA1Intrinsics) ||
>>> +          !FLAG_IS_DEFAULT(UseSHA256Intrinsics) ||
>>> +          !FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
>>> +        warning("SHA instructions are not available on this CPU");
>>> +      }
>>> +      FLAG_SET_DEFAULT(UseSHA, false);
>>> +      FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
>>> +      FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
>>> +      FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
>>> +    }
>>>
>>>       // some defaults for AMD family 15h
>>>       if ( cpu_family() == 0x15 ) {
>>> @@ -1072,11 +1088,43 @@
>>>       }
>>>
>>>   #ifdef COMPILER2
>>> -    if (MaxVectorSize > 16) {
>>> -      // Limit vectors size to 16 bytes on current AMD cpus.
>>> +    if (cpu_family() < 0x17 && MaxVectorSize > 16) {
>>> +      // Limit vectors size to 16 bytes on AMD cpus < 17h.
>>>         FLAG_SET_DEFAULT(MaxVectorSize, 16);
>>>       }
>>>   #endif // COMPILER2
>>> +
>>> +    // Some defaults for AMD family 17h
>>> +    if ( cpu_family() == 0x17 ) {
>>> +      // On family 17h processors use XMM and UnalignedLoadStores for
>>> Array Copy
>>> +      if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
>>> +        UseXMMForArrayCopy = true;
>>> +      }
>>> +      if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
>>> +        UseUnalignedLoadStores = true;
>>> +      }
>>> +      if (supports_bmi2() && FLAG_IS_DEFAULT(UseBMI2Instructions)) {
>>> +        UseBMI2Instructions = true;
>>> +      }
>>> +      if (MaxVectorSize > 32) {
>>> +        FLAG_SET_DEFAULT(MaxVectorSize, 32);
>>> +      }
>>> +      if (UseSHA) {
>>> +        if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
>>> +          FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
>>> +        } else if (UseSHA512Intrinsics) {
>>> +          warning("Intrinsics for SHA-384 and SHA-512 crypto hash
>>> functions not available on this CPU.");
>>> +          FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
>>> +        }
>>> +      }
>>> +#ifdef COMPILER2
>>> +      if (supports_sse4_2()) {
>>> +        if (FLAG_IS_DEFAULT(UseFPUForSpilling)) {
>>> +          FLAG_SET_DEFAULT(UseFPUForSpilling, true);
>>> +        }
>>> +      }
>>> +#endif
>>> +    }
>>>     }
>>>
>>>     if( is_intel() ) { // Intel cpus specific settings
>>> diff --git a/src/cpu/x86/vm/vm_version_x86.hpp
>>> b/src/cpu/x86/vm/vm_version_x86.hpp
>>> --- a/src/cpu/x86/vm/vm_version_x86.hpp
>>> +++ b/src/cpu/x86/vm/vm_version_x86.hpp
>>> @@ -513,6 +513,16 @@
>>>           result |= CPU_LZCNT;
>>>         if (_cpuid_info.ext_cpuid1_ecx.bits.sse4a != 0)
>>>           result |= CPU_SSE4A;
>>> +      if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
>>> +        result |= CPU_BMI2;
>>> +      if(_cpuid_info.std_cpuid1_edx.bits.ht != 0)
>>> +        result |= CPU_HT;
>>> +      if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
>>> +        result |= CPU_ADX;
>>> +      if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
>>> +        result |= CPU_SHA;
>>> +      if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
>>> +        result |= CPU_FMA;
>>>       }
>>>       // Intel features.
>>>       if(is_intel()) {
>>>
>>> Regards,
>>> Rohit
>>>
>>