RFR: Newer AMD 17h (EPYC) Processor family defaults

Tue Sep 12 04:52:46 UTC 2017

Hello David,

>>
>>
>> 1. ExtCpuid1EEx
>>
>> Should this be ExtCpuid1EEbx? (I see the naming here is somewhat
>> inconsistent - and potentially confusing: I would have preferred to see
>> things like ExtCpuid_1E_Ebx, to make it clear.)
>
> Yes, I can change it accordingly.
>

I have attached the updated, re-tested patch as per your comments above.

diff --git a/src/cpu/x86/vm/vm_version_x86.cpp
b/src/cpu/x86/vm/vm_version_x86.cpp
--- a/src/cpu/x86/vm/vm_version_x86.cpp
+++ b/src/cpu/x86/vm/vm_version_x86.cpp
@@ -70,7 +70,7 @@
     bool use_evex = FLAG_IS_DEFAULT(UseAVX) || (UseAVX > 2);

     Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4;
-    Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7,
done, wrapup;
+    Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7,
ext_cpuid8, done, wrapup;
     Label legacy_setup, save_restore_except, legacy_save_restore,
start_simd_check;

     StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub");
@@ -272,9 +272,23 @@
     __ jccb(Assembler::belowEqual, ext_cpuid5);
     __ cmpl(rax, 0x80000007);     // Is cpuid(0x80000008) supported?
     __ jccb(Assembler::belowEqual, ext_cpuid7);
+    __ cmpl(rax, 0x80000008);     // Is cpuid(0x8000001E) supported?
+    __ jccb(Assembler::belowEqual, ext_cpuid8);
+    //
+    // Extended cpuid(0x8000001E)
+    //
+    __ movl(rax, 0x8000001E);
+    __ cpuid();
+    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid_1E_offset())));
+    __ movl(Address(rsi, 0), rax);
+    __ movl(Address(rsi, 4), rbx);
+    __ movl(Address(rsi, 8), rcx);
+    __ movl(Address(rsi,12), rdx);
+
     //
     // Extended cpuid(0x80000008)
     //
+    __ bind(ext_cpuid8);
     __ movl(rax, 0x80000008);
     __ cpuid();
     __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid8_offset())));
@@ -1109,11 +1123,27 @@
     }

 #ifdef COMPILER2
-    if (MaxVectorSize > 16) {
-      // Limit vectors size to 16 bytes on current AMD cpus.
+    if (cpu_family() < 0x17 && MaxVectorSize > 16) {
+      // Limit vectors size to 16 bytes on AMD cpus < 17h.
       FLAG_SET_DEFAULT(MaxVectorSize, 16);
     }
 #endif // COMPILER2
+
+    // Some defaults for AMD family 17h
+    if ( cpu_family() == 0x17 ) {
+      // On family 17h processors use XMM and UnalignedLoadStores for
Array Copy
+      if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
+        FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
+      }
+      if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
+        FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
+      }
+#ifdef COMPILER2
+      if (supports_sse4_2() && FLAG_IS_DEFAULT(UseFPUForSpilling)) {
+        FLAG_SET_DEFAULT(UseFPUForSpilling, true);
+      }
+#endif
+    }
   }

   if( is_intel() ) { // Intel cpus specific settings
diff --git a/src/cpu/x86/vm/vm_version_x86.hpp
b/src/cpu/x86/vm/vm_version_x86.hpp
--- a/src/cpu/x86/vm/vm_version_x86.hpp
+++ b/src/cpu/x86/vm/vm_version_x86.hpp
@@ -228,6 +228,15 @@
     } bits;
   };

+  union ExtCpuid_1E_Ebx {
+    uint32_t value;
+    struct {
+      uint32_t                  : 8,
+               threads_per_core : 8,
+                                : 16;
+    } bits;
+  };
+
   union XemXcr0Eax {
     uint32_t value;
     struct {
@@ -398,6 +407,12 @@
     ExtCpuid8Ecx ext_cpuid8_ecx;
     uint32_t     ext_cpuid8_edx; // reserved

+    // cpuid function 0x8000001E // AMD 17h
+    uint32_t        ext_cpuid_1E_eax;
+    ExtCpuid_1E_Ebx ext_cpuid_1E_ebx; // threads per core (AMD17h)
+    uint32_t        ext_cpuid_1E_ecx;
+    uint32_t        ext_cpuid_1E_edx; // unused currently
+
     // extended control register XCR0 (the XFEATURE_ENABLED_MASK register)
     XemXcr0Eax   xem_xcr0_eax;
     uint32_t     xem_xcr0_edx; // reserved
@@ -505,6 +520,14 @@
       result |= CPU_CLMUL;
     if (_cpuid_info.sef_cpuid7_ebx.bits.rtm != 0)
       result |= CPU_RTM;
+    if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
+       result |= CPU_ADX;
+    if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
+      result |= CPU_BMI2;
+    if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
+      result |= CPU_SHA;
+    if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
+      result |= CPU_FMA;

     // AMD features.
     if (is_amd()) {
@@ -518,16 +541,8 @@
     }
     // Intel features.
     if(is_intel()) {
-      if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
-         result |= CPU_ADX;
-      if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
-        result |= CPU_BMI2;
-      if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
-        result |= CPU_SHA;
       if(_cpuid_info.ext_cpuid1_ecx.bits.lzcnt_intel != 0)
         result |= CPU_LZCNT;
-      if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
-        result |= CPU_FMA;
       // for Intel, ecx.bits.misalignsse bit (bit 8) indicates
support for prefetchw
       if (_cpuid_info.ext_cpuid1_ecx.bits.misalignsse != 0) {
         result |= CPU_3DNOW_PREFETCH;
@@ -590,6 +605,7 @@
   static ByteSize ext_cpuid5_offset() { return
byte_offset_of(CpuidInfo, ext_cpuid5_eax); }
   static ByteSize ext_cpuid7_offset() { return
byte_offset_of(CpuidInfo, ext_cpuid7_eax); }
   static ByteSize ext_cpuid8_offset() { return
byte_offset_of(CpuidInfo, ext_cpuid8_eax); }
+  static ByteSize ext_cpuid_1E_offset() { return
byte_offset_of(CpuidInfo, ext_cpuid_1E_eax); }
   static ByteSize tpl_cpuidB0_offset() { return
byte_offset_of(CpuidInfo, tpl_cpuidB0_eax); }
   static ByteSize tpl_cpuidB1_offset() { return
byte_offset_of(CpuidInfo, tpl_cpuidB1_eax); }
   static ByteSize tpl_cpuidB2_offset() { return
byte_offset_of(CpuidInfo, tpl_cpuidB2_eax); }
@@ -673,8 +689,11 @@
     if (is_intel() && supports_processor_topology()) {
       result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
     } else if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) {
-      result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
-               cores_per_cpu();
+      if (cpu_family() >= 0x17)
+        result = _cpuid_info.ext_cpuid_1E_ebx.bits.threads_per_core + 1;
+      else
+        result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
+                 cores_per_cpu();
     }
     return (result == 0 ? 1 : result);
   }


Please let me know your comments

Thanks for your time.

Regards,
Rohit


>> Thanks,
>> David
>> -----
>>
>>
>>> Reference:
>>> https://support.amd.com/TechDocs/54945_PPR_Family_17h_Models_00h-0Fh.pdf
>>> [Pg 82]
>>>
>>>      CPUID_Fn8000001E_EBX [Core Identifiers] (CoreId)
>>>        15:8 ThreadsPerCore: threads per core. Read-only. Reset: XXh.
>>> The number of threads per core is ThreadsPerCore+1.
>>>
>>> diff --git a/src/cpu/x86/vm/vm_version_x86.cpp
>>> b/src/cpu/x86/vm/vm_version_x86.cpp
>>> --- a/src/cpu/x86/vm/vm_version_x86.cpp
>>> +++ b/src/cpu/x86/vm/vm_version_x86.cpp
>>> @@ -70,7 +70,7 @@
>>>       bool use_evex = FLAG_IS_DEFAULT(UseAVX) || (UseAVX > 2);
>>>
>>>       Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4;
>>> -    Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7,
>>> done, wrapup;
>>> +    Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7,
>>> ext_cpuid8, done, wrapup;
>>>       Label legacy_setup, save_restore_except, legacy_save_restore,
>>> start_simd_check;
>>>
>>>       StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub");
>>> @@ -272,9 +272,23 @@
>>>       __ jccb(Assembler::belowEqual, ext_cpuid5);
>>>       __ cmpl(rax, 0x80000007);     // Is cpuid(0x80000008) supported?
>>>       __ jccb(Assembler::belowEqual, ext_cpuid7);
>>> +    __ cmpl(rax, 0x80000008);     // Is cpuid(0x8000001E) supported?
>>> +    __ jccb(Assembler::belowEqual, ext_cpuid8);
>>> +    //
>>> +    // Extended cpuid(0x8000001E)
>>> +    //
>>> +    __ movl(rax, 0x8000001E);
>>> +    __ cpuid();
>>> +    __ lea(rsi, Address(rbp,
>>> in_bytes(VM_Version::ext_cpuid1E_offset())));
>>> +    __ movl(Address(rsi, 0), rax);
>>> +    __ movl(Address(rsi, 4), rbx);
>>> +    __ movl(Address(rsi, 8), rcx);
>>> +    __ movl(Address(rsi,12), rdx);
>>> +
>>>       //
>>>       // Extended cpuid(0x80000008)
>>>       //
>>> +    __ bind(ext_cpuid8);
>>>       __ movl(rax, 0x80000008);
>>>       __ cpuid();
>>>       __ lea(rsi, Address(rbp,
>>> in_bytes(VM_Version::ext_cpuid8_offset())));
>>> @@ -1109,11 +1123,27 @@
>>>       }
>>>
>>>   #ifdef COMPILER2
>>> -    if (MaxVectorSize > 16) {
>>> -      // Limit vectors size to 16 bytes on current AMD cpus.
>>> +    if (cpu_family() < 0x17 && MaxVectorSize > 16) {
>>> +      // Limit vectors size to 16 bytes on AMD cpus < 17h.
>>>         FLAG_SET_DEFAULT(MaxVectorSize, 16);
>>>       }
>>>   #endif // COMPILER2
>>> +
>>> +    // Some defaults for AMD family 17h
>>> +    if ( cpu_family() == 0x17 ) {
>>> +      // On family 17h processors use XMM and UnalignedLoadStores for
>>> Array Copy
>>> +      if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
>>> +        FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
>>> +      }
>>> +      if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
>>> +        FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
>>> +      }
>>> +#ifdef COMPILER2
>>> +      if (supports_sse4_2() && FLAG_IS_DEFAULT(UseFPUForSpilling)) {
>>> +        FLAG_SET_DEFAULT(UseFPUForSpilling, true);
>>> +      }
>>> +#endif
>>> +    }
>>>     }
>>>
>>>     if( is_intel() ) { // Intel cpus specific settings
>>> diff --git a/src/cpu/x86/vm/vm_version_x86.hpp
>>> b/src/cpu/x86/vm/vm_version_x86.hpp
>>> --- a/src/cpu/x86/vm/vm_version_x86.hpp
>>> +++ b/src/cpu/x86/vm/vm_version_x86.hpp
>>> @@ -228,6 +228,15 @@
>>>       } bits;
>>>     };
>>>
>>> +  union ExtCpuid1EEx {
>>> +    uint32_t value;
>>> +    struct {
>>> +      uint32_t                  : 8,
>>> +               threads_per_core : 8,
>>> +                                : 16;
>>> +    } bits;
>>> +  };
>>> +
>>>     union XemXcr0Eax {
>>>       uint32_t value;
>>>       struct {
>>> @@ -398,6 +407,12 @@
>>>       ExtCpuid8Ecx ext_cpuid8_ecx;
>>>       uint32_t     ext_cpuid8_edx; // reserved
>>>
>>> +    // cpuid function 0x8000001E // AMD 17h
>>> +    uint32_t     ext_cpuid1E_eax;
>>> +    ExtCpuid1EEx ext_cpuid1E_ebx; // threads per core (AMD17h)
>>> +    uint32_t     ext_cpuid1E_ecx;
>>> +    uint32_t     ext_cpuid1E_edx; // unused currently
>>> +
>>>       // extended control register XCR0 (the XFEATURE_ENABLED_MASK
>>> register)
>>>       XemXcr0Eax   xem_xcr0_eax;
>>>       uint32_t     xem_xcr0_edx; // reserved
>>> @@ -505,6 +520,14 @@
>>>         result |= CPU_CLMUL;
>>>       if (_cpuid_info.sef_cpuid7_ebx.bits.rtm != 0)
>>>         result |= CPU_RTM;
>>> +    if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
>>> +       result |= CPU_ADX;
>>> +    if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
>>> +      result |= CPU_BMI2;
>>> +    if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
>>> +      result |= CPU_SHA;
>>> +    if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
>>> +      result |= CPU_FMA;
>>>
>>>       // AMD features.
>>>       if (is_amd()) {
>>> @@ -518,16 +541,8 @@
>>>       }
>>>       // Intel features.
>>>       if(is_intel()) {
>>> -      if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
>>> -         result |= CPU_ADX;
>>> -      if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
>>> -        result |= CPU_BMI2;
>>> -      if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
>>> -        result |= CPU_SHA;
>>>         if(_cpuid_info.ext_cpuid1_ecx.bits.lzcnt_intel != 0)
>>>           result |= CPU_LZCNT;
>>> -      if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
>>> -        result |= CPU_FMA;
>>>         // for Intel, ecx.bits.misalignsse bit (bit 8) indicates
>>> support for prefetchw
>>>         if (_cpuid_info.ext_cpuid1_ecx.bits.misalignsse != 0) {
>>>           result |= CPU_3DNOW_PREFETCH;
>>> @@ -590,6 +605,7 @@
>>>     static ByteSize ext_cpuid5_offset() { return
>>> byte_offset_of(CpuidInfo, ext_cpuid5_eax); }
>>>     static ByteSize ext_cpuid7_offset() { return
>>> byte_offset_of(CpuidInfo, ext_cpuid7_eax); }
>>>     static ByteSize ext_cpuid8_offset() { return
>>> byte_offset_of(CpuidInfo, ext_cpuid8_eax); }
>>> +  static ByteSize ext_cpuid1E_offset() { return
>>> byte_offset_of(CpuidInfo, ext_cpuid1E_eax); }
>>>     static ByteSize tpl_cpuidB0_offset() { return
>>> byte_offset_of(CpuidInfo, tpl_cpuidB0_eax); }
>>>     static ByteSize tpl_cpuidB1_offset() { return
>>> byte_offset_of(CpuidInfo, tpl_cpuidB1_eax); }
>>>     static ByteSize tpl_cpuidB2_offset() { return
>>> byte_offset_of(CpuidInfo, tpl_cpuidB2_eax); }
>>> @@ -673,8 +689,11 @@
>>>       if (is_intel() && supports_processor_topology()) {
>>>         result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
>>>       } else if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) {
>>> -      result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
>>> -               cores_per_cpu();
>>> +      if (cpu_family() >= 0x17)
>>> +        result = _cpuid_info.ext_cpuid1E_ebx.bits.threads_per_core + 1;
>>> +      else
>>> +        result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
>>> +                 cores_per_cpu();
>>>       }
>>>       return (result == 0 ? 1 : result);
>>>     }
>>>
>>> I have attached the patch for review.
>>> Please let me know your comments.
>>>
>>> Thanks,
>>> Rohit
>>>
>>>> Thanks,
>>>> Vladimir
>>>>
>>>>
>>>>>
>>>>> src/cpu/x86/vm/vm_version_x86.cpp
>>>>>
>>>>> No comments on AMD specific changes.
>>>>>
>>>>> Thanks,
>>>>> David
>>>>> -----
>>>>>
>>>>> On 5/09/2017 3:43 PM, David Holmes wrote:
>>>>>>
>>>>>>
>>>>>> On 5/09/2017 3:29 PM, Rohit Arul Raj wrote:
>>>>>>>
>>>>>>>
>>>>>>> Hello David,
>>>>>>>
>>>>>>> On Tue, Sep 5, 2017 at 10:31 AM, David Holmes
>>>>>>> <david.holmes at oracle.com>
>>>>>>> wrote:
>>>>>>>>
>>>>>>>>
>>>>>>>> Hi Rohit,
>>>>>>>>
>>>>>>>> I was unable to apply your patch to latest jdk10/hs/hotspot repo.
>>>>>>>>
>>>>>>>
>>>>>>> I checked out the latest jdk10/hs/hotspot [parent: 13548:1a9c2e07a826]
>>>>>>> and was able to apply the patch [epyc-amd17h-defaults-3Sept.patch]
>>>>>>> without any issues.
>>>>>>> Can you share the error message that you are getting?
>>>>>>
>>>>>>
>>>>>>
>>>>>> I was getting this:
>>>>>>
>>>>>> applying hotspot.patch
>>>>>> patching file src/cpu/x86/vm/vm_version_x86.cpp
>>>>>> Hunk #1 FAILED at 1108
>>>>>> 1 out of 1 hunks FAILED -- saving rejects to file
>>>>>> src/cpu/x86/vm/vm_version_x86.cpp.rej
>>>>>> patching file src/cpu/x86/vm/vm_version_x86.hpp
>>>>>> Hunk #2 FAILED at 522
>>>>>> 1 out of 2 hunks FAILED -- saving rejects to file
>>>>>> src/cpu/x86/vm/vm_version_x86.hpp.rej
>>>>>> abort: patch failed to apply
>>>>>>
>>>>>> but I started again and this time it applied fine, so not sure what was
>>>>>> going on there.
>>>>>>
>>>>>> Cheers,
>>>>>> David
>>>>>>
>>>>>>> Regards,
>>>>>>> Rohit
>>>>>>>
>>>>>>>
>>>>>>>>
>>>>>>>>
>>>>>>>> On 4/09/2017 2:42 AM, Rohit Arul Raj wrote:
>>>>>>>>>
>>>>>>>>>
>>>>>>>>>
>>>>>>>>> Hello Vladimir,
>>>>>>>>>
>>>>>>>>> On Sat, Sep 2, 2017 at 11:25 PM, Vladimir Kozlov
>>>>>>>>> <vladimir.kozlov at oracle.com> wrote:
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>> Hi Rohit,
>>>>>>>>>>
>>>>>>>>>> On 9/2/17 1:16 AM, Rohit Arul Raj wrote:
>>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>> Hello Vladimir,
>>>>>>>>>>>
>>>>>>>>>>>> Changes look good. Only question I have is about MaxVectorSize.
>>>>>>>>>>>> It
>>>>>>>>>>>> is
>>>>>>>>>>>> set
>>>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>> 16 only in presence of AVX:
>>>>>>>>>>>>
>>>>>>>>>>>>
>>>>>>>>>>>>
>>>>>>>>>>>>
>>>>>>>>>>>>
>>>>>>>>>>>> http://hg.openjdk.java.net/jdk10/hs/hotspot/file/046eab27258f/src/cpu/x86/vm/vm_version_x86.cpp#l945
>>>>>>>>>>>>
>>>>>>>>>>>> Does that code works for AMD 17h too?
>>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>> Thanks for pointing that out. Yes, the code works fine for AMD
>>>>>>>>>>> 17h.
>>>>>>>>>>> So
>>>>>>>>>>> I have removed the surplus check for MaxVectorSize from my patch.
>>>>>>>>>>> I
>>>>>>>>>>> have updated, re-tested and attached the patch.
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>> Which check you removed?
>>>>>>>>>>
>>>>>>>>>
>>>>>>>>> My older patch had the below mentioned check which was required on
>>>>>>>>> JDK9 where the default MaxVectorSize was 64. It has been handled
>>>>>>>>> better in openJDK10. So this check is not required anymore.
>>>>>>>>>
>>>>>>>>> +    // Some defaults for AMD family 17h
>>>>>>>>> +    if ( cpu_family() == 0x17 ) {
>>>>>>>>> ...
>>>>>>>>> ...
>>>>>>>>> +      if (MaxVectorSize > 32) {
>>>>>>>>> +        FLAG_SET_DEFAULT(MaxVectorSize, 32);
>>>>>>>>> +      }
>>>>>>>>> ..
>>>>>>>>> ..
>>>>>>>>> +      }
>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>> I have one query regarding the setting of UseSHA flag:
>>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>> http://hg.openjdk.java.net/jdk10/hs/hotspot/file/046eab27258f/src/cpu/x86/vm/vm_version_x86.cpp#l821
>>>>>>>>>>>
>>>>>>>>>>> AMD 17h has support for SHA.
>>>>>>>>>>> AMD 15h doesn't have  support for SHA. Still "UseSHA" flag gets
>>>>>>>>>>> enabled for it based on the availability of BMI2 and AVX2. Is
>>>>>>>>>>> there
>>>>>>>>>>> an
>>>>>>>>>>> underlying reason for this? I have handled this in the patch but
>>>>>>>>>>> just
>>>>>>>>>>> wanted to confirm.
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>> It was done with next changes which use only AVX2 and BMI2
>>>>>>>>>> instructions
>>>>>>>>>> to
>>>>>>>>>> calculate SHA-256:
>>>>>>>>>>
>>>>>>>>>> http://hg.openjdk.java.net/jdk10/hs/hotspot/rev/6a17c49de974
>>>>>>>>>>
>>>>>>>>>> I don't know if AMD 15h supports these instructions and can execute
>>>>>>>>>> that
>>>>>>>>>> code. You need to test it.
>>>>>>>>>>
>>>>>>>>>
>>>>>>>>> Ok, got it. Since AMD15h has support for AVX2 and BMI2 instructions,
>>>>>>>>> it should work.
>>>>>>>>> Confirmed by running following sanity tests:
>>>>>>>>>
>>>>>>>>> ./hotspot/test/compiler/intrinsics/sha/sanity/TestSHA1Intrinsics.java
>>>>>>>>>
>>>>>>>>>
>>>>>>>>> ./hotspot/test/compiler/intrinsics/sha/sanity/TestSHA512Intrinsics.java
>>>>>>>>>
>>>>>>>>>
>>>>>>>>> ./hotspot/test/compiler/intrinsics/sha/sanity/TestSHA256Intrinsics.java
>>>>>>>>>
>>>>>>>>> So I have removed those SHA checks from my patch too.
>>>>>>>>>
>>>>>>>>> Please find attached updated, re-tested patch.
>>>>>>>>>
>>>>>>>>> diff --git a/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>> b/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>> --- a/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>> +++ b/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>> @@ -1109,11 +1109,27 @@
>>>>>>>>>         }
>>>>>>>>>
>>>>>>>>>     #ifdef COMPILER2
>>>>>>>>> -    if (MaxVectorSize > 16) {
>>>>>>>>> -      // Limit vectors size to 16 bytes on current AMD cpus.
>>>>>>>>> +    if (cpu_family() < 0x17 && MaxVectorSize > 16) {
>>>>>>>>> +      // Limit vectors size to 16 bytes on AMD cpus < 17h.
>>>>>>>>>           FLAG_SET_DEFAULT(MaxVectorSize, 16);
>>>>>>>>>         }
>>>>>>>>>     #endif // COMPILER2
>>>>>>>>> +
>>>>>>>>> +    // Some defaults for AMD family 17h
>>>>>>>>> +    if ( cpu_family() == 0x17 ) {
>>>>>>>>> +      // On family 17h processors use XMM and UnalignedLoadStores
>>>>>>>>> for
>>>>>>>>> Array Copy
>>>>>>>>> +      if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
>>>>>>>>> +        FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
>>>>>>>>> +      }
>>>>>>>>> +      if (supports_sse2() &&
>>>>>>>>> FLAG_IS_DEFAULT(UseUnalignedLoadStores))
>>>>>>>>> {
>>>>>>>>> +        FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
>>>>>>>>> +      }
>>>>>>>>> +#ifdef COMPILER2
>>>>>>>>> +      if (supports_sse4_2() && FLAG_IS_DEFAULT(UseFPUForSpilling))
>>>>>>>>> {
>>>>>>>>> +        FLAG_SET_DEFAULT(UseFPUForSpilling, true);
>>>>>>>>> +      }
>>>>>>>>> +#endif
>>>>>>>>> +    }
>>>>>>>>>       }
>>>>>>>>>
>>>>>>>>>       if( is_intel() ) { // Intel cpus specific settings
>>>>>>>>> diff --git a/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>> b/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>> --- a/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>> +++ b/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>> @@ -505,6 +505,14 @@
>>>>>>>>>           result |= CPU_CLMUL;
>>>>>>>>>         if (_cpuid_info.sef_cpuid7_ebx.bits.rtm != 0)
>>>>>>>>>           result |= CPU_RTM;
>>>>>>>>> +    if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
>>>>>>>>> +       result |= CPU_ADX;
>>>>>>>>> +    if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
>>>>>>>>> +      result |= CPU_BMI2;
>>>>>>>>> +    if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
>>>>>>>>> +      result |= CPU_SHA;
>>>>>>>>> +    if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
>>>>>>>>> +      result |= CPU_FMA;
>>>>>>>>>
>>>>>>>>>         // AMD features.
>>>>>>>>>         if (is_amd()) {
>>>>>>>>> @@ -515,19 +523,13 @@
>>>>>>>>>             result |= CPU_LZCNT;
>>>>>>>>>           if (_cpuid_info.ext_cpuid1_ecx.bits.sse4a != 0)
>>>>>>>>>             result |= CPU_SSE4A;
>>>>>>>>> +      if(_cpuid_info.std_cpuid1_edx.bits.ht != 0)
>>>>>>>>> +        result |= CPU_HT;
>>>>>>>>>         }
>>>>>>>>>         // Intel features.
>>>>>>>>>         if(is_intel()) {
>>>>>>>>> -      if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
>>>>>>>>> -         result |= CPU_ADX;
>>>>>>>>> -      if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
>>>>>>>>> -        result |= CPU_BMI2;
>>>>>>>>> -      if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
>>>>>>>>> -        result |= CPU_SHA;
>>>>>>>>>           if(_cpuid_info.ext_cpuid1_ecx.bits.lzcnt_intel != 0)
>>>>>>>>>             result |= CPU_LZCNT;
>>>>>>>>> -      if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
>>>>>>>>> -        result |= CPU_FMA;
>>>>>>>>>           // for Intel, ecx.bits.misalignsse bit (bit 8) indicates
>>>>>>>>> support for prefetchw
>>>>>>>>>           if (_cpuid_info.ext_cpuid1_ecx.bits.misalignsse != 0) {
>>>>>>>>>             result |= CPU_3DNOW_PREFETCH;
>>>>>>>>>
>>>>>>>>> Please let me know your comments.
>>>>>>>>>
>>>>>>>>> Thanks for your time.
>>>>>>>>> Rohit
>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>> Thanks for taking time to review the code.
>>>>>>>>>>>
>>>>>>>>>>> diff --git a/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>> b/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>> --- a/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>> +++ b/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>> @@ -1088,6 +1088,22 @@
>>>>>>>>>>>            }
>>>>>>>>>>>            FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
>>>>>>>>>>>          }
>>>>>>>>>>> +    if (supports_sha()) {
>>>>>>>>>>> +      if (FLAG_IS_DEFAULT(UseSHA)) {
>>>>>>>>>>> +        FLAG_SET_DEFAULT(UseSHA, true);
>>>>>>>>>>> +      }
>>>>>>>>>>> +    } else if (UseSHA || UseSHA1Intrinsics || UseSHA256Intrinsics
>>>>>>>>>>> ||
>>>>>>>>>>> UseSHA512Intrinsics) {
>>>>>>>>>>> +      if (!FLAG_IS_DEFAULT(UseSHA) ||
>>>>>>>>>>> +          !FLAG_IS_DEFAULT(UseSHA1Intrinsics) ||
>>>>>>>>>>> +          !FLAG_IS_DEFAULT(UseSHA256Intrinsics) ||
>>>>>>>>>>> +          !FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
>>>>>>>>>>> +        warning("SHA instructions are not available on this
>>>>>>>>>>> CPU");
>>>>>>>>>>> +      }
>>>>>>>>>>> +      FLAG_SET_DEFAULT(UseSHA, false);
>>>>>>>>>>> +      FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
>>>>>>>>>>> +      FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
>>>>>>>>>>> +      FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
>>>>>>>>>>> +    }
>>>>>>>>>>>
>>>>>>>>>>>          // some defaults for AMD family 15h
>>>>>>>>>>>          if ( cpu_family() == 0x15 ) {
>>>>>>>>>>> @@ -1109,11 +1125,40 @@
>>>>>>>>>>>          }
>>>>>>>>>>>
>>>>>>>>>>>      #ifdef COMPILER2
>>>>>>>>>>> -    if (MaxVectorSize > 16) {
>>>>>>>>>>> -      // Limit vectors size to 16 bytes on current AMD cpus.
>>>>>>>>>>> +    if (cpu_family() < 0x17 && MaxVectorSize > 16) {
>>>>>>>>>>> +      // Limit vectors size to 16 bytes on AMD cpus < 17h.
>>>>>>>>>>>            FLAG_SET_DEFAULT(MaxVectorSize, 16);
>>>>>>>>>>>          }
>>>>>>>>>>>      #endif // COMPILER2
>>>>>>>>>>> +
>>>>>>>>>>> +    // Some defaults for AMD family 17h
>>>>>>>>>>> +    if ( cpu_family() == 0x17 ) {
>>>>>>>>>>> +      // On family 17h processors use XMM and UnalignedLoadStores
>>>>>>>>>>> for
>>>>>>>>>>> Array Copy
>>>>>>>>>>> +      if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy))
>>>>>>>>>>> {
>>>>>>>>>>> +        FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
>>>>>>>>>>> +      }
>>>>>>>>>>> +      if (supports_sse2() &&
>>>>>>>>>>> FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
>>>>>>>>>>> +        FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
>>>>>>>>>>> +      }
>>>>>>>>>>> +      if (supports_bmi2() &&
>>>>>>>>>>> FLAG_IS_DEFAULT(UseBMI2Instructions))
>>>>>>>>>>> {
>>>>>>>>>>> +        FLAG_SET_DEFAULT(UseBMI2Instructions, true);
>>>>>>>>>>> +      }
>>>>>>>>>>> +      if (UseSHA) {
>>>>>>>>>>> +        if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
>>>>>>>>>>> +          FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
>>>>>>>>>>> +        } else if (UseSHA512Intrinsics) {
>>>>>>>>>>> +          warning("Intrinsics for SHA-384 and SHA-512 crypto hash
>>>>>>>>>>> functions not available on this CPU.");
>>>>>>>>>>> +          FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
>>>>>>>>>>> +        }
>>>>>>>>>>> +      }
>>>>>>>>>>> +#ifdef COMPILER2
>>>>>>>>>>> +      if (supports_sse4_2()) {
>>>>>>>>>>> +        if (FLAG_IS_DEFAULT(UseFPUForSpilling)) {
>>>>>>>>>>> +          FLAG_SET_DEFAULT(UseFPUForSpilling, true);
>>>>>>>>>>> +        }
>>>>>>>>>>> +      }
>>>>>>>>>>> +#endif
>>>>>>>>>>> +    }
>>>>>>>>>>>        }
>>>>>>>>>>>
>>>>>>>>>>>        if( is_intel() ) { // Intel cpus specific settings
>>>>>>>>>>> diff --git a/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>>> b/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>>> --- a/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>>> +++ b/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>>> @@ -505,6 +505,14 @@
>>>>>>>>>>>            result |= CPU_CLMUL;
>>>>>>>>>>>          if (_cpuid_info.sef_cpuid7_ebx.bits.rtm != 0)
>>>>>>>>>>>            result |= CPU_RTM;
>>>>>>>>>>> +    if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
>>>>>>>>>>> +       result |= CPU_ADX;
>>>>>>>>>>> +    if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
>>>>>>>>>>> +      result |= CPU_BMI2;
>>>>>>>>>>> +    if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
>>>>>>>>>>> +      result |= CPU_SHA;
>>>>>>>>>>> +    if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
>>>>>>>>>>> +      result |= CPU_FMA;
>>>>>>>>>>>
>>>>>>>>>>>          // AMD features.
>>>>>>>>>>>          if (is_amd()) {
>>>>>>>>>>> @@ -515,19 +523,13 @@
>>>>>>>>>>>              result |= CPU_LZCNT;
>>>>>>>>>>>            if (_cpuid_info.ext_cpuid1_ecx.bits.sse4a != 0)
>>>>>>>>>>>              result |= CPU_SSE4A;
>>>>>>>>>>> +      if(_cpuid_info.std_cpuid1_edx.bits.ht != 0)
>>>>>>>>>>> +        result |= CPU_HT;
>>>>>>>>>>>          }
>>>>>>>>>>>          // Intel features.
>>>>>>>>>>>          if(is_intel()) {
>>>>>>>>>>> -      if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
>>>>>>>>>>> -         result |= CPU_ADX;
>>>>>>>>>>> -      if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
>>>>>>>>>>> -        result |= CPU_BMI2;
>>>>>>>>>>> -      if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
>>>>>>>>>>> -        result |= CPU_SHA;
>>>>>>>>>>>            if(_cpuid_info.ext_cpuid1_ecx.bits.lzcnt_intel != 0)
>>>>>>>>>>>              result |= CPU_LZCNT;
>>>>>>>>>>> -      if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
>>>>>>>>>>> -        result |= CPU_FMA;
>>>>>>>>>>>            // for Intel, ecx.bits.misalignsse bit (bit 8)
>>>>>>>>>>> indicates
>>>>>>>>>>> support for prefetchw
>>>>>>>>>>>            if (_cpuid_info.ext_cpuid1_ecx.bits.misalignsse != 0) {
>>>>>>>>>>>              result |= CPU_3DNOW_PREFETCH;
>>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>> Regards,
>>>>>>>>>>> Rohit
>>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>>> On 9/1/17 8:04 AM, Rohit Arul Raj wrote:
>>>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>>> On Fri, Sep 1, 2017 at 10:27 AM, Rohit Arul Raj
>>>>>>>>>>>>> <rohitarulraj at gmail.com>
>>>>>>>>>>>>> wrote:
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> On Fri, Sep 1, 2017 at 3:01 AM, David Holmes
>>>>>>>>>>>>>> <david.holmes at oracle.com>
>>>>>>>>>>>>>> wrote:
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> Hi Rohit,
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> I think the patch needs updating for jdk10 as I already see a
>>>>>>>>>>>>>>> lot of
>>>>>>>>>>>>>>> logic
>>>>>>>>>>>>>>> around UseSHA in vm_version_x86.cpp.
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> Thanks,
>>>>>>>>>>>>>>> David
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> Thanks David, I will update the patch wrt JDK10 source base,
>>>>>>>>>>>>>> test
>>>>>>>>>>>>>> and
>>>>>>>>>>>>>> resubmit for review.
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> Regards,
>>>>>>>>>>>>>> Rohit
>>>>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>>> Hi All,
>>>>>>>>>>>>>
>>>>>>>>>>>>> I have updated the patch wrt openjdk10/hotspot (parent:
>>>>>>>>>>>>> 13519:71337910df60), did regression testing using jtreg ($make
>>>>>>>>>>>>> default) and didnt find any regressions.
>>>>>>>>>>>>>
>>>>>>>>>>>>> Can anyone please volunteer to review this patch  which sets
>>>>>>>>>>>>> flag/ISA
>>>>>>>>>>>>> defaults for newer AMD 17h (EPYC) processor?
>>>>>>>>>>>>>
>>>>>>>>>>>>> ************************* Patch ****************************
>>>>>>>>>>>>>
>>>>>>>>>>>>> diff --git a/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>>>> b/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>>>> --- a/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>>>> +++ b/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>>>> @@ -1088,6 +1088,22 @@
>>>>>>>>>>>>>             }
>>>>>>>>>>>>>             FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
>>>>>>>>>>>>>           }
>>>>>>>>>>>>> +    if (supports_sha()) {
>>>>>>>>>>>>> +      if (FLAG_IS_DEFAULT(UseSHA)) {
>>>>>>>>>>>>> +        FLAG_SET_DEFAULT(UseSHA, true);
>>>>>>>>>>>>> +      }
>>>>>>>>>>>>> +    } else if (UseSHA || UseSHA1Intrinsics ||
>>>>>>>>>>>>> UseSHA256Intrinsics
>>>>>>>>>>>>> ||
>>>>>>>>>>>>> UseSHA512Intrinsics) {
>>>>>>>>>>>>> +      if (!FLAG_IS_DEFAULT(UseSHA) ||
>>>>>>>>>>>>> +          !FLAG_IS_DEFAULT(UseSHA1Intrinsics) ||
>>>>>>>>>>>>> +          !FLAG_IS_DEFAULT(UseSHA256Intrinsics) ||
>>>>>>>>>>>>> +          !FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
>>>>>>>>>>>>> +        warning("SHA instructions are not available on this
>>>>>>>>>>>>> CPU");
>>>>>>>>>>>>> +      }
>>>>>>>>>>>>> +      FLAG_SET_DEFAULT(UseSHA, false);
>>>>>>>>>>>>> +      FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
>>>>>>>>>>>>> +      FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
>>>>>>>>>>>>> +      FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
>>>>>>>>>>>>> +    }
>>>>>>>>>>>>>
>>>>>>>>>>>>>           // some defaults for AMD family 15h
>>>>>>>>>>>>>           if ( cpu_family() == 0x15 ) {
>>>>>>>>>>>>> @@ -1109,11 +1125,43 @@
>>>>>>>>>>>>>           }
>>>>>>>>>>>>>
>>>>>>>>>>>>>       #ifdef COMPILER2
>>>>>>>>>>>>> -    if (MaxVectorSize > 16) {
>>>>>>>>>>>>> -      // Limit vectors size to 16 bytes on current AMD cpus.
>>>>>>>>>>>>> +    if (cpu_family() < 0x17 && MaxVectorSize > 16) {
>>>>>>>>>>>>> +      // Limit vectors size to 16 bytes on AMD cpus < 17h.
>>>>>>>>>>>>>             FLAG_SET_DEFAULT(MaxVectorSize, 16);
>>>>>>>>>>>>>           }
>>>>>>>>>>>>>       #endif // COMPILER2
>>>>>>>>>>>>> +
>>>>>>>>>>>>> +    // Some defaults for AMD family 17h
>>>>>>>>>>>>> +    if ( cpu_family() == 0x17 ) {
>>>>>>>>>>>>> +      // On family 17h processors use XMM and
>>>>>>>>>>>>> UnalignedLoadStores
>>>>>>>>>>>>> for
>>>>>>>>>>>>> Array Copy
>>>>>>>>>>>>> +      if (supports_sse2() &&
>>>>>>>>>>>>> FLAG_IS_DEFAULT(UseXMMForArrayCopy))
>>>>>>>>>>>>> {
>>>>>>>>>>>>> +        UseXMMForArrayCopy = true;
>>>>>>>>>>>>> +      }
>>>>>>>>>>>>> +      if (supports_sse2() &&
>>>>>>>>>>>>> FLAG_IS_DEFAULT(UseUnalignedLoadStores))
>>>>>>>>>>>>> {
>>>>>>>>>>>>> +        UseUnalignedLoadStores = true;
>>>>>>>>>>>>> +      }
>>>>>>>>>>>>> +      if (supports_bmi2() &&
>>>>>>>>>>>>> FLAG_IS_DEFAULT(UseBMI2Instructions)) {
>>>>>>>>>>>>> +        UseBMI2Instructions = true;
>>>>>>>>>>>>> +      }
>>>>>>>>>>>>> +      if (MaxVectorSize > 32) {
>>>>>>>>>>>>> +        FLAG_SET_DEFAULT(MaxVectorSize, 32);
>>>>>>>>>>>>> +      }
>>>>>>>>>>>>> +      if (UseSHA) {
>>>>>>>>>>>>> +        if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
>>>>>>>>>>>>> +          FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
>>>>>>>>>>>>> +        } else if (UseSHA512Intrinsics) {
>>>>>>>>>>>>> +          warning("Intrinsics for SHA-384 and SHA-512 crypto
>>>>>>>>>>>>> hash
>>>>>>>>>>>>> functions not available on this CPU.");
>>>>>>>>>>>>> +          FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
>>>>>>>>>>>>> +        }
>>>>>>>>>>>>> +      }
>>>>>>>>>>>>> +#ifdef COMPILER2
>>>>>>>>>>>>> +      if (supports_sse4_2()) {
>>>>>>>>>>>>> +        if (FLAG_IS_DEFAULT(UseFPUForSpilling)) {
>>>>>>>>>>>>> +          FLAG_SET_DEFAULT(UseFPUForSpilling, true);
>>>>>>>>>>>>> +        }
>>>>>>>>>>>>> +      }
>>>>>>>>>>>>> +#endif
>>>>>>>>>>>>> +    }
>>>>>>>>>>>>>         }
>>>>>>>>>>>>>
>>>>>>>>>>>>>         if( is_intel() ) { // Intel cpus specific settings
>>>>>>>>>>>>> diff --git a/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>>>>> b/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>>>>> --- a/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>>>>> +++ b/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>>>>> @@ -505,6 +505,14 @@
>>>>>>>>>>>>>             result |= CPU_CLMUL;
>>>>>>>>>>>>>           if (_cpuid_info.sef_cpuid7_ebx.bits.rtm != 0)
>>>>>>>>>>>>>             result |= CPU_RTM;
>>>>>>>>>>>>> +    if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
>>>>>>>>>>>>> +       result |= CPU_ADX;
>>>>>>>>>>>>> +    if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
>>>>>>>>>>>>> +      result |= CPU_BMI2;
>>>>>>>>>>>>> +    if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
>>>>>>>>>>>>> +      result |= CPU_SHA;
>>>>>>>>>>>>> +    if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
>>>>>>>>>>>>> +      result |= CPU_FMA;
>>>>>>>>>>>>>
>>>>>>>>>>>>>           // AMD features.
>>>>>>>>>>>>>           if (is_amd()) {
>>>>>>>>>>>>> @@ -515,19 +523,13 @@
>>>>>>>>>>>>>               result |= CPU_LZCNT;
>>>>>>>>>>>>>             if (_cpuid_info.ext_cpuid1_ecx.bits.sse4a != 0)
>>>>>>>>>>>>>               result |= CPU_SSE4A;
>>>>>>>>>>>>> +      if(_cpuid_info.std_cpuid1_edx.bits.ht != 0)
>>>>>>>>>>>>> +        result |= CPU_HT;
>>>>>>>>>>>>>           }
>>>>>>>>>>>>>           // Intel features.
>>>>>>>>>>>>>           if(is_intel()) {
>>>>>>>>>>>>> -      if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
>>>>>>>>>>>>> -         result |= CPU_ADX;
>>>>>>>>>>>>> -      if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
>>>>>>>>>>>>> -        result |= CPU_BMI2;
>>>>>>>>>>>>> -      if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
>>>>>>>>>>>>> -        result |= CPU_SHA;
>>>>>>>>>>>>>             if(_cpuid_info.ext_cpuid1_ecx.bits.lzcnt_intel != 0)
>>>>>>>>>>>>>               result |= CPU_LZCNT;
>>>>>>>>>>>>> -      if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
>>>>>>>>>>>>> -        result |= CPU_FMA;
>>>>>>>>>>>>>             // for Intel, ecx.bits.misalignsse bit (bit 8)
>>>>>>>>>>>>> indicates
>>>>>>>>>>>>> support for prefetchw
>>>>>>>>>>>>>             if (_cpuid_info.ext_cpuid1_ecx.bits.misalignsse !=
>>>>>>>>>>>>> 0) {
>>>>>>>>>>>>>               result |= CPU_3DNOW_PREFETCH;
>>>>>>>>>>>>>
>>>>>>>>>>>>> **************************************************************
>>>>>>>>>>>>>
>>>>>>>>>>>>> Thanks,
>>>>>>>>>>>>> Rohit
>>>>>>>>>>>>>
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> On 1/09/2017 1:11 AM, Rohit Arul Raj wrote:
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> On Thu, Aug 31, 2017 at 5:59 PM, David Holmes
>>>>>>>>>>>>>>>> <david.holmes at oracle.com>
>>>>>>>>>>>>>>>> wrote:
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>> Hi Rohit,
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>> On 31/08/2017 7:03 PM, Rohit Arul Raj wrote:
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>> I would like an volunteer to review this patch (openJDK9)
>>>>>>>>>>>>>>>>>> which
>>>>>>>>>>>>>>>>>> sets
>>>>>>>>>>>>>>>>>> flag/ISA defaults for newer AMD 17h (EPYC) processor and
>>>>>>>>>>>>>>>>>> help
>>>>>>>>>>>>>>>>>> us
>>>>>>>>>>>>>>>>>> with
>>>>>>>>>>>>>>>>>> the commit process.
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>> Webrev:
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>> https://www.dropbox.com/sh/08bsxaxupg8kbam/AADurTXLGIZ6C-tiIAi_Glyka?dl=0
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>> Unfortunately patches can not be accepted from systems
>>>>>>>>>>>>>>>>> outside
>>>>>>>>>>>>>>>>> the
>>>>>>>>>>>>>>>>> OpenJDK
>>>>>>>>>>>>>>>>> infrastructure and ...
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>> I have also attached the patch (hg diff -g) for reference.
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>> ... unfortunately patches tend to get stripped by the mail
>>>>>>>>>>>>>>>>> servers.
>>>>>>>>>>>>>>>>> If
>>>>>>>>>>>>>>>>> the
>>>>>>>>>>>>>>>>> patch is small please include it inline. Otherwise you will
>>>>>>>>>>>>>>>>> need
>>>>>>>>>>>>>>>>> to
>>>>>>>>>>>>>>>>> find
>>>>>>>>>>>>>>>>> an
>>>>>>>>>>>>>>>>> OpenJDK Author who can host it for you on
>>>>>>>>>>>>>>>>> cr.openjdk.java.net.
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>> 3) I have done regression testing using jtreg ($make
>>>>>>>>>>>>>>>>>> default)
>>>>>>>>>>>>>>>>>> and
>>>>>>>>>>>>>>>>>> didnt find any regressions.
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>> Sounds good, but until I see the patch it is hard to comment
>>>>>>>>>>>>>>>>> on
>>>>>>>>>>>>>>>>> testing
>>>>>>>>>>>>>>>>> requirements.
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>> Thanks,
>>>>>>>>>>>>>>>>> David
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> Thanks David,
>>>>>>>>>>>>>>>> Yes, it's a small patch.
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> diff --git a/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>>>>>>> b/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>>>>>>> --- a/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>>>>>>> +++ b/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>>>>>>> @@ -1051,6 +1051,22 @@
>>>>>>>>>>>>>>>>              }
>>>>>>>>>>>>>>>>              FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
>>>>>>>>>>>>>>>>            }
>>>>>>>>>>>>>>>> +    if (supports_sha()) {
>>>>>>>>>>>>>>>> +      if (FLAG_IS_DEFAULT(UseSHA)) {
>>>>>>>>>>>>>>>> +        FLAG_SET_DEFAULT(UseSHA, true);
>>>>>>>>>>>>>>>> +      }
>>>>>>>>>>>>>>>> +    } else if (UseSHA || UseSHA1Intrinsics ||
>>>>>>>>>>>>>>>> UseSHA256Intrinsics
>>>>>>>>>>>>>>>> ||
>>>>>>>>>>>>>>>> UseSHA512Intrinsics) {
>>>>>>>>>>>>>>>> +      if (!FLAG_IS_DEFAULT(UseSHA) ||
>>>>>>>>>>>>>>>> +          !FLAG_IS_DEFAULT(UseSHA1Intrinsics) ||
>>>>>>>>>>>>>>>> +          !FLAG_IS_DEFAULT(UseSHA256Intrinsics) ||
>>>>>>>>>>>>>>>> +          !FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
>>>>>>>>>>>>>>>> +        warning("SHA instructions are not available on this
>>>>>>>>>>>>>>>> CPU");
>>>>>>>>>>>>>>>> +      }
>>>>>>>>>>>>>>>> +      FLAG_SET_DEFAULT(UseSHA, false);
>>>>>>>>>>>>>>>> +      FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
>>>>>>>>>>>>>>>> +      FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
>>>>>>>>>>>>>>>> +      FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
>>>>>>>>>>>>>>>> +    }
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>            // some defaults for AMD family 15h
>>>>>>>>>>>>>>>>            if ( cpu_family() == 0x15 ) {
>>>>>>>>>>>>>>>> @@ -1072,11 +1088,43 @@
>>>>>>>>>>>>>>>>            }
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>        #ifdef COMPILER2
>>>>>>>>>>>>>>>> -    if (MaxVectorSize > 16) {
>>>>>>>>>>>>>>>> -      // Limit vectors size to 16 bytes on current AMD cpus.
>>>>>>>>>>>>>>>> +    if (cpu_family() < 0x17 && MaxVectorSize > 16) {
>>>>>>>>>>>>>>>> +      // Limit vectors size to 16 bytes on AMD cpus < 17h.
>>>>>>>>>>>>>>>>              FLAG_SET_DEFAULT(MaxVectorSize, 16);
>>>>>>>>>>>>>>>>            }
>>>>>>>>>>>>>>>>        #endif // COMPILER2
>>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>>> +    // Some defaults for AMD family 17h
>>>>>>>>>>>>>>>> +    if ( cpu_family() == 0x17 ) {
>>>>>>>>>>>>>>>> +      // On family 17h processors use XMM and
>>>>>>>>>>>>>>>> UnalignedLoadStores
>>>>>>>>>>>>>>>> for
>>>>>>>>>>>>>>>> Array Copy
>>>>>>>>>>>>>>>> +      if (supports_sse2() &&
>>>>>>>>>>>>>>>> FLAG_IS_DEFAULT(UseXMMForArrayCopy))
>>>>>>>>>>>>>>>> {
>>>>>>>>>>>>>>>> +        UseXMMForArrayCopy = true;
>>>>>>>>>>>>>>>> +      }
>>>>>>>>>>>>>>>> +      if (supports_sse2() &&
>>>>>>>>>>>>>>>> FLAG_IS_DEFAULT(UseUnalignedLoadStores))
>>>>>>>>>>>>>>>> {
>>>>>>>>>>>>>>>> +        UseUnalignedLoadStores = true;
>>>>>>>>>>>>>>>> +      }
>>>>>>>>>>>>>>>> +      if (supports_bmi2() &&
>>>>>>>>>>>>>>>> FLAG_IS_DEFAULT(UseBMI2Instructions))
>>>>>>>>>>>>>>>> {
>>>>>>>>>>>>>>>> +        UseBMI2Instructions = true;
>>>>>>>>>>>>>>>> +      }
>>>>>>>>>>>>>>>> +      if (MaxVectorSize > 32) {
>>>>>>>>>>>>>>>> +        FLAG_SET_DEFAULT(MaxVectorSize, 32);
>>>>>>>>>>>>>>>> +      }
>>>>>>>>>>>>>>>> +      if (UseSHA) {
>>>>>>>>>>>>>>>> +        if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
>>>>>>>>>>>>>>>> +          FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
>>>>>>>>>>>>>>>> +        } else if (UseSHA512Intrinsics) {
>>>>>>>>>>>>>>>> +          warning("Intrinsics for SHA-384 and SHA-512 crypto
>>>>>>>>>>>>>>>> hash
>>>>>>>>>>>>>>>> functions not available on this CPU.");
>>>>>>>>>>>>>>>> +          FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
>>>>>>>>>>>>>>>> +        }
>>>>>>>>>>>>>>>> +      }
>>>>>>>>>>>>>>>> +#ifdef COMPILER2
>>>>>>>>>>>>>>>> +      if (supports_sse4_2()) {
>>>>>>>>>>>>>>>> +        if (FLAG_IS_DEFAULT(UseFPUForSpilling)) {
>>>>>>>>>>>>>>>> +          FLAG_SET_DEFAULT(UseFPUForSpilling, true);
>>>>>>>>>>>>>>>> +        }
>>>>>>>>>>>>>>>> +      }
>>>>>>>>>>>>>>>> +#endif
>>>>>>>>>>>>>>>> +    }
>>>>>>>>>>>>>>>>          }
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>          if( is_intel() ) { // Intel cpus specific settings
>>>>>>>>>>>>>>>> diff --git a/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>>>>>>>> b/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>>>>>>>> --- a/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>>>>>>>> +++ b/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>>>>>>>> @@ -513,6 +513,16 @@
>>>>>>>>>>>>>>>>                result |= CPU_LZCNT;
>>>>>>>>>>>>>>>>              if (_cpuid_info.ext_cpuid1_ecx.bits.sse4a != 0)
>>>>>>>>>>>>>>>>                result |= CPU_SSE4A;
>>>>>>>>>>>>>>>> +      if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
>>>>>>>>>>>>>>>> +        result |= CPU_BMI2;
>>>>>>>>>>>>>>>> +      if(_cpuid_info.std_cpuid1_edx.bits.ht != 0)
>>>>>>>>>>>>>>>> +        result |= CPU_HT;
>>>>>>>>>>>>>>>> +      if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
>>>>>>>>>>>>>>>> +        result |= CPU_ADX;
>>>>>>>>>>>>>>>> +      if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
>>>>>>>>>>>>>>>> +        result |= CPU_SHA;
>>>>>>>>>>>>>>>> +      if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
>>>>>>>>>>>>>>>> +        result |= CPU_FMA;
>>>>>>>>>>>>>>>>            }
>>>>>>>>>>>>>>>>            // Intel features.
>>>>>>>>>>>>>>>>            if(is_intel()) {
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> Regards,
>>>>>>>>>>>>>>>> Rohit
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>
>>>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>
>>>>
>>