RFR: Newer AMD 17h (EPYC) Processor family defaults

Thu Sep 7 05:24:21 UTC 2017

Hello Vladimir, David,

>
>>
>> You added:
>>
>>   526       if(_cpuid_info.std_cpuid1_edx.bits.ht != 0)
>>   527         result |= CPU_HT;
>>
>> and I'm wondering of there would be any case where this would not be
>> covered by the earlier:
>>
>>   448     if (threads_per_core() > 1)
>>   449       result |= CPU_HT;
>>
>> ?
>> ---
>
>
> Valid question.
>

Thanks for your review and comments.
I have updated the patch to calculate threads per core by using the
CPUID bit: CPUID_Fn8000001E_EBX [8:15].

Reference:
https://support.amd.com/TechDocs/54945_PPR_Family_17h_Models_00h-0Fh.pdf [Pg 82]

    CPUID_Fn8000001E_EBX [Core Identifiers] (CoreId)
      15:8 ThreadsPerCore: threads per core. Read-only. Reset: XXh.
The number of threads per core is ThreadsPerCore+1.

diff --git a/src/cpu/x86/vm/vm_version_x86.cpp
b/src/cpu/x86/vm/vm_version_x86.cpp
--- a/src/cpu/x86/vm/vm_version_x86.cpp
+++ b/src/cpu/x86/vm/vm_version_x86.cpp
@@ -70,7 +70,7 @@
     bool use_evex = FLAG_IS_DEFAULT(UseAVX) || (UseAVX > 2);

     Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4;
-    Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7,
done, wrapup;
+    Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7,
ext_cpuid8, done, wrapup;
     Label legacy_setup, save_restore_except, legacy_save_restore,
start_simd_check;

     StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub");
@@ -272,9 +272,23 @@
     __ jccb(Assembler::belowEqual, ext_cpuid5);
     __ cmpl(rax, 0x80000007);     // Is cpuid(0x80000008) supported?
     __ jccb(Assembler::belowEqual, ext_cpuid7);
+    __ cmpl(rax, 0x80000008);     // Is cpuid(0x8000001E) supported?
+    __ jccb(Assembler::belowEqual, ext_cpuid8);
+    //
+    // Extended cpuid(0x8000001E)
+    //
+    __ movl(rax, 0x8000001E);
+    __ cpuid();
+    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1E_offset())));
+    __ movl(Address(rsi, 0), rax);
+    __ movl(Address(rsi, 4), rbx);
+    __ movl(Address(rsi, 8), rcx);
+    __ movl(Address(rsi,12), rdx);
+
     //
     // Extended cpuid(0x80000008)
     //
+    __ bind(ext_cpuid8);
     __ movl(rax, 0x80000008);
     __ cpuid();
     __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid8_offset())));
@@ -1109,11 +1123,27 @@
     }

 #ifdef COMPILER2
-    if (MaxVectorSize > 16) {
-      // Limit vectors size to 16 bytes on current AMD cpus.
+    if (cpu_family() < 0x17 && MaxVectorSize > 16) {
+      // Limit vectors size to 16 bytes on AMD cpus < 17h.
       FLAG_SET_DEFAULT(MaxVectorSize, 16);
     }
 #endif // COMPILER2
+
+    // Some defaults for AMD family 17h
+    if ( cpu_family() == 0x17 ) {
+      // On family 17h processors use XMM and UnalignedLoadStores for
Array Copy
+      if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
+        FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
+      }
+      if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
+        FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
+      }
+#ifdef COMPILER2
+      if (supports_sse4_2() && FLAG_IS_DEFAULT(UseFPUForSpilling)) {
+        FLAG_SET_DEFAULT(UseFPUForSpilling, true);
+      }
+#endif
+    }
   }

   if( is_intel() ) { // Intel cpus specific settings
diff --git a/src/cpu/x86/vm/vm_version_x86.hpp
b/src/cpu/x86/vm/vm_version_x86.hpp
--- a/src/cpu/x86/vm/vm_version_x86.hpp
+++ b/src/cpu/x86/vm/vm_version_x86.hpp
@@ -228,6 +228,15 @@
     } bits;
   };

+  union ExtCpuid1EEx {
+    uint32_t value;
+    struct {
+      uint32_t                  : 8,
+               threads_per_core : 8,
+                                : 16;
+    } bits;
+  };
+
   union XemXcr0Eax {
     uint32_t value;
     struct {
@@ -398,6 +407,12 @@
     ExtCpuid8Ecx ext_cpuid8_ecx;
     uint32_t     ext_cpuid8_edx; // reserved

+    // cpuid function 0x8000001E // AMD 17h
+    uint32_t     ext_cpuid1E_eax;
+    ExtCpuid1EEx ext_cpuid1E_ebx; // threads per core (AMD17h)
+    uint32_t     ext_cpuid1E_ecx;
+    uint32_t     ext_cpuid1E_edx; // unused currently
+
     // extended control register XCR0 (the XFEATURE_ENABLED_MASK register)
     XemXcr0Eax   xem_xcr0_eax;
     uint32_t     xem_xcr0_edx; // reserved
@@ -505,6 +520,14 @@
       result |= CPU_CLMUL;
     if (_cpuid_info.sef_cpuid7_ebx.bits.rtm != 0)
       result |= CPU_RTM;
+    if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
+       result |= CPU_ADX;
+    if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
+      result |= CPU_BMI2;
+    if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
+      result |= CPU_SHA;
+    if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
+      result |= CPU_FMA;

     // AMD features.
     if (is_amd()) {
@@ -518,16 +541,8 @@
     }
     // Intel features.
     if(is_intel()) {
-      if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
-         result |= CPU_ADX;
-      if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
-        result |= CPU_BMI2;
-      if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
-        result |= CPU_SHA;
       if(_cpuid_info.ext_cpuid1_ecx.bits.lzcnt_intel != 0)
         result |= CPU_LZCNT;
-      if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
-        result |= CPU_FMA;
       // for Intel, ecx.bits.misalignsse bit (bit 8) indicates
support for prefetchw
       if (_cpuid_info.ext_cpuid1_ecx.bits.misalignsse != 0) {
         result |= CPU_3DNOW_PREFETCH;
@@ -590,6 +605,7 @@
   static ByteSize ext_cpuid5_offset() { return
byte_offset_of(CpuidInfo, ext_cpuid5_eax); }
   static ByteSize ext_cpuid7_offset() { return
byte_offset_of(CpuidInfo, ext_cpuid7_eax); }
   static ByteSize ext_cpuid8_offset() { return
byte_offset_of(CpuidInfo, ext_cpuid8_eax); }
+  static ByteSize ext_cpuid1E_offset() { return
byte_offset_of(CpuidInfo, ext_cpuid1E_eax); }
   static ByteSize tpl_cpuidB0_offset() { return
byte_offset_of(CpuidInfo, tpl_cpuidB0_eax); }
   static ByteSize tpl_cpuidB1_offset() { return
byte_offset_of(CpuidInfo, tpl_cpuidB1_eax); }
   static ByteSize tpl_cpuidB2_offset() { return
byte_offset_of(CpuidInfo, tpl_cpuidB2_eax); }
@@ -673,8 +689,11 @@
     if (is_intel() && supports_processor_topology()) {
       result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
     } else if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) {
-      result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
-               cores_per_cpu();
+      if (cpu_family() >= 0x17)
+        result = _cpuid_info.ext_cpuid1E_ebx.bits.threads_per_core + 1;
+      else
+        result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
+                 cores_per_cpu();
     }
     return (result == 0 ? 1 : result);
   }

I have attached the patch for review.
Please let me know your comments.

Thanks,
Rohit

> Thanks,
> Vladimir
>
>
>>
>> src/cpu/x86/vm/vm_version_x86.cpp
>>
>> No comments on AMD specific changes.
>>
>> Thanks,
>> David
>> -----
>>
>> On 5/09/2017 3:43 PM, David Holmes wrote:
>>>
>>> On 5/09/2017 3:29 PM, Rohit Arul Raj wrote:
>>>>
>>>> Hello David,
>>>>
>>>> On Tue, Sep 5, 2017 at 10:31 AM, David Holmes <david.holmes at oracle.com>
>>>> wrote:
>>>>>
>>>>> Hi Rohit,
>>>>>
>>>>> I was unable to apply your patch to latest jdk10/hs/hotspot repo.
>>>>>
>>>>
>>>> I checked out the latest jdk10/hs/hotspot [parent: 13548:1a9c2e07a826]
>>>> and was able to apply the patch [epyc-amd17h-defaults-3Sept.patch]
>>>> without any issues.
>>>> Can you share the error message that you are getting?
>>>
>>>
>>> I was getting this:
>>>
>>> applying hotspot.patch
>>> patching file src/cpu/x86/vm/vm_version_x86.cpp
>>> Hunk #1 FAILED at 1108
>>> 1 out of 1 hunks FAILED -- saving rejects to file
>>> src/cpu/x86/vm/vm_version_x86.cpp.rej
>>> patching file src/cpu/x86/vm/vm_version_x86.hpp
>>> Hunk #2 FAILED at 522
>>> 1 out of 2 hunks FAILED -- saving rejects to file
>>> src/cpu/x86/vm/vm_version_x86.hpp.rej
>>> abort: patch failed to apply
>>>
>>> but I started again and this time it applied fine, so not sure what was
>>> going on there.
>>>
>>> Cheers,
>>> David
>>>
>>>> Regards,
>>>> Rohit
>>>>
>>>>
>>>>>
>>>>>
>>>>> On 4/09/2017 2:42 AM, Rohit Arul Raj wrote:
>>>>>>
>>>>>>
>>>>>> Hello Vladimir,
>>>>>>
>>>>>> On Sat, Sep 2, 2017 at 11:25 PM, Vladimir Kozlov
>>>>>> <vladimir.kozlov at oracle.com> wrote:
>>>>>>>
>>>>>>>
>>>>>>> Hi Rohit,
>>>>>>>
>>>>>>> On 9/2/17 1:16 AM, Rohit Arul Raj wrote:
>>>>>>>>
>>>>>>>>
>>>>>>>>
>>>>>>>> Hello Vladimir,
>>>>>>>>
>>>>>>>>> Changes look good. Only question I have is about MaxVectorSize. It
>>>>>>>>> is
>>>>>>>>> set
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>> 16 only in presence of AVX:
>>>>>>>>>
>>>>>>>>>
>>>>>>>>>
>>>>>>>>>
>>>>>>>>> http://hg.openjdk.java.net/jdk10/hs/hotspot/file/046eab27258f/src/cpu/x86/vm/vm_version_x86.cpp#l945
>>>>>>>>>
>>>>>>>>> Does that code works for AMD 17h too?
>>>>>>>>
>>>>>>>>
>>>>>>>>
>>>>>>>>
>>>>>>>> Thanks for pointing that out. Yes, the code works fine for AMD 17h.
>>>>>>>> So
>>>>>>>> I have removed the surplus check for MaxVectorSize from my patch. I
>>>>>>>> have updated, re-tested and attached the patch.
>>>>>>>
>>>>>>>
>>>>>>>
>>>>>>>
>>>>>>> Which check you removed?
>>>>>>>
>>>>>>
>>>>>> My older patch had the below mentioned check which was required on
>>>>>> JDK9 where the default MaxVectorSize was 64. It has been handled
>>>>>> better in openJDK10. So this check is not required anymore.
>>>>>>
>>>>>> +    // Some defaults for AMD family 17h
>>>>>> +    if ( cpu_family() == 0x17 ) {
>>>>>> ...
>>>>>> ...
>>>>>> +      if (MaxVectorSize > 32) {
>>>>>> +        FLAG_SET_DEFAULT(MaxVectorSize, 32);
>>>>>> +      }
>>>>>> ..
>>>>>> ..
>>>>>> +      }
>>>>>>
>>>>>>>>
>>>>>>>> I have one query regarding the setting of UseSHA flag:
>>>>>>>>
>>>>>>>>
>>>>>>>>
>>>>>>>> http://hg.openjdk.java.net/jdk10/hs/hotspot/file/046eab27258f/src/cpu/x86/vm/vm_version_x86.cpp#l821
>>>>>>>>
>>>>>>>> AMD 17h has support for SHA.
>>>>>>>> AMD 15h doesn't have  support for SHA. Still "UseSHA" flag gets
>>>>>>>> enabled for it based on the availability of BMI2 and AVX2. Is there
>>>>>>>> an
>>>>>>>> underlying reason for this? I have handled this in the patch but
>>>>>>>> just
>>>>>>>> wanted to confirm.
>>>>>>>
>>>>>>>
>>>>>>>
>>>>>>>
>>>>>>> It was done with next changes which use only AVX2 and BMI2
>>>>>>> instructions
>>>>>>> to
>>>>>>> calculate SHA-256:
>>>>>>>
>>>>>>> http://hg.openjdk.java.net/jdk10/hs/hotspot/rev/6a17c49de974
>>>>>>>
>>>>>>> I don't know if AMD 15h supports these instructions and can execute
>>>>>>> that
>>>>>>> code. You need to test it.
>>>>>>>
>>>>>>
>>>>>> Ok, got it. Since AMD15h has support for AVX2 and BMI2 instructions,
>>>>>> it should work.
>>>>>> Confirmed by running following sanity tests:
>>>>>> ./hotspot/test/compiler/intrinsics/sha/sanity/TestSHA1Intrinsics.java
>>>>>>
>>>>>> ./hotspot/test/compiler/intrinsics/sha/sanity/TestSHA512Intrinsics.java
>>>>>>
>>>>>> ./hotspot/test/compiler/intrinsics/sha/sanity/TestSHA256Intrinsics.java
>>>>>>
>>>>>> So I have removed those SHA checks from my patch too.
>>>>>>
>>>>>> Please find attached updated, re-tested patch.
>>>>>>
>>>>>> diff --git a/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>> b/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>> --- a/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>> +++ b/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>> @@ -1109,11 +1109,27 @@
>>>>>>        }
>>>>>>
>>>>>>    #ifdef COMPILER2
>>>>>> -    if (MaxVectorSize > 16) {
>>>>>> -      // Limit vectors size to 16 bytes on current AMD cpus.
>>>>>> +    if (cpu_family() < 0x17 && MaxVectorSize > 16) {
>>>>>> +      // Limit vectors size to 16 bytes on AMD cpus < 17h.
>>>>>>          FLAG_SET_DEFAULT(MaxVectorSize, 16);
>>>>>>        }
>>>>>>    #endif // COMPILER2
>>>>>> +
>>>>>> +    // Some defaults for AMD family 17h
>>>>>> +    if ( cpu_family() == 0x17 ) {
>>>>>> +      // On family 17h processors use XMM and UnalignedLoadStores for
>>>>>> Array Copy
>>>>>> +      if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
>>>>>> +        FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
>>>>>> +      }
>>>>>> +      if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores))
>>>>>> {
>>>>>> +        FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
>>>>>> +      }
>>>>>> +#ifdef COMPILER2
>>>>>> +      if (supports_sse4_2() && FLAG_IS_DEFAULT(UseFPUForSpilling)) {
>>>>>> +        FLAG_SET_DEFAULT(UseFPUForSpilling, true);
>>>>>> +      }
>>>>>> +#endif
>>>>>> +    }
>>>>>>      }
>>>>>>
>>>>>>      if( is_intel() ) { // Intel cpus specific settings
>>>>>> diff --git a/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>> b/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>> --- a/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>> +++ b/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>> @@ -505,6 +505,14 @@
>>>>>>          result |= CPU_CLMUL;
>>>>>>        if (_cpuid_info.sef_cpuid7_ebx.bits.rtm != 0)
>>>>>>          result |= CPU_RTM;
>>>>>> +    if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
>>>>>> +       result |= CPU_ADX;
>>>>>> +    if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
>>>>>> +      result |= CPU_BMI2;
>>>>>> +    if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
>>>>>> +      result |= CPU_SHA;
>>>>>> +    if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
>>>>>> +      result |= CPU_FMA;
>>>>>>
>>>>>>        // AMD features.
>>>>>>        if (is_amd()) {
>>>>>> @@ -515,19 +523,13 @@
>>>>>>            result |= CPU_LZCNT;
>>>>>>          if (_cpuid_info.ext_cpuid1_ecx.bits.sse4a != 0)
>>>>>>            result |= CPU_SSE4A;
>>>>>> +      if(_cpuid_info.std_cpuid1_edx.bits.ht != 0)
>>>>>> +        result |= CPU_HT;
>>>>>>        }
>>>>>>        // Intel features.
>>>>>>        if(is_intel()) {
>>>>>> -      if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
>>>>>> -         result |= CPU_ADX;
>>>>>> -      if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
>>>>>> -        result |= CPU_BMI2;
>>>>>> -      if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
>>>>>> -        result |= CPU_SHA;
>>>>>>          if(_cpuid_info.ext_cpuid1_ecx.bits.lzcnt_intel != 0)
>>>>>>            result |= CPU_LZCNT;
>>>>>> -      if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
>>>>>> -        result |= CPU_FMA;
>>>>>>          // for Intel, ecx.bits.misalignsse bit (bit 8) indicates
>>>>>> support for prefetchw
>>>>>>          if (_cpuid_info.ext_cpuid1_ecx.bits.misalignsse != 0) {
>>>>>>            result |= CPU_3DNOW_PREFETCH;
>>>>>>
>>>>>> Please let me know your comments.
>>>>>>
>>>>>> Thanks for your time.
>>>>>> Rohit
>>>>>>
>>>>>>>>
>>>>>>>> Thanks for taking time to review the code.
>>>>>>>>
>>>>>>>> diff --git a/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>> b/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>> --- a/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>> +++ b/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>> @@ -1088,6 +1088,22 @@
>>>>>>>>           }
>>>>>>>>           FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
>>>>>>>>         }
>>>>>>>> +    if (supports_sha()) {
>>>>>>>> +      if (FLAG_IS_DEFAULT(UseSHA)) {
>>>>>>>> +        FLAG_SET_DEFAULT(UseSHA, true);
>>>>>>>> +      }
>>>>>>>> +    } else if (UseSHA || UseSHA1Intrinsics || UseSHA256Intrinsics
>>>>>>>> ||
>>>>>>>> UseSHA512Intrinsics) {
>>>>>>>> +      if (!FLAG_IS_DEFAULT(UseSHA) ||
>>>>>>>> +          !FLAG_IS_DEFAULT(UseSHA1Intrinsics) ||
>>>>>>>> +          !FLAG_IS_DEFAULT(UseSHA256Intrinsics) ||
>>>>>>>> +          !FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
>>>>>>>> +        warning("SHA instructions are not available on this CPU");
>>>>>>>> +      }
>>>>>>>> +      FLAG_SET_DEFAULT(UseSHA, false);
>>>>>>>> +      FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
>>>>>>>> +      FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
>>>>>>>> +      FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
>>>>>>>> +    }
>>>>>>>>
>>>>>>>>         // some defaults for AMD family 15h
>>>>>>>>         if ( cpu_family() == 0x15 ) {
>>>>>>>> @@ -1109,11 +1125,40 @@
>>>>>>>>         }
>>>>>>>>
>>>>>>>>     #ifdef COMPILER2
>>>>>>>> -    if (MaxVectorSize > 16) {
>>>>>>>> -      // Limit vectors size to 16 bytes on current AMD cpus.
>>>>>>>> +    if (cpu_family() < 0x17 && MaxVectorSize > 16) {
>>>>>>>> +      // Limit vectors size to 16 bytes on AMD cpus < 17h.
>>>>>>>>           FLAG_SET_DEFAULT(MaxVectorSize, 16);
>>>>>>>>         }
>>>>>>>>     #endif // COMPILER2
>>>>>>>> +
>>>>>>>> +    // Some defaults for AMD family 17h
>>>>>>>> +    if ( cpu_family() == 0x17 ) {
>>>>>>>> +      // On family 17h processors use XMM and UnalignedLoadStores
>>>>>>>> for
>>>>>>>> Array Copy
>>>>>>>> +      if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
>>>>>>>> +        FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
>>>>>>>> +      }
>>>>>>>> +      if (supports_sse2() &&
>>>>>>>> FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
>>>>>>>> +        FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
>>>>>>>> +      }
>>>>>>>> +      if (supports_bmi2() && FLAG_IS_DEFAULT(UseBMI2Instructions))
>>>>>>>> {
>>>>>>>> +        FLAG_SET_DEFAULT(UseBMI2Instructions, true);
>>>>>>>> +      }
>>>>>>>> +      if (UseSHA) {
>>>>>>>> +        if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
>>>>>>>> +          FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
>>>>>>>> +        } else if (UseSHA512Intrinsics) {
>>>>>>>> +          warning("Intrinsics for SHA-384 and SHA-512 crypto hash
>>>>>>>> functions not available on this CPU.");
>>>>>>>> +          FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
>>>>>>>> +        }
>>>>>>>> +      }
>>>>>>>> +#ifdef COMPILER2
>>>>>>>> +      if (supports_sse4_2()) {
>>>>>>>> +        if (FLAG_IS_DEFAULT(UseFPUForSpilling)) {
>>>>>>>> +          FLAG_SET_DEFAULT(UseFPUForSpilling, true);
>>>>>>>> +        }
>>>>>>>> +      }
>>>>>>>> +#endif
>>>>>>>> +    }
>>>>>>>>       }
>>>>>>>>
>>>>>>>>       if( is_intel() ) { // Intel cpus specific settings
>>>>>>>> diff --git a/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>> b/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>> --- a/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>> +++ b/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>> @@ -505,6 +505,14 @@
>>>>>>>>           result |= CPU_CLMUL;
>>>>>>>>         if (_cpuid_info.sef_cpuid7_ebx.bits.rtm != 0)
>>>>>>>>           result |= CPU_RTM;
>>>>>>>> +    if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
>>>>>>>> +       result |= CPU_ADX;
>>>>>>>> +    if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
>>>>>>>> +      result |= CPU_BMI2;
>>>>>>>> +    if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
>>>>>>>> +      result |= CPU_SHA;
>>>>>>>> +    if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
>>>>>>>> +      result |= CPU_FMA;
>>>>>>>>
>>>>>>>>         // AMD features.
>>>>>>>>         if (is_amd()) {
>>>>>>>> @@ -515,19 +523,13 @@
>>>>>>>>             result |= CPU_LZCNT;
>>>>>>>>           if (_cpuid_info.ext_cpuid1_ecx.bits.sse4a != 0)
>>>>>>>>             result |= CPU_SSE4A;
>>>>>>>> +      if(_cpuid_info.std_cpuid1_edx.bits.ht != 0)
>>>>>>>> +        result |= CPU_HT;
>>>>>>>>         }
>>>>>>>>         // Intel features.
>>>>>>>>         if(is_intel()) {
>>>>>>>> -      if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
>>>>>>>> -         result |= CPU_ADX;
>>>>>>>> -      if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
>>>>>>>> -        result |= CPU_BMI2;
>>>>>>>> -      if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
>>>>>>>> -        result |= CPU_SHA;
>>>>>>>>           if(_cpuid_info.ext_cpuid1_ecx.bits.lzcnt_intel != 0)
>>>>>>>>             result |= CPU_LZCNT;
>>>>>>>> -      if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
>>>>>>>> -        result |= CPU_FMA;
>>>>>>>>           // for Intel, ecx.bits.misalignsse bit (bit 8) indicates
>>>>>>>> support for prefetchw
>>>>>>>>           if (_cpuid_info.ext_cpuid1_ecx.bits.misalignsse != 0) {
>>>>>>>>             result |= CPU_3DNOW_PREFETCH;
>>>>>>>>
>>>>>>>>
>>>>>>>> Regards,
>>>>>>>> Rohit
>>>>>>>>
>>>>>>>>
>>>>>>>>
>>>>>>>>> On 9/1/17 8:04 AM, Rohit Arul Raj wrote:
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>> On Fri, Sep 1, 2017 at 10:27 AM, Rohit Arul Raj
>>>>>>>>>> <rohitarulraj at gmail.com>
>>>>>>>>>> wrote:
>>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>> On Fri, Sep 1, 2017 at 3:01 AM, David Holmes
>>>>>>>>>>> <david.holmes at oracle.com>
>>>>>>>>>>> wrote:
>>>>>>>>>>>>
>>>>>>>>>>>>
>>>>>>>>>>>>
>>>>>>>>>>>>
>>>>>>>>>>>> Hi Rohit,
>>>>>>>>>>>>
>>>>>>>>>>>> I think the patch needs updating for jdk10 as I already see a
>>>>>>>>>>>> lot of
>>>>>>>>>>>> logic
>>>>>>>>>>>> around UseSHA in vm_version_x86.cpp.
>>>>>>>>>>>>
>>>>>>>>>>>> Thanks,
>>>>>>>>>>>> David
>>>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>> Thanks David, I will update the patch wrt JDK10 source base, test
>>>>>>>>>>> and
>>>>>>>>>>> resubmit for review.
>>>>>>>>>>>
>>>>>>>>>>> Regards,
>>>>>>>>>>> Rohit
>>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>> Hi All,
>>>>>>>>>>
>>>>>>>>>> I have updated the patch wrt openjdk10/hotspot (parent:
>>>>>>>>>> 13519:71337910df60), did regression testing using jtreg ($make
>>>>>>>>>> default) and didnt find any regressions.
>>>>>>>>>>
>>>>>>>>>> Can anyone please volunteer to review this patch  which sets
>>>>>>>>>> flag/ISA
>>>>>>>>>> defaults for newer AMD 17h (EPYC) processor?
>>>>>>>>>>
>>>>>>>>>> ************************* Patch ****************************
>>>>>>>>>>
>>>>>>>>>> diff --git a/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>> b/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>> --- a/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>> +++ b/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>> @@ -1088,6 +1088,22 @@
>>>>>>>>>>            }
>>>>>>>>>>            FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
>>>>>>>>>>          }
>>>>>>>>>> +    if (supports_sha()) {
>>>>>>>>>> +      if (FLAG_IS_DEFAULT(UseSHA)) {
>>>>>>>>>> +        FLAG_SET_DEFAULT(UseSHA, true);
>>>>>>>>>> +      }
>>>>>>>>>> +    } else if (UseSHA || UseSHA1Intrinsics || UseSHA256Intrinsics
>>>>>>>>>> ||
>>>>>>>>>> UseSHA512Intrinsics) {
>>>>>>>>>> +      if (!FLAG_IS_DEFAULT(UseSHA) ||
>>>>>>>>>> +          !FLAG_IS_DEFAULT(UseSHA1Intrinsics) ||
>>>>>>>>>> +          !FLAG_IS_DEFAULT(UseSHA256Intrinsics) ||
>>>>>>>>>> +          !FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
>>>>>>>>>> +        warning("SHA instructions are not available on this
>>>>>>>>>> CPU");
>>>>>>>>>> +      }
>>>>>>>>>> +      FLAG_SET_DEFAULT(UseSHA, false);
>>>>>>>>>> +      FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
>>>>>>>>>> +      FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
>>>>>>>>>> +      FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
>>>>>>>>>> +    }
>>>>>>>>>>
>>>>>>>>>>          // some defaults for AMD family 15h
>>>>>>>>>>          if ( cpu_family() == 0x15 ) {
>>>>>>>>>> @@ -1109,11 +1125,43 @@
>>>>>>>>>>          }
>>>>>>>>>>
>>>>>>>>>>      #ifdef COMPILER2
>>>>>>>>>> -    if (MaxVectorSize > 16) {
>>>>>>>>>> -      // Limit vectors size to 16 bytes on current AMD cpus.
>>>>>>>>>> +    if (cpu_family() < 0x17 && MaxVectorSize > 16) {
>>>>>>>>>> +      // Limit vectors size to 16 bytes on AMD cpus < 17h.
>>>>>>>>>>            FLAG_SET_DEFAULT(MaxVectorSize, 16);
>>>>>>>>>>          }
>>>>>>>>>>      #endif // COMPILER2
>>>>>>>>>> +
>>>>>>>>>> +    // Some defaults for AMD family 17h
>>>>>>>>>> +    if ( cpu_family() == 0x17 ) {
>>>>>>>>>> +      // On family 17h processors use XMM and UnalignedLoadStores
>>>>>>>>>> for
>>>>>>>>>> Array Copy
>>>>>>>>>> +      if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy))
>>>>>>>>>> {
>>>>>>>>>> +        UseXMMForArrayCopy = true;
>>>>>>>>>> +      }
>>>>>>>>>> +      if (supports_sse2() &&
>>>>>>>>>> FLAG_IS_DEFAULT(UseUnalignedLoadStores))
>>>>>>>>>> {
>>>>>>>>>> +        UseUnalignedLoadStores = true;
>>>>>>>>>> +      }
>>>>>>>>>> +      if (supports_bmi2() &&
>>>>>>>>>> FLAG_IS_DEFAULT(UseBMI2Instructions)) {
>>>>>>>>>> +        UseBMI2Instructions = true;
>>>>>>>>>> +      }
>>>>>>>>>> +      if (MaxVectorSize > 32) {
>>>>>>>>>> +        FLAG_SET_DEFAULT(MaxVectorSize, 32);
>>>>>>>>>> +      }
>>>>>>>>>> +      if (UseSHA) {
>>>>>>>>>> +        if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
>>>>>>>>>> +          FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
>>>>>>>>>> +        } else if (UseSHA512Intrinsics) {
>>>>>>>>>> +          warning("Intrinsics for SHA-384 and SHA-512 crypto hash
>>>>>>>>>> functions not available on this CPU.");
>>>>>>>>>> +          FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
>>>>>>>>>> +        }
>>>>>>>>>> +      }
>>>>>>>>>> +#ifdef COMPILER2
>>>>>>>>>> +      if (supports_sse4_2()) {
>>>>>>>>>> +        if (FLAG_IS_DEFAULT(UseFPUForSpilling)) {
>>>>>>>>>> +          FLAG_SET_DEFAULT(UseFPUForSpilling, true);
>>>>>>>>>> +        }
>>>>>>>>>> +      }
>>>>>>>>>> +#endif
>>>>>>>>>> +    }
>>>>>>>>>>        }
>>>>>>>>>>
>>>>>>>>>>        if( is_intel() ) { // Intel cpus specific settings
>>>>>>>>>> diff --git a/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>> b/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>> --- a/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>> +++ b/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>> @@ -505,6 +505,14 @@
>>>>>>>>>>            result |= CPU_CLMUL;
>>>>>>>>>>          if (_cpuid_info.sef_cpuid7_ebx.bits.rtm != 0)
>>>>>>>>>>            result |= CPU_RTM;
>>>>>>>>>> +    if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
>>>>>>>>>> +       result |= CPU_ADX;
>>>>>>>>>> +    if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
>>>>>>>>>> +      result |= CPU_BMI2;
>>>>>>>>>> +    if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
>>>>>>>>>> +      result |= CPU_SHA;
>>>>>>>>>> +    if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
>>>>>>>>>> +      result |= CPU_FMA;
>>>>>>>>>>
>>>>>>>>>>          // AMD features.
>>>>>>>>>>          if (is_amd()) {
>>>>>>>>>> @@ -515,19 +523,13 @@
>>>>>>>>>>              result |= CPU_LZCNT;
>>>>>>>>>>            if (_cpuid_info.ext_cpuid1_ecx.bits.sse4a != 0)
>>>>>>>>>>              result |= CPU_SSE4A;
>>>>>>>>>> +      if(_cpuid_info.std_cpuid1_edx.bits.ht != 0)
>>>>>>>>>> +        result |= CPU_HT;
>>>>>>>>>>          }
>>>>>>>>>>          // Intel features.
>>>>>>>>>>          if(is_intel()) {
>>>>>>>>>> -      if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
>>>>>>>>>> -         result |= CPU_ADX;
>>>>>>>>>> -      if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
>>>>>>>>>> -        result |= CPU_BMI2;
>>>>>>>>>> -      if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
>>>>>>>>>> -        result |= CPU_SHA;
>>>>>>>>>>            if(_cpuid_info.ext_cpuid1_ecx.bits.lzcnt_intel != 0)
>>>>>>>>>>              result |= CPU_LZCNT;
>>>>>>>>>> -      if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
>>>>>>>>>> -        result |= CPU_FMA;
>>>>>>>>>>            // for Intel, ecx.bits.misalignsse bit (bit 8)
>>>>>>>>>> indicates
>>>>>>>>>> support for prefetchw
>>>>>>>>>>            if (_cpuid_info.ext_cpuid1_ecx.bits.misalignsse != 0) {
>>>>>>>>>>              result |= CPU_3DNOW_PREFETCH;
>>>>>>>>>>
>>>>>>>>>> **************************************************************
>>>>>>>>>>
>>>>>>>>>> Thanks,
>>>>>>>>>> Rohit
>>>>>>>>>>
>>>>>>>>>>>>
>>>>>>>>>>>> On 1/09/2017 1:11 AM, Rohit Arul Raj wrote:
>>>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>>> On Thu, Aug 31, 2017 at 5:59 PM, David Holmes
>>>>>>>>>>>>> <david.holmes at oracle.com>
>>>>>>>>>>>>> wrote:
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> Hi Rohit,
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> On 31/08/2017 7:03 PM, Rohit Arul Raj wrote:
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> I would like an volunteer to review this patch (openJDK9)
>>>>>>>>>>>>>>> which
>>>>>>>>>>>>>>> sets
>>>>>>>>>>>>>>> flag/ISA defaults for newer AMD 17h (EPYC) processor and help
>>>>>>>>>>>>>>> us
>>>>>>>>>>>>>>> with
>>>>>>>>>>>>>>> the commit process.
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> Webrev:
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> https://www.dropbox.com/sh/08bsxaxupg8kbam/AADurTXLGIZ6C-tiIAi_Glyka?dl=0
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> Unfortunately patches can not be accepted from systems outside
>>>>>>>>>>>>>> the
>>>>>>>>>>>>>> OpenJDK
>>>>>>>>>>>>>> infrastructure and ...
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> I have also attached the patch (hg diff -g) for reference.
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> ... unfortunately patches tend to get stripped by the mail
>>>>>>>>>>>>>> servers.
>>>>>>>>>>>>>> If
>>>>>>>>>>>>>> the
>>>>>>>>>>>>>> patch is small please include it inline. Otherwise you will
>>>>>>>>>>>>>> need
>>>>>>>>>>>>>> to
>>>>>>>>>>>>>> find
>>>>>>>>>>>>>> an
>>>>>>>>>>>>>> OpenJDK Author who can host it for you on cr.openjdk.java.net.
>>>>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>>>>> 3) I have done regression testing using jtreg ($make default)
>>>>>>>>>>>>>>> and
>>>>>>>>>>>>>>> didnt find any regressions.
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> Sounds good, but until I see the patch it is hard to comment
>>>>>>>>>>>>>> on
>>>>>>>>>>>>>> testing
>>>>>>>>>>>>>> requirements.
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> Thanks,
>>>>>>>>>>>>>> David
>>>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>>> Thanks David,
>>>>>>>>>>>>> Yes, it's a small patch.
>>>>>>>>>>>>>
>>>>>>>>>>>>> diff --git a/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>>>> b/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>>>> --- a/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>>>> +++ b/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>>>> @@ -1051,6 +1051,22 @@
>>>>>>>>>>>>>             }
>>>>>>>>>>>>>             FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
>>>>>>>>>>>>>           }
>>>>>>>>>>>>> +    if (supports_sha()) {
>>>>>>>>>>>>> +      if (FLAG_IS_DEFAULT(UseSHA)) {
>>>>>>>>>>>>> +        FLAG_SET_DEFAULT(UseSHA, true);
>>>>>>>>>>>>> +      }
>>>>>>>>>>>>> +    } else if (UseSHA || UseSHA1Intrinsics ||
>>>>>>>>>>>>> UseSHA256Intrinsics
>>>>>>>>>>>>> ||
>>>>>>>>>>>>> UseSHA512Intrinsics) {
>>>>>>>>>>>>> +      if (!FLAG_IS_DEFAULT(UseSHA) ||
>>>>>>>>>>>>> +          !FLAG_IS_DEFAULT(UseSHA1Intrinsics) ||
>>>>>>>>>>>>> +          !FLAG_IS_DEFAULT(UseSHA256Intrinsics) ||
>>>>>>>>>>>>> +          !FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
>>>>>>>>>>>>> +        warning("SHA instructions are not available on this
>>>>>>>>>>>>> CPU");
>>>>>>>>>>>>> +      }
>>>>>>>>>>>>> +      FLAG_SET_DEFAULT(UseSHA, false);
>>>>>>>>>>>>> +      FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
>>>>>>>>>>>>> +      FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
>>>>>>>>>>>>> +      FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
>>>>>>>>>>>>> +    }
>>>>>>>>>>>>>
>>>>>>>>>>>>>           // some defaults for AMD family 15h
>>>>>>>>>>>>>           if ( cpu_family() == 0x15 ) {
>>>>>>>>>>>>> @@ -1072,11 +1088,43 @@
>>>>>>>>>>>>>           }
>>>>>>>>>>>>>
>>>>>>>>>>>>>       #ifdef COMPILER2
>>>>>>>>>>>>> -    if (MaxVectorSize > 16) {
>>>>>>>>>>>>> -      // Limit vectors size to 16 bytes on current AMD cpus.
>>>>>>>>>>>>> +    if (cpu_family() < 0x17 && MaxVectorSize > 16) {
>>>>>>>>>>>>> +      // Limit vectors size to 16 bytes on AMD cpus < 17h.
>>>>>>>>>>>>>             FLAG_SET_DEFAULT(MaxVectorSize, 16);
>>>>>>>>>>>>>           }
>>>>>>>>>>>>>       #endif // COMPILER2
>>>>>>>>>>>>> +
>>>>>>>>>>>>> +    // Some defaults for AMD family 17h
>>>>>>>>>>>>> +    if ( cpu_family() == 0x17 ) {
>>>>>>>>>>>>> +      // On family 17h processors use XMM and
>>>>>>>>>>>>> UnalignedLoadStores
>>>>>>>>>>>>> for
>>>>>>>>>>>>> Array Copy
>>>>>>>>>>>>> +      if (supports_sse2() &&
>>>>>>>>>>>>> FLAG_IS_DEFAULT(UseXMMForArrayCopy))
>>>>>>>>>>>>> {
>>>>>>>>>>>>> +        UseXMMForArrayCopy = true;
>>>>>>>>>>>>> +      }
>>>>>>>>>>>>> +      if (supports_sse2() &&
>>>>>>>>>>>>> FLAG_IS_DEFAULT(UseUnalignedLoadStores))
>>>>>>>>>>>>> {
>>>>>>>>>>>>> +        UseUnalignedLoadStores = true;
>>>>>>>>>>>>> +      }
>>>>>>>>>>>>> +      if (supports_bmi2() &&
>>>>>>>>>>>>> FLAG_IS_DEFAULT(UseBMI2Instructions))
>>>>>>>>>>>>> {
>>>>>>>>>>>>> +        UseBMI2Instructions = true;
>>>>>>>>>>>>> +      }
>>>>>>>>>>>>> +      if (MaxVectorSize > 32) {
>>>>>>>>>>>>> +        FLAG_SET_DEFAULT(MaxVectorSize, 32);
>>>>>>>>>>>>> +      }
>>>>>>>>>>>>> +      if (UseSHA) {
>>>>>>>>>>>>> +        if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
>>>>>>>>>>>>> +          FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
>>>>>>>>>>>>> +        } else if (UseSHA512Intrinsics) {
>>>>>>>>>>>>> +          warning("Intrinsics for SHA-384 and SHA-512 crypto
>>>>>>>>>>>>> hash
>>>>>>>>>>>>> functions not available on this CPU.");
>>>>>>>>>>>>> +          FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
>>>>>>>>>>>>> +        }
>>>>>>>>>>>>> +      }
>>>>>>>>>>>>> +#ifdef COMPILER2
>>>>>>>>>>>>> +      if (supports_sse4_2()) {
>>>>>>>>>>>>> +        if (FLAG_IS_DEFAULT(UseFPUForSpilling)) {
>>>>>>>>>>>>> +          FLAG_SET_DEFAULT(UseFPUForSpilling, true);
>>>>>>>>>>>>> +        }
>>>>>>>>>>>>> +      }
>>>>>>>>>>>>> +#endif
>>>>>>>>>>>>> +    }
>>>>>>>>>>>>>         }
>>>>>>>>>>>>>
>>>>>>>>>>>>>         if( is_intel() ) { // Intel cpus specific settings
>>>>>>>>>>>>> diff --git a/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>>>>> b/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>>>>> --- a/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>>>>> +++ b/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>>>>> @@ -513,6 +513,16 @@
>>>>>>>>>>>>>               result |= CPU_LZCNT;
>>>>>>>>>>>>>             if (_cpuid_info.ext_cpuid1_ecx.bits.sse4a != 0)
>>>>>>>>>>>>>               result |= CPU_SSE4A;
>>>>>>>>>>>>> +      if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
>>>>>>>>>>>>> +        result |= CPU_BMI2;
>>>>>>>>>>>>> +      if(_cpuid_info.std_cpuid1_edx.bits.ht != 0)
>>>>>>>>>>>>> +        result |= CPU_HT;
>>>>>>>>>>>>> +      if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
>>>>>>>>>>>>> +        result |= CPU_ADX;
>>>>>>>>>>>>> +      if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
>>>>>>>>>>>>> +        result |= CPU_SHA;
>>>>>>>>>>>>> +      if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
>>>>>>>>>>>>> +        result |= CPU_FMA;
>>>>>>>>>>>>>           }
>>>>>>>>>>>>>           // Intel features.
>>>>>>>>>>>>>           if(is_intel()) {
>>>>>>>>>>>>>
>>>>>>>>>>>>> Regards,
>>>>>>>>>>>>> Rohit
>>>>>>>>>>>>>
>>>>>>>>>>>>
>>>>>>>>>
>>>>>>>
>>>>>
>