RFR: Newer AMD 17h (EPYC) Processor family defaults

Rohit Arul Raj rohitarulraj at gmail.com
Fri Sep 22 07:41:24 UTC 2017


Thanks Vladimir,

On Wed, Sep 20, 2017 at 10:07 PM, Vladimir Kozlov
<vladimir.kozlov at oracle.com> wrote:
>>       __ cmpl(rax, 0x80000000);     // Is cpuid(0x80000001) supported?
>>       __ jcc(Assembler::belowEqual, done);
>>       __ cmpl(rax, 0x80000004);     // Is cpuid(0x80000005) supported?
>> -    __ jccb(Assembler::belowEqual, ext_cpuid1);
>> +   __ jcc(Assembler::belowEqual, ext_cpuid1);
>
>
> Good. You may need to increase the size of the buffer too (to be safe), to 1100:
>
> static const int stub_size = 1000;
>

Please find the updated patch after the requested change.

diff --git a/src/cpu/x86/vm/vm_version_x86.cpp
b/src/cpu/x86/vm/vm_version_x86.cpp
--- a/src/cpu/x86/vm/vm_version_x86.cpp
+++ b/src/cpu/x86/vm/vm_version_x86.cpp
@@ -46,7 +46,7 @@
 address VM_Version::_cpuinfo_cont_addr = 0;

 static BufferBlob* stub_blob;
-static const int stub_size = 1000;
+static const int stub_size = 1100;

 extern "C" {
   typedef void (*get_cpu_info_stub_t)(void*);
@@ -70,7 +70,7 @@
     bool use_evex = FLAG_IS_DEFAULT(UseAVX) || (UseAVX > 2);

     Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4;
-    Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7,
done, wrapup;
+    Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7,
ext_cpuid8, done, wrapup;
     Label legacy_setup, save_restore_except, legacy_save_restore,
start_simd_check;

     StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub");
@@ -267,14 +267,30 @@
     __ cmpl(rax, 0x80000000);     // Is cpuid(0x80000001) supported?
     __ jcc(Assembler::belowEqual, done);
     __ cmpl(rax, 0x80000004);     // Is cpuid(0x80000005) supported?
-    __ jccb(Assembler::belowEqual, ext_cpuid1);
+    __ jcc(Assembler::belowEqual, ext_cpuid1);
     __ cmpl(rax, 0x80000006);     // Is cpuid(0x80000007) supported?
     __ jccb(Assembler::belowEqual, ext_cpuid5);
     __ cmpl(rax, 0x80000007);     // Is cpuid(0x80000008) supported?
     __ jccb(Assembler::belowEqual, ext_cpuid7);
+    __ cmpl(rax, 0x80000008);     // Is cpuid(0x80000009 and above) supported?
+    __ jccb(Assembler::belowEqual, ext_cpuid8);
+    __ cmpl(rax, 0x8000001E);     // Is cpuid(0x8000001E) supported?
+    __ jccb(Assembler::below, ext_cpuid8);
+    //
+    // Extended cpuid(0x8000001E)
+    //
+    __ movl(rax, 0x8000001E);
+    __ cpuid();
+    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1E_offset())));
+    __ movl(Address(rsi, 0), rax);
+    __ movl(Address(rsi, 4), rbx);
+    __ movl(Address(rsi, 8), rcx);
+    __ movl(Address(rsi,12), rdx);
+
     //
     // Extended cpuid(0x80000008)
     //
+    __ bind(ext_cpuid8);
     __ movl(rax, 0x80000008);
     __ cpuid();
     __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid8_offset())));
@@ -1109,11 +1125,27 @@
     }

 #ifdef COMPILER2
-    if (MaxVectorSize > 16) {
-      // Limit vectors size to 16 bytes on current AMD cpus.
+    if (cpu_family() < 0x17 && MaxVectorSize > 16) {
+      // Limit vectors size to 16 bytes on AMD cpus < 17h.
       FLAG_SET_DEFAULT(MaxVectorSize, 16);
     }
 #endif // COMPILER2
+
+    // Some defaults for AMD family 17h
+    if ( cpu_family() == 0x17 ) {
+      // On family 17h processors use XMM and UnalignedLoadStores for
Array Copy
+      if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
+        FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
+      }
+      if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
+        FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
+      }
+#ifdef COMPILER2
+      if (supports_sse4_2() && FLAG_IS_DEFAULT(UseFPUForSpilling)) {
+        FLAG_SET_DEFAULT(UseFPUForSpilling, true);
+      }
+#endif
+    }
   }

   if( is_intel() ) { // Intel cpus specific settings
diff --git a/src/cpu/x86/vm/vm_version_x86.hpp
b/src/cpu/x86/vm/vm_version_x86.hpp
--- a/src/cpu/x86/vm/vm_version_x86.hpp
+++ b/src/cpu/x86/vm/vm_version_x86.hpp
@@ -228,6 +228,15 @@
     } bits;
   };

+  union ExtCpuid1EEbx {
+    uint32_t value;
+    struct {
+      uint32_t                  : 8,
+               threads_per_core : 8,
+                                : 16;
+    } bits;
+  };
+
   union XemXcr0Eax {
     uint32_t value;
     struct {
@@ -398,6 +407,12 @@
     ExtCpuid8Ecx ext_cpuid8_ecx;
     uint32_t     ext_cpuid8_edx; // reserved

+    // cpuid function 0x8000001E // AMD 17h
+    uint32_t      ext_cpuid1E_eax;
+    ExtCpuid1EEbx ext_cpuid1E_ebx; // threads per core (AMD17h)
+    uint32_t      ext_cpuid1E_ecx;
+    uint32_t      ext_cpuid1E_edx; // unused currently
+
     // extended control register XCR0 (the XFEATURE_ENABLED_MASK register)
     XemXcr0Eax   xem_xcr0_eax;
     uint32_t     xem_xcr0_edx; // reserved
@@ -505,6 +520,14 @@
       result |= CPU_CLMUL;
     if (_cpuid_info.sef_cpuid7_ebx.bits.rtm != 0)
       result |= CPU_RTM;
+    if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
+       result |= CPU_ADX;
+    if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
+      result |= CPU_BMI2;
+    if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
+      result |= CPU_SHA;
+    if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
+      result |= CPU_FMA;

     // AMD features.
     if (is_amd()) {
@@ -518,16 +541,8 @@
     }
     // Intel features.
     if(is_intel()) {
-      if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
-         result |= CPU_ADX;
-      if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
-        result |= CPU_BMI2;
-      if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
-        result |= CPU_SHA;
       if(_cpuid_info.ext_cpuid1_ecx.bits.lzcnt_intel != 0)
         result |= CPU_LZCNT;
-      if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
-        result |= CPU_FMA;
       // for Intel, ecx.bits.misalignsse bit (bit 8) indicates
support for prefetchw
       if (_cpuid_info.ext_cpuid1_ecx.bits.misalignsse != 0) {
         result |= CPU_3DNOW_PREFETCH;
@@ -590,6 +605,7 @@
   static ByteSize ext_cpuid5_offset() { return
byte_offset_of(CpuidInfo, ext_cpuid5_eax); }
   static ByteSize ext_cpuid7_offset() { return
byte_offset_of(CpuidInfo, ext_cpuid7_eax); }
   static ByteSize ext_cpuid8_offset() { return
byte_offset_of(CpuidInfo, ext_cpuid8_eax); }
+  static ByteSize ext_cpuid1E_offset() { return
byte_offset_of(CpuidInfo, ext_cpuid1E_eax); }
   static ByteSize tpl_cpuidB0_offset() { return
byte_offset_of(CpuidInfo, tpl_cpuidB0_eax); }
   static ByteSize tpl_cpuidB1_offset() { return
byte_offset_of(CpuidInfo, tpl_cpuidB1_eax); }
   static ByteSize tpl_cpuidB2_offset() { return
byte_offset_of(CpuidInfo, tpl_cpuidB2_eax); }
@@ -673,8 +689,12 @@
     if (is_intel() && supports_processor_topology()) {
       result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
     } else if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) {
-      result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
-               cores_per_cpu();
+      if (cpu_family() >= 0x17) {
+        result = _cpuid_info.ext_cpuid1E_ebx.bits.threads_per_core + 1;
+      } else {
+        result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
+                 cores_per_cpu();
+      }
     }
     return (result == 0 ? 1 : result);
   }

Regards,
Rohit

>>
>> diff --git a/src/cpu/x86/vm/vm_version_x86.cpp
>> b/src/cpu/x86/vm/vm_version_x86.cpp
>> --- a/src/cpu/x86/vm/vm_version_x86.cpp
>> +++ b/src/cpu/x86/vm/vm_version_x86.cpp
>> @@ -70,7 +70,7 @@
>>       bool use_evex = FLAG_IS_DEFAULT(UseAVX) || (UseAVX > 2);
>>
>>       Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4;
>> -    Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7,
>> done, wrapup;
>> +    Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7,
>> ext_cpuid8, done, wrapup;
>>       Label legacy_setup, save_restore_except, legacy_save_restore,
>> start_simd_check;
>>
>>       StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub");
>> @@ -267,14 +267,30 @@
>>       __ cmpl(rax, 0x80000000);     // Is cpuid(0x80000001) supported?
>>       __ jcc(Assembler::belowEqual, done);
>>       __ cmpl(rax, 0x80000004);     // Is cpuid(0x80000005) supported?
>> -    __ jccb(Assembler::belowEqual, ext_cpuid1);
>> +    __ jcc(Assembler::belowEqual, ext_cpuid1);
>>       __ cmpl(rax, 0x80000006);     // Is cpuid(0x80000007) supported?
>>       __ jccb(Assembler::belowEqual, ext_cpuid5);
>>       __ cmpl(rax, 0x80000007);     // Is cpuid(0x80000008) supported?
>>       __ jccb(Assembler::belowEqual, ext_cpuid7);
>> +    __ cmpl(rax, 0x80000008);     // Is cpuid(0x80000009 and above)
>> supported?
>> +    __ jccb(Assembler::belowEqual, ext_cpuid8);
>> +    __ cmpl(rax, 0x8000001E);     // Is cpuid(0x8000001E) supported?
>> +    __ jccb(Assembler::below, ext_cpuid8);
>> +    //
>> +    // Extended cpuid(0x8000001E)
>> +    //
>> +    __ movl(rax, 0x8000001E);
>> +    __ cpuid();
>> +    __ lea(rsi, Address(rbp,
>> in_bytes(VM_Version::ext_cpuid1E_offset())));
>> +    __ movl(Address(rsi, 0), rax);
>> +    __ movl(Address(rsi, 4), rbx);
>> +    __ movl(Address(rsi, 8), rcx);
>> +    __ movl(Address(rsi,12), rdx);
>> +
>>       //
>>       // Extended cpuid(0x80000008)
>>       //
>> +    __ bind(ext_cpuid8);
>>       __ movl(rax, 0x80000008);
>>       __ cpuid();
>>       __ lea(rsi, Address(rbp,
>> in_bytes(VM_Version::ext_cpuid8_offset())));
>> @@ -1109,11 +1125,27 @@
>>       }
>>
>>   #ifdef COMPILER2
>> -    if (MaxVectorSize > 16) {
>> -      // Limit vectors size to 16 bytes on current AMD cpus.
>> +    if (cpu_family() < 0x17 && MaxVectorSize > 16) {
>> +      // Limit vectors size to 16 bytes on AMD cpus < 17h.
>>         FLAG_SET_DEFAULT(MaxVectorSize, 16);
>>       }
>>   #endif // COMPILER2
>> +
>> +    // Some defaults for AMD family 17h
>> +    if ( cpu_family() == 0x17 ) {
>> +      // On family 17h processors use XMM and UnalignedLoadStores for
>> Array Copy
>> +      if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
>> +        FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
>> +      }
>> +      if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
>> +        FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
>> +      }
>> +#ifdef COMPILER2
>> +      if (supports_sse4_2() && FLAG_IS_DEFAULT(UseFPUForSpilling)) {
>> +        FLAG_SET_DEFAULT(UseFPUForSpilling, true);
>> +      }
>> +#endif
>> +    }
>>     }
>>
>>     if( is_intel() ) { // Intel cpus specific settings
>> diff --git a/src/cpu/x86/vm/vm_version_x86.hpp
>> b/src/cpu/x86/vm/vm_version_x86.hpp
>> --- a/src/cpu/x86/vm/vm_version_x86.hpp
>> +++ b/src/cpu/x86/vm/vm_version_x86.hpp
>> @@ -228,6 +228,15 @@
>>       } bits;
>>     };
>>
>> +  union ExtCpuid1EEbx {
>> +    uint32_t value;
>> +    struct {
>> +      uint32_t                  : 8,
>> +               threads_per_core : 8,
>> +                                : 16;
>> +    } bits;
>> +  };
>> +
>>     union XemXcr0Eax {
>>       uint32_t value;
>>       struct {
>> @@ -398,6 +407,12 @@
>>       ExtCpuid8Ecx ext_cpuid8_ecx;
>>       uint32_t     ext_cpuid8_edx; // reserved
>>
>> +    // cpuid function 0x8000001E // AMD 17h
>> +    uint32_t      ext_cpuid1E_eax;
>> +    ExtCpuid1EEbx ext_cpuid1E_ebx; // threads per core (AMD17h)
>> +    uint32_t      ext_cpuid1E_ecx;
>> +    uint32_t      ext_cpuid1E_edx; // unused currently
>> +
>>       // extended control register XCR0 (the XFEATURE_ENABLED_MASK
>> register)
>>       XemXcr0Eax   xem_xcr0_eax;
>>       uint32_t     xem_xcr0_edx; // reserved
>> @@ -505,6 +520,14 @@
>>         result |= CPU_CLMUL;
>>       if (_cpuid_info.sef_cpuid7_ebx.bits.rtm != 0)
>>         result |= CPU_RTM;
>> +    if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
>> +       result |= CPU_ADX;
>> +    if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
>> +      result |= CPU_BMI2;
>> +    if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
>> +      result |= CPU_SHA;
>> +    if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
>> +      result |= CPU_FMA;
>>
>>       // AMD features.
>>       if (is_amd()) {
>> @@ -518,16 +541,8 @@
>>       }
>>       // Intel features.
>>       if(is_intel()) {
>> -      if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
>> -         result |= CPU_ADX;
>> -      if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
>> -        result |= CPU_BMI2;
>> -      if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
>> -        result |= CPU_SHA;
>>         if(_cpuid_info.ext_cpuid1_ecx.bits.lzcnt_intel != 0)
>>           result |= CPU_LZCNT;
>> -      if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
>> -        result |= CPU_FMA;
>>         // for Intel, ecx.bits.misalignsse bit (bit 8) indicates
>> support for prefetchw
>>         if (_cpuid_info.ext_cpuid1_ecx.bits.misalignsse != 0) {
>>           result |= CPU_3DNOW_PREFETCH;
>> @@ -590,6 +605,7 @@
>>     static ByteSize ext_cpuid5_offset() { return
>> byte_offset_of(CpuidInfo, ext_cpuid5_eax); }
>>     static ByteSize ext_cpuid7_offset() { return
>> byte_offset_of(CpuidInfo, ext_cpuid7_eax); }
>>     static ByteSize ext_cpuid8_offset() { return
>> byte_offset_of(CpuidInfo, ext_cpuid8_eax); }
>> +  static ByteSize ext_cpuid1E_offset() { return
>> byte_offset_of(CpuidInfo, ext_cpuid1E_eax); }
>>     static ByteSize tpl_cpuidB0_offset() { return
>> byte_offset_of(CpuidInfo, tpl_cpuidB0_eax); }
>>     static ByteSize tpl_cpuidB1_offset() { return
>> byte_offset_of(CpuidInfo, tpl_cpuidB1_eax); }
>>     static ByteSize tpl_cpuidB2_offset() { return
>> byte_offset_of(CpuidInfo, tpl_cpuidB2_eax); }
>> @@ -673,8 +689,12 @@
>>       if (is_intel() && supports_processor_topology()) {
>>         result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
>>       } else if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) {
>> -      result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
>> -               cores_per_cpu();
>> +      if (cpu_family() >= 0x17) {
>> +        result = _cpuid_info.ext_cpuid1E_ebx.bits.threads_per_core + 1;
>> +      } else {
>> +        result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
>> +                 cores_per_cpu();
>> +      }
>>       }
>>       return (result == 0 ? 1 : result);
>>     }
>>
>> Please let me know your comments.
>> Thanks for your review.
>>
>> Regards,
>> Rohit
>>
>>>
>>>
>>> On 9/11/17 9:52 PM, Rohit Arul Raj wrote:
>>>>
>>>>
>>>> Hello David,
>>>>
>>>>>>
>>>>>>
>>>>>> 1. ExtCpuid1EEx
>>>>>>
>>>>>> Should this be ExtCpuid1EEbx? (I see the naming here is somewhat
>>>>>> inconsistent - and potentially confusing: I would have preferred to
>>>>>> see things like ExtCpuid_1E_Ebx, to make it clear.)
>>>>>
>>>>>
>>>>>
>>>>> Yes, I can change it accordingly.
>>>>>
>>>>
>>>> I have attached the updated, re-tested patch as per your comments above.
>>>>
>>>> diff --git a/src/cpu/x86/vm/vm_version_x86.cpp
>>>> b/src/cpu/x86/vm/vm_version_x86.cpp
>>>> --- a/src/cpu/x86/vm/vm_version_x86.cpp
>>>> +++ b/src/cpu/x86/vm/vm_version_x86.cpp
>>>> @@ -70,7 +70,7 @@
>>>>        bool use_evex = FLAG_IS_DEFAULT(UseAVX) || (UseAVX > 2);
>>>>
>>>>        Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4;
>>>> -    Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7,
>>>> done, wrapup;
>>>> +    Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7,
>>>> ext_cpuid8, done, wrapup;
>>>>        Label legacy_setup, save_restore_except, legacy_save_restore,
>>>> start_simd_check;
>>>>
>>>>        StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub");
>>>> @@ -272,9 +272,23 @@
>>>>        __ jccb(Assembler::belowEqual, ext_cpuid5);
>>>>        __ cmpl(rax, 0x80000007);     // Is cpuid(0x80000008) supported?
>>>>        __ jccb(Assembler::belowEqual, ext_cpuid7);
>>>> +    __ cmpl(rax, 0x80000008);     // Is cpuid(0x8000001E) supported?
>>>> +    __ jccb(Assembler::belowEqual, ext_cpuid8);
>>>> +    //
>>>> +    // Extended cpuid(0x8000001E)
>>>> +    //
>>>> +    __ movl(rax, 0x8000001E);
>>>> +    __ cpuid();
>>>> +    __ lea(rsi, Address(rbp,
>>>> in_bytes(VM_Version::ext_cpuid_1E_offset())));
>>>> +    __ movl(Address(rsi, 0), rax);
>>>> +    __ movl(Address(rsi, 4), rbx);
>>>> +    __ movl(Address(rsi, 8), rcx);
>>>> +    __ movl(Address(rsi,12), rdx);
>>>> +
>>>>        //
>>>>        // Extended cpuid(0x80000008)
>>>>        //
>>>> +    __ bind(ext_cpuid8);
>>>>        __ movl(rax, 0x80000008);
>>>>        __ cpuid();
>>>>        __ lea(rsi, Address(rbp,
>>>> in_bytes(VM_Version::ext_cpuid8_offset())));
>>>> @@ -1109,11 +1123,27 @@
>>>>        }
>>>>
>>>>    #ifdef COMPILER2
>>>> -    if (MaxVectorSize > 16) {
>>>> -      // Limit vectors size to 16 bytes on current AMD cpus.
>>>> +    if (cpu_family() < 0x17 && MaxVectorSize > 16) {
>>>> +      // Limit vectors size to 16 bytes on AMD cpus < 17h.
>>>>          FLAG_SET_DEFAULT(MaxVectorSize, 16);
>>>>        }
>>>>    #endif // COMPILER2
>>>> +
>>>> +    // Some defaults for AMD family 17h
>>>> +    if ( cpu_family() == 0x17 ) {
>>>> +      // On family 17h processors use XMM and UnalignedLoadStores for
>>>> Array Copy
>>>> +      if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
>>>> +        FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
>>>> +      }
>>>> +      if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
>>>> +        FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
>>>> +      }
>>>> +#ifdef COMPILER2
>>>> +      if (supports_sse4_2() && FLAG_IS_DEFAULT(UseFPUForSpilling)) {
>>>> +        FLAG_SET_DEFAULT(UseFPUForSpilling, true);
>>>> +      }
>>>> +#endif
>>>> +    }
>>>>      }
>>>>
>>>>      if( is_intel() ) { // Intel cpus specific settings
>>>> diff --git a/src/cpu/x86/vm/vm_version_x86.hpp
>>>> b/src/cpu/x86/vm/vm_version_x86.hpp
>>>> --- a/src/cpu/x86/vm/vm_version_x86.hpp
>>>> +++ b/src/cpu/x86/vm/vm_version_x86.hpp
>>>> @@ -228,6 +228,15 @@
>>>>        } bits;
>>>>      };
>>>>
>>>> +  union ExtCpuid_1E_Ebx {
>>>> +    uint32_t value;
>>>> +    struct {
>>>> +      uint32_t                  : 8,
>>>> +               threads_per_core : 8,
>>>> +                                : 16;
>>>> +    } bits;
>>>> +  };
>>>> +
>>>>      union XemXcr0Eax {
>>>>        uint32_t value;
>>>>        struct {
>>>> @@ -398,6 +407,12 @@
>>>>        ExtCpuid8Ecx ext_cpuid8_ecx;
>>>>        uint32_t     ext_cpuid8_edx; // reserved
>>>>
>>>> +    // cpuid function 0x8000001E // AMD 17h
>>>> +    uint32_t        ext_cpuid_1E_eax;
>>>> +    ExtCpuid_1E_Ebx ext_cpuid_1E_ebx; // threads per core (AMD17h)
>>>> +    uint32_t        ext_cpuid_1E_ecx;
>>>> +    uint32_t        ext_cpuid_1E_edx; // unused currently
>>>> +
>>>>        // extended control register XCR0 (the XFEATURE_ENABLED_MASK
>>>> register)
>>>>        XemXcr0Eax   xem_xcr0_eax;
>>>>        uint32_t     xem_xcr0_edx; // reserved
>>>> @@ -505,6 +520,14 @@
>>>>          result |= CPU_CLMUL;
>>>>        if (_cpuid_info.sef_cpuid7_ebx.bits.rtm != 0)
>>>>          result |= CPU_RTM;
>>>> +    if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
>>>> +       result |= CPU_ADX;
>>>> +    if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
>>>> +      result |= CPU_BMI2;
>>>> +    if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
>>>> +      result |= CPU_SHA;
>>>> +    if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
>>>> +      result |= CPU_FMA;
>>>>
>>>>        // AMD features.
>>>>        if (is_amd()) {
>>>> @@ -518,16 +541,8 @@
>>>>        }
>>>>        // Intel features.
>>>>        if(is_intel()) {
>>>> -      if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
>>>> -         result |= CPU_ADX;
>>>> -      if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
>>>> -        result |= CPU_BMI2;
>>>> -      if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
>>>> -        result |= CPU_SHA;
>>>>          if(_cpuid_info.ext_cpuid1_ecx.bits.lzcnt_intel != 0)
>>>>            result |= CPU_LZCNT;
>>>> -      if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
>>>> -        result |= CPU_FMA;
>>>>          // for Intel, ecx.bits.misalignsse bit (bit 8) indicates
>>>> support for prefetchw
>>>>          if (_cpuid_info.ext_cpuid1_ecx.bits.misalignsse != 0) {
>>>>            result |= CPU_3DNOW_PREFETCH;
>>>> @@ -590,6 +605,7 @@
>>>>      static ByteSize ext_cpuid5_offset() { return
>>>> byte_offset_of(CpuidInfo, ext_cpuid5_eax); }
>>>>      static ByteSize ext_cpuid7_offset() { return
>>>> byte_offset_of(CpuidInfo, ext_cpuid7_eax); }
>>>>      static ByteSize ext_cpuid8_offset() { return
>>>> byte_offset_of(CpuidInfo, ext_cpuid8_eax); }
>>>> +  static ByteSize ext_cpuid_1E_offset() { return
>>>> byte_offset_of(CpuidInfo, ext_cpuid_1E_eax); }
>>>>      static ByteSize tpl_cpuidB0_offset() { return
>>>> byte_offset_of(CpuidInfo, tpl_cpuidB0_eax); }
>>>>      static ByteSize tpl_cpuidB1_offset() { return
>>>> byte_offset_of(CpuidInfo, tpl_cpuidB1_eax); }
>>>>      static ByteSize tpl_cpuidB2_offset() { return
>>>> byte_offset_of(CpuidInfo, tpl_cpuidB2_eax); }
>>>> @@ -673,8 +689,11 @@
>>>>        if (is_intel() && supports_processor_topology()) {
>>>>          result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
>>>>        } else if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) {
>>>> -      result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
>>>> -               cores_per_cpu();
>>>> +      if (cpu_family() >= 0x17)
>>>> +        result = _cpuid_info.ext_cpuid_1E_ebx.bits.threads_per_core +
>>>> 1;
>>>> +      else
>>>> +        result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
>>>> +                 cores_per_cpu();
>>>>        }
>>>>        return (result == 0 ? 1 : result);
>>>>      }
>>>>
>>>>
>>>> Please let me know your comments.
>>>>
>>>> Thanks for your time.
>>>>
>>>> Regards,
>>>> Rohit
>>>>
>>>>
>>>>>> Thanks,
>>>>>> David
>>>>>> -----
>>>>>>
>>>>>>
>>>>>>> Reference:
>>>>>>>
>>>>>>>
>>>>>>> https://support.amd.com/TechDocs/54945_PPR_Family_17h_Models_00h-0Fh.pdf
>>>>>>> [Pg 82]
>>>>>>>
>>>>>>>        CPUID_Fn8000001E_EBX [Core Identifiers] (CoreId)
>>>>>>>          15:8 ThreadsPerCore: threads per core. Read-only. Reset: XXh.
>>>>>>>          The number of threads per core is ThreadsPerCore+1.
>>>>>>>
>>>>>>> diff --git a/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>> b/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>> --- a/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>> +++ b/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>> @@ -70,7 +70,7 @@
>>>>>>>         bool use_evex = FLAG_IS_DEFAULT(UseAVX) || (UseAVX > 2);
>>>>>>>
>>>>>>>         Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4;
>>>>>>> -    Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7,
>>>>>>> done, wrapup;
>>>>>>> +    Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7,
>>>>>>> ext_cpuid8, done, wrapup;
>>>>>>>         Label legacy_setup, save_restore_except, legacy_save_restore,
>>>>>>> start_simd_check;
>>>>>>>
>>>>>>>         StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub");
>>>>>>> @@ -272,9 +272,23 @@
>>>>>>>         __ jccb(Assembler::belowEqual, ext_cpuid5);
>>>>>>>         __ cmpl(rax, 0x80000007);     // Is cpuid(0x80000008)
>>>>>>> supported?
>>>>>>>         __ jccb(Assembler::belowEqual, ext_cpuid7);
>>>>>>> +    __ cmpl(rax, 0x80000008);     // Is cpuid(0x8000001E) supported?
>>>>>>> +    __ jccb(Assembler::belowEqual, ext_cpuid8);
>>>>>>> +    //
>>>>>>> +    // Extended cpuid(0x8000001E)
>>>>>>> +    //
>>>>>>> +    __ movl(rax, 0x8000001E);
>>>>>>> +    __ cpuid();
>>>>>>> +    __ lea(rsi, Address(rbp,
>>>>>>> in_bytes(VM_Version::ext_cpuid1E_offset())));
>>>>>>> +    __ movl(Address(rsi, 0), rax);
>>>>>>> +    __ movl(Address(rsi, 4), rbx);
>>>>>>> +    __ movl(Address(rsi, 8), rcx);
>>>>>>> +    __ movl(Address(rsi,12), rdx);
>>>>>>> +
>>>>>>>         //
>>>>>>>         // Extended cpuid(0x80000008)
>>>>>>>         //
>>>>>>> +    __ bind(ext_cpuid8);
>>>>>>>         __ movl(rax, 0x80000008);
>>>>>>>         __ cpuid();
>>>>>>>         __ lea(rsi, Address(rbp,
>>>>>>> in_bytes(VM_Version::ext_cpuid8_offset())));
>>>>>>> @@ -1109,11 +1123,27 @@
>>>>>>>         }
>>>>>>>
>>>>>>>     #ifdef COMPILER2
>>>>>>> -    if (MaxVectorSize > 16) {
>>>>>>> -      // Limit vectors size to 16 bytes on current AMD cpus.
>>>>>>> +    if (cpu_family() < 0x17 && MaxVectorSize > 16) {
>>>>>>> +      // Limit vectors size to 16 bytes on AMD cpus < 17h.
>>>>>>>           FLAG_SET_DEFAULT(MaxVectorSize, 16);
>>>>>>>         }
>>>>>>>     #endif // COMPILER2
>>>>>>> +
>>>>>>> +    // Some defaults for AMD family 17h
>>>>>>> +    if ( cpu_family() == 0x17 ) {
>>>>>>> +      // On family 17h processors use XMM and UnalignedLoadStores
>>>>>>> for
>>>>>>> Array Copy
>>>>>>> +      if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
>>>>>>> +        FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
>>>>>>> +      }
>>>>>>> +      if (supports_sse2() &&
>>>>>>> FLAG_IS_DEFAULT(UseUnalignedLoadStores))
>>>>>>> {
>>>>>>> +        FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
>>>>>>> +      }
>>>>>>> +#ifdef COMPILER2
>>>>>>> +      if (supports_sse4_2() && FLAG_IS_DEFAULT(UseFPUForSpilling)) {
>>>>>>> +        FLAG_SET_DEFAULT(UseFPUForSpilling, true);
>>>>>>> +      }
>>>>>>> +#endif
>>>>>>> +    }
>>>>>>>       }
>>>>>>>
>>>>>>>       if( is_intel() ) { // Intel cpus specific settings
>>>>>>> diff --git a/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>> b/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>> --- a/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>> +++ b/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>> @@ -228,6 +228,15 @@
>>>>>>>         } bits;
>>>>>>>       };
>>>>>>>
>>>>>>> +  union ExtCpuid1EEx {
>>>>>>> +    uint32_t value;
>>>>>>> +    struct {
>>>>>>> +      uint32_t                  : 8,
>>>>>>> +               threads_per_core : 8,
>>>>>>> +                                : 16;
>>>>>>> +    } bits;
>>>>>>> +  };
>>>>>>> +
>>>>>>>       union XemXcr0Eax {
>>>>>>>         uint32_t value;
>>>>>>>         struct {
>>>>>>> @@ -398,6 +407,12 @@
>>>>>>>         ExtCpuid8Ecx ext_cpuid8_ecx;
>>>>>>>         uint32_t     ext_cpuid8_edx; // reserved
>>>>>>>
>>>>>>> +    // cpuid function 0x8000001E // AMD 17h
>>>>>>> +    uint32_t     ext_cpuid1E_eax;
>>>>>>> +    ExtCpuid1EEx ext_cpuid1E_ebx; // threads per core (AMD17h)
>>>>>>> +    uint32_t     ext_cpuid1E_ecx;
>>>>>>> +    uint32_t     ext_cpuid1E_edx; // unused currently
>>>>>>> +
>>>>>>>         // extended control register XCR0 (the XFEATURE_ENABLED_MASK
>>>>>>> register)
>>>>>>>         XemXcr0Eax   xem_xcr0_eax;
>>>>>>>         uint32_t     xem_xcr0_edx; // reserved
>>>>>>> @@ -505,6 +520,14 @@
>>>>>>>           result |= CPU_CLMUL;
>>>>>>>         if (_cpuid_info.sef_cpuid7_ebx.bits.rtm != 0)
>>>>>>>           result |= CPU_RTM;
>>>>>>> +    if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
>>>>>>> +       result |= CPU_ADX;
>>>>>>> +    if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
>>>>>>> +      result |= CPU_BMI2;
>>>>>>> +    if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
>>>>>>> +      result |= CPU_SHA;
>>>>>>> +    if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
>>>>>>> +      result |= CPU_FMA;
>>>>>>>
>>>>>>>         // AMD features.
>>>>>>>         if (is_amd()) {
>>>>>>> @@ -518,16 +541,8 @@
>>>>>>>         }
>>>>>>>         // Intel features.
>>>>>>>         if(is_intel()) {
>>>>>>> -      if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
>>>>>>> -         result |= CPU_ADX;
>>>>>>> -      if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
>>>>>>> -        result |= CPU_BMI2;
>>>>>>> -      if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
>>>>>>> -        result |= CPU_SHA;
>>>>>>>           if(_cpuid_info.ext_cpuid1_ecx.bits.lzcnt_intel != 0)
>>>>>>>             result |= CPU_LZCNT;
>>>>>>> -      if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
>>>>>>> -        result |= CPU_FMA;
>>>>>>>           // for Intel, ecx.bits.misalignsse bit (bit 8) indicates
>>>>>>> support for prefetchw
>>>>>>>           if (_cpuid_info.ext_cpuid1_ecx.bits.misalignsse != 0) {
>>>>>>>             result |= CPU_3DNOW_PREFETCH;
>>>>>>> @@ -590,6 +605,7 @@
>>>>>>>       static ByteSize ext_cpuid5_offset() { return
>>>>>>> byte_offset_of(CpuidInfo, ext_cpuid5_eax); }
>>>>>>>       static ByteSize ext_cpuid7_offset() { return
>>>>>>> byte_offset_of(CpuidInfo, ext_cpuid7_eax); }
>>>>>>>       static ByteSize ext_cpuid8_offset() { return
>>>>>>> byte_offset_of(CpuidInfo, ext_cpuid8_eax); }
>>>>>>> +  static ByteSize ext_cpuid1E_offset() { return
>>>>>>> byte_offset_of(CpuidInfo, ext_cpuid1E_eax); }
>>>>>>>       static ByteSize tpl_cpuidB0_offset() { return
>>>>>>> byte_offset_of(CpuidInfo, tpl_cpuidB0_eax); }
>>>>>>>       static ByteSize tpl_cpuidB1_offset() { return
>>>>>>> byte_offset_of(CpuidInfo, tpl_cpuidB1_eax); }
>>>>>>>       static ByteSize tpl_cpuidB2_offset() { return
>>>>>>> byte_offset_of(CpuidInfo, tpl_cpuidB2_eax); }
>>>>>>> @@ -673,8 +689,11 @@
>>>>>>>         if (is_intel() && supports_processor_topology()) {
>>>>>>>           result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
>>>>>>>         } else if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) {
>>>>>>> -      result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
>>>>>>> -               cores_per_cpu();
>>>>>>> +      if (cpu_family() >= 0x17)
>>>>>>> +        result = _cpuid_info.ext_cpuid1E_ebx.bits.threads_per_core +
>>>>>>> 1;
>>>>>>> +      else
>>>>>>> +        result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
>>>>>>> +                 cores_per_cpu();
>>>>>>>         }
>>>>>>>         return (result == 0 ? 1 : result);
>>>>>>>       }
>>>>>>>
>>>>>>> I have attached the patch for review.
>>>>>>> Please let me know your comments.
>>>>>>>
>>>>>>> Thanks,
>>>>>>> Rohit
>>>>>>>
>>>>>>>> Thanks,
>>>>>>>> Vladimir
>>>>>>>>
>>>>>>>>
>>>>>>>>>
>>>>>>>>> src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>
>>>>>>>>> No comments on AMD specific changes.
>>>>>>>>>
>>>>>>>>> Thanks,
>>>>>>>>> David
>>>>>>>>> -----
>>>>>>>>>
>>>>>>>>> On 5/09/2017 3:43 PM, David Holmes wrote:
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>> On 5/09/2017 3:29 PM, Rohit Arul Raj wrote:
>>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>> Hello David,
>>>>>>>>>>>
>>>>>>>>>>> On Tue, Sep 5, 2017 at 10:31 AM, David Holmes
>>>>>>>>>>> <david.holmes at oracle.com>
>>>>>>>>>>> wrote:
>>>>>>>>>>>>
>>>>>>>>>>>>
>>>>>>>>>>>>
>>>>>>>>>>>>
>>>>>>>>>>>> Hi Rohit,
>>>>>>>>>>>>
>>>>>>>>>>>> I was unable to apply your patch to latest jdk10/hs/hotspot
>>>>>>>>>>>> repo.
>>>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>> I checked out the latest jdk10/hs/hotspot [parent: 13548:1a9c2e07a826]
>>>>>>>>>>> and was able to apply the patch [epyc-amd17h-defaults-3Sept.patch]
>>>>>>>>>>> without any issues.
>>>>>>>>>>> Can you share the error message that you are getting?
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>> I was getting this:
>>>>>>>>>>
>>>>>>>>>> applying hotspot.patch
>>>>>>>>>> patching file src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>> Hunk #1 FAILED at 1108
>>>>>>>>>> 1 out of 1 hunks FAILED -- saving rejects to file
>>>>>>>>>> src/cpu/x86/vm/vm_version_x86.cpp.rej
>>>>>>>>>> patching file src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>> Hunk #2 FAILED at 522
>>>>>>>>>> 1 out of 2 hunks FAILED -- saving rejects to file
>>>>>>>>>> src/cpu/x86/vm/vm_version_x86.hpp.rej
>>>>>>>>>> abort: patch failed to apply
>>>>>>>>>>
>>>>>>>>>> but I started again and this time it applied fine, so I'm not sure
>>>>>>>>>> what was going on there.
>>>>>>>>>>
>>>>>>>>>> Cheers,
>>>>>>>>>> David
>>>>>>>>>>
>>>>>>>>>>> Regards,
>>>>>>>>>>> Rohit
>>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>>>
>>>>>>>>>>>>
>>>>>>>>>>>> On 4/09/2017 2:42 AM, Rohit Arul Raj wrote:
>>>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>>> Hello Vladimir,
>>>>>>>>>>>>>
>>>>>>>>>>>>> On Sat, Sep 2, 2017 at 11:25 PM, Vladimir Kozlov
>>>>>>>>>>>>> <vladimir.kozlov at oracle.com> wrote:
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> Hi Rohit,
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> On 9/2/17 1:16 AM, Rohit Arul Raj wrote:
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> Hello Vladimir,
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> Changes look good. Only question I have is about
>>>>>>>>>>>>>>>> MaxVectorSize. It is set > 16 only in presence of AVX:
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> http://hg.openjdk.java.net/jdk10/hs/hotspot/file/046eab27258f/src/cpu/x86/vm/vm_version_x86.cpp#l945
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> Does that code work for AMD 17h too?
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> Thanks for pointing that out. Yes, the code works fine for
>>>>>>>>>>>>>>> AMD
>>>>>>>>>>>>>>> 17h.
>>>>>>>>>>>>>>> So
>>>>>>>>>>>>>>> I have removed the surplus check for MaxVectorSize from my
>>>>>>>>>>>>>>> patch.
>>>>>>>>>>>>>>> I
>>>>>>>>>>>>>>> have updated, re-tested and attached the patch.
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> Which check did you remove?
>>>>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>>> My older patch had the below mentioned check which was required
>>>>>>>>>>>>> on
>>>>>>>>>>>>> JDK9 where the default MaxVectorSize was 64. It has been
>>>>>>>>>>>>> handled
>>>>>>>>>>>>> better in openJDK10. So this check is not required anymore.
>>>>>>>>>>>>>
>>>>>>>>>>>>> +    // Some defaults for AMD family 17h
>>>>>>>>>>>>> +    if ( cpu_family() == 0x17 ) {
>>>>>>>>>>>>> ...
>>>>>>>>>>>>> ...
>>>>>>>>>>>>> +      if (MaxVectorSize > 32) {
>>>>>>>>>>>>> +        FLAG_SET_DEFAULT(MaxVectorSize, 32);
>>>>>>>>>>>>> +      }
>>>>>>>>>>>>> ..
>>>>>>>>>>>>> ..
>>>>>>>>>>>>> +      }
>>>>>>>>>>>>>
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> I have one query regarding the setting of UseSHA flag:
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> http://hg.openjdk.java.net/jdk10/hs/hotspot/file/046eab27258f/src/cpu/x86/vm/vm_version_x86.cpp#l821
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> AMD 17h has support for SHA.
>>>>>>>>>>>>>>> AMD 15h doesn't have  support for SHA. Still "UseSHA" flag
>>>>>>>>>>>>>>> gets
>>>>>>>>>>>>>>> enabled for it based on the availability of BMI2 and AVX2. Is
>>>>>>>>>>>>>>> there
>>>>>>>>>>>>>>> an
>>>>>>>>>>>>>>> underlying reason for this? I have handled this in the patch
>>>>>>>>>>>>>>> but
>>>>>>>>>>>>>>> just
>>>>>>>>>>>>>>> wanted to confirm.
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> It was done by the following change, which uses only AVX2 and BMI2
>>>>>>>>>>>>>> instructions to calculate SHA-256:
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> http://hg.openjdk.java.net/jdk10/hs/hotspot/rev/6a17c49de974
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> I don't know if AMD 15h supports these instructions and can
>>>>>>>>>>>>>> execute
>>>>>>>>>>>>>> that
>>>>>>>>>>>>>> code. You need to test it.
>>>>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>>> Ok, got it. Since AMD15h has support for AVX2 and BMI2
>>>>>>>>>>>>> instructions,
>>>>>>>>>>>>> it should work.
>>>>>>>>>>>>> Confirmed by running following sanity tests:
>>>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>>> ./hotspot/test/compiler/intrinsics/sha/sanity/TestSHA1Intrinsics.java
>>>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>>> ./hotspot/test/compiler/intrinsics/sha/sanity/TestSHA512Intrinsics.java
>>>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>>> ./hotspot/test/compiler/intrinsics/sha/sanity/TestSHA256Intrinsics.java
>>>>>>>>>>>>>
>>>>>>>>>>>>> So I have removed those SHA checks from my patch too.
>>>>>>>>>>>>>
>>>>>>>>>>>>> Please find attached updated, re-tested patch.
>>>>>>>>>>>>>
>>>>>>>>>>>>> diff --git a/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>>>> b/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>>>> --- a/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>>>> +++ b/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>>>> @@ -1109,11 +1109,27 @@
>>>>>>>>>>>>>           }
>>>>>>>>>>>>>
>>>>>>>>>>>>>       #ifdef COMPILER2
>>>>>>>>>>>>> -    if (MaxVectorSize > 16) {
>>>>>>>>>>>>> -      // Limit vectors size to 16 bytes on current AMD cpus.
>>>>>>>>>>>>> +    if (cpu_family() < 0x17 && MaxVectorSize > 16) {
>>>>>>>>>>>>> +      // Limit vectors size to 16 bytes on AMD cpus < 17h.
>>>>>>>>>>>>>             FLAG_SET_DEFAULT(MaxVectorSize, 16);
>>>>>>>>>>>>>           }
>>>>>>>>>>>>>       #endif // COMPILER2
>>>>>>>>>>>>> +
>>>>>>>>>>>>> +    // Some defaults for AMD family 17h
>>>>>>>>>>>>> +    if ( cpu_family() == 0x17 ) {
>>>>>>>>>>>>> +      // On family 17h processors use XMM and
>>>>>>>>>>>>> UnalignedLoadStores
>>>>>>>>>>>>> for
>>>>>>>>>>>>> Array Copy
>>>>>>>>>>>>> +      if (supports_sse2() &&
>>>>>>>>>>>>> FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
>>>>>>>>>>>>> +        FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
>>>>>>>>>>>>> +      }
>>>>>>>>>>>>> +      if (supports_sse2() &&
>>>>>>>>>>>>> FLAG_IS_DEFAULT(UseUnalignedLoadStores))
>>>>>>>>>>>>> {
>>>>>>>>>>>>> +        FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
>>>>>>>>>>>>> +      }
>>>>>>>>>>>>> +#ifdef COMPILER2
>>>>>>>>>>>>> +      if (supports_sse4_2() &&
>>>>>>>>>>>>> FLAG_IS_DEFAULT(UseFPUForSpilling))
>>>>>>>>>>>>> {
>>>>>>>>>>>>> +        FLAG_SET_DEFAULT(UseFPUForSpilling, true);
>>>>>>>>>>>>> +      }
>>>>>>>>>>>>> +#endif
>>>>>>>>>>>>> +    }
>>>>>>>>>>>>>         }
>>>>>>>>>>>>>
>>>>>>>>>>>>>         if( is_intel() ) { // Intel cpus specific settings
>>>>>>>>>>>>> diff --git a/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>>>>> b/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>>>>> --- a/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>>>>> +++ b/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>>>>> @@ -505,6 +505,14 @@
>>>>>>>>>>>>>             result |= CPU_CLMUL;
>>>>>>>>>>>>>           if (_cpuid_info.sef_cpuid7_ebx.bits.rtm != 0)
>>>>>>>>>>>>>             result |= CPU_RTM;
>>>>>>>>>>>>> +    if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
>>>>>>>>>>>>> +       result |= CPU_ADX;
>>>>>>>>>>>>> +    if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
>>>>>>>>>>>>> +      result |= CPU_BMI2;
>>>>>>>>>>>>> +    if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
>>>>>>>>>>>>> +      result |= CPU_SHA;
>>>>>>>>>>>>> +    if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
>>>>>>>>>>>>> +      result |= CPU_FMA;
>>>>>>>>>>>>>
>>>>>>>>>>>>>           // AMD features.
>>>>>>>>>>>>>           if (is_amd()) {
>>>>>>>>>>>>> @@ -515,19 +523,13 @@
>>>>>>>>>>>>>               result |= CPU_LZCNT;
>>>>>>>>>>>>>             if (_cpuid_info.ext_cpuid1_ecx.bits.sse4a != 0)
>>>>>>>>>>>>>               result |= CPU_SSE4A;
>>>>>>>>>>>>> +      if(_cpuid_info.std_cpuid1_edx.bits.ht != 0)
>>>>>>>>>>>>> +        result |= CPU_HT;
>>>>>>>>>>>>>           }
>>>>>>>>>>>>>           // Intel features.
>>>>>>>>>>>>>           if(is_intel()) {
>>>>>>>>>>>>> -      if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
>>>>>>>>>>>>> -         result |= CPU_ADX;
>>>>>>>>>>>>> -      if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
>>>>>>>>>>>>> -        result |= CPU_BMI2;
>>>>>>>>>>>>> -      if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
>>>>>>>>>>>>> -        result |= CPU_SHA;
>>>>>>>>>>>>>             if(_cpuid_info.ext_cpuid1_ecx.bits.lzcnt_intel !=
>>>>>>>>>>>>> 0)
>>>>>>>>>>>>>               result |= CPU_LZCNT;
>>>>>>>>>>>>> -      if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
>>>>>>>>>>>>> -        result |= CPU_FMA;
>>>>>>>>>>>>>             // for Intel, ecx.bits.misalignsse bit (bit 8)
>>>>>>>>>>>>> indicates
>>>>>>>>>>>>> support for prefetchw
>>>>>>>>>>>>>             if (_cpuid_info.ext_cpuid1_ecx.bits.misalignsse !=
>>>>>>>>>>>>> 0)
>>>>>>>>>>>>> {
>>>>>>>>>>>>>               result |= CPU_3DNOW_PREFETCH;
>>>>>>>>>>>>>
>>>>>>>>>>>>> Please let me know your comments.
>>>>>>>>>>>>>
>>>>>>>>>>>>> Thanks for your time.
>>>>>>>>>>>>> Rohit
>>>>>>>>>>>>>
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> Thanks for taking time to review the code.
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> diff --git a/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>>>>>> b/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>>>>>> --- a/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>>>>>> +++ b/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>>>>>> @@ -1088,6 +1088,22 @@
>>>>>>>>>>>>>>>              }
>>>>>>>>>>>>>>>              FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
>>>>>>>>>>>>>>>            }
>>>>>>>>>>>>>>> +    if (supports_sha()) {
>>>>>>>>>>>>>>> +      if (FLAG_IS_DEFAULT(UseSHA)) {
>>>>>>>>>>>>>>> +        FLAG_SET_DEFAULT(UseSHA, true);
>>>>>>>>>>>>>>> +      }
>>>>>>>>>>>>>>> +    } else if (UseSHA || UseSHA1Intrinsics ||
>>>>>>>>>>>>>>> UseSHA256Intrinsics
>>>>>>>>>>>>>>> ||
>>>>>>>>>>>>>>> UseSHA512Intrinsics) {
>>>>>>>>>>>>>>> +      if (!FLAG_IS_DEFAULT(UseSHA) ||
>>>>>>>>>>>>>>> +          !FLAG_IS_DEFAULT(UseSHA1Intrinsics) ||
>>>>>>>>>>>>>>> +          !FLAG_IS_DEFAULT(UseSHA256Intrinsics) ||
>>>>>>>>>>>>>>> +          !FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
>>>>>>>>>>>>>>> +        warning("SHA instructions are not available on this
>>>>>>>>>>>>>>> CPU");
>>>>>>>>>>>>>>> +      }
>>>>>>>>>>>>>>> +      FLAG_SET_DEFAULT(UseSHA, false);
>>>>>>>>>>>>>>> +      FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
>>>>>>>>>>>>>>> +      FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
>>>>>>>>>>>>>>> +      FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
>>>>>>>>>>>>>>> +    }
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>            // some defaults for AMD family 15h
>>>>>>>>>>>>>>>            if ( cpu_family() == 0x15 ) {
>>>>>>>>>>>>>>> @@ -1109,11 +1125,40 @@
>>>>>>>>>>>>>>>            }
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>        #ifdef COMPILER2
>>>>>>>>>>>>>>> -    if (MaxVectorSize > 16) {
>>>>>>>>>>>>>>> -      // Limit vectors size to 16 bytes on current AMD cpus.
>>>>>>>>>>>>>>> +    if (cpu_family() < 0x17 && MaxVectorSize > 16) {
>>>>>>>>>>>>>>> +      // Limit vectors size to 16 bytes on AMD cpus < 17h.
>>>>>>>>>>>>>>>              FLAG_SET_DEFAULT(MaxVectorSize, 16);
>>>>>>>>>>>>>>>            }
>>>>>>>>>>>>>>>        #endif // COMPILER2
>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>> +    // Some defaults for AMD family 17h
>>>>>>>>>>>>>>> +    if ( cpu_family() == 0x17 ) {
>>>>>>>>>>>>>>> +      // On family 17h processors use XMM and
>>>>>>>>>>>>>>> UnalignedLoadStores
>>>>>>>>>>>>>>> for
>>>>>>>>>>>>>>> Array Copy
>>>>>>>>>>>>>>> +      if (supports_sse2() &&
>>>>>>>>>>>>>>> FLAG_IS_DEFAULT(UseXMMForArrayCopy))
>>>>>>>>>>>>>>> {
>>>>>>>>>>>>>>> +        FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
>>>>>>>>>>>>>>> +      }
>>>>>>>>>>>>>>> +      if (supports_sse2() &&
>>>>>>>>>>>>>>> FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
>>>>>>>>>>>>>>> +        FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
>>>>>>>>>>>>>>> +      }
>>>>>>>>>>>>>>> +      if (supports_bmi2() &&
>>>>>>>>>>>>>>> FLAG_IS_DEFAULT(UseBMI2Instructions))
>>>>>>>>>>>>>>> {
>>>>>>>>>>>>>>> +        FLAG_SET_DEFAULT(UseBMI2Instructions, true);
>>>>>>>>>>>>>>> +      }
>>>>>>>>>>>>>>> +      if (UseSHA) {
>>>>>>>>>>>>>>> +        if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
>>>>>>>>>>>>>>> +          FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
>>>>>>>>>>>>>>> +        } else if (UseSHA512Intrinsics) {
>>>>>>>>>>>>>>> +          warning("Intrinsics for SHA-384 and SHA-512 crypto
>>>>>>>>>>>>>>> hash
>>>>>>>>>>>>>>> functions not available on this CPU.");
>>>>>>>>>>>>>>> +          FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
>>>>>>>>>>>>>>> +        }
>>>>>>>>>>>>>>> +      }
>>>>>>>>>>>>>>> +#ifdef COMPILER2
>>>>>>>>>>>>>>> +      if (supports_sse4_2()) {
>>>>>>>>>>>>>>> +        if (FLAG_IS_DEFAULT(UseFPUForSpilling)) {
>>>>>>>>>>>>>>> +          FLAG_SET_DEFAULT(UseFPUForSpilling, true);
>>>>>>>>>>>>>>> +        }
>>>>>>>>>>>>>>> +      }
>>>>>>>>>>>>>>> +#endif
>>>>>>>>>>>>>>> +    }
>>>>>>>>>>>>>>>          }
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>          if( is_intel() ) { // Intel cpus specific settings
>>>>>>>>>>>>>>> diff --git a/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>>>>>>> b/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>>>>>>> --- a/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>>>>>>> +++ b/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>>>>>>> @@ -505,6 +505,14 @@
>>>>>>>>>>>>>>>              result |= CPU_CLMUL;
>>>>>>>>>>>>>>>            if (_cpuid_info.sef_cpuid7_ebx.bits.rtm != 0)
>>>>>>>>>>>>>>>              result |= CPU_RTM;
>>>>>>>>>>>>>>> +    if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
>>>>>>>>>>>>>>> +       result |= CPU_ADX;
>>>>>>>>>>>>>>> +    if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
>>>>>>>>>>>>>>> +      result |= CPU_BMI2;
>>>>>>>>>>>>>>> +    if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
>>>>>>>>>>>>>>> +      result |= CPU_SHA;
>>>>>>>>>>>>>>> +    if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
>>>>>>>>>>>>>>> +      result |= CPU_FMA;
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>            // AMD features.
>>>>>>>>>>>>>>>            if (is_amd()) {
>>>>>>>>>>>>>>> @@ -515,19 +523,13 @@
>>>>>>>>>>>>>>>                result |= CPU_LZCNT;
>>>>>>>>>>>>>>>              if (_cpuid_info.ext_cpuid1_ecx.bits.sse4a != 0)
>>>>>>>>>>>>>>>                result |= CPU_SSE4A;
>>>>>>>>>>>>>>> +      if(_cpuid_info.std_cpuid1_edx.bits.ht != 0)
>>>>>>>>>>>>>>> +        result |= CPU_HT;
>>>>>>>>>>>>>>>            }
>>>>>>>>>>>>>>>            // Intel features.
>>>>>>>>>>>>>>>            if(is_intel()) {
>>>>>>>>>>>>>>> -      if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
>>>>>>>>>>>>>>> -         result |= CPU_ADX;
>>>>>>>>>>>>>>> -      if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
>>>>>>>>>>>>>>> -        result |= CPU_BMI2;
>>>>>>>>>>>>>>> -      if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
>>>>>>>>>>>>>>> -        result |= CPU_SHA;
>>>>>>>>>>>>>>>              if(_cpuid_info.ext_cpuid1_ecx.bits.lzcnt_intel
>>>>>>>>>>>>>>> !=
>>>>>>>>>>>>>>> 0)
>>>>>>>>>>>>>>>                result |= CPU_LZCNT;
>>>>>>>>>>>>>>> -      if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
>>>>>>>>>>>>>>> -        result |= CPU_FMA;
>>>>>>>>>>>>>>>              // for Intel, ecx.bits.misalignsse bit (bit 8)
>>>>>>>>>>>>>>> indicates
>>>>>>>>>>>>>>> support for prefetchw
>>>>>>>>>>>>>>>              if (_cpuid_info.ext_cpuid1_ecx.bits.misalignsse
>>>>>>>>>>>>>>> !=
>>>>>>>>>>>>>>> 0) {
>>>>>>>>>>>>>>>                result |= CPU_3DNOW_PREFETCH;
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> Regards,
>>>>>>>>>>>>>>> Rohit
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> On 9/1/17 8:04 AM, Rohit Arul Raj wrote:
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>> On Fri, Sep 1, 2017 at 10:27 AM, Rohit Arul Raj
>>>>>>>>>>>>>>>>> <rohitarulraj at gmail.com>
>>>>>>>>>>>>>>>>> wrote:
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>> On Fri, Sep 1, 2017 at 3:01 AM, David Holmes
>>>>>>>>>>>>>>>>>> <david.holmes at oracle.com>
>>>>>>>>>>>>>>>>>> wrote:
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>> Hi Rohit,
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>> I think the patch needs updating for jdk10 as I already
>>>>>>>>>>>>>>>>>>> see
>>>>>>>>>>>>>>>>>>> a
>>>>>>>>>>>>>>>>>>> lot of
>>>>>>>>>>>>>>>>>>> logic
>>>>>>>>>>>>>>>>>>> around UseSHA in vm_version_x86.cpp.
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>> Thanks,
>>>>>>>>>>>>>>>>>>> David
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>> Thanks David, I will update the patch wrt JDK10 source
>>>>>>>>>>>>>>>>>> base,
>>>>>>>>>>>>>>>>>> test
>>>>>>>>>>>>>>>>>> and
>>>>>>>>>>>>>>>>>> resubmit for review.
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>> Regards,
>>>>>>>>>>>>>>>>>> Rohit
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>> Hi All,
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>> I have updated the patch wrt openjdk10/hotspot (parent:
>>>>>>>>>>>>>>>>> 13519:71337910df60), did regression testing using jtreg
>>>>>>>>>>>>>>>>> ($make
>>>>>>>>>>>>>>>>> default) and didn't find any regressions.
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>> Can anyone please volunteer to review this patch  which
>>>>>>>>>>>>>>>>> sets
>>>>>>>>>>>>>>>>> flag/ISA
>>>>>>>>>>>>>>>>> defaults for newer AMD 17h (EPYC) processor?
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>> ************************* Patch
>>>>>>>>>>>>>>>>> ****************************
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>> diff --git a/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>>>>>>>> b/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>>>>>>>> --- a/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>>>>>>>> +++ b/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>>>>>>>> @@ -1088,6 +1088,22 @@
>>>>>>>>>>>>>>>>>               }
>>>>>>>>>>>>>>>>>               FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
>>>>>>>>>>>>>>>>>             }
>>>>>>>>>>>>>>>>> +    if (supports_sha()) {
>>>>>>>>>>>>>>>>> +      if (FLAG_IS_DEFAULT(UseSHA)) {
>>>>>>>>>>>>>>>>> +        FLAG_SET_DEFAULT(UseSHA, true);
>>>>>>>>>>>>>>>>> +      }
>>>>>>>>>>>>>>>>> +    } else if (UseSHA || UseSHA1Intrinsics ||
>>>>>>>>>>>>>>>>> UseSHA256Intrinsics
>>>>>>>>>>>>>>>>> ||
>>>>>>>>>>>>>>>>> UseSHA512Intrinsics) {
>>>>>>>>>>>>>>>>> +      if (!FLAG_IS_DEFAULT(UseSHA) ||
>>>>>>>>>>>>>>>>> +          !FLAG_IS_DEFAULT(UseSHA1Intrinsics) ||
>>>>>>>>>>>>>>>>> +          !FLAG_IS_DEFAULT(UseSHA256Intrinsics) ||
>>>>>>>>>>>>>>>>> +          !FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
>>>>>>>>>>>>>>>>> +        warning("SHA instructions are not available on
>>>>>>>>>>>>>>>>> this
>>>>>>>>>>>>>>>>> CPU");
>>>>>>>>>>>>>>>>> +      }
>>>>>>>>>>>>>>>>> +      FLAG_SET_DEFAULT(UseSHA, false);
>>>>>>>>>>>>>>>>> +      FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
>>>>>>>>>>>>>>>>> +      FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
>>>>>>>>>>>>>>>>> +      FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
>>>>>>>>>>>>>>>>> +    }
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>             // some defaults for AMD family 15h
>>>>>>>>>>>>>>>>>             if ( cpu_family() == 0x15 ) {
>>>>>>>>>>>>>>>>> @@ -1109,11 +1125,43 @@
>>>>>>>>>>>>>>>>>             }
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>         #ifdef COMPILER2
>>>>>>>>>>>>>>>>> -    if (MaxVectorSize > 16) {
>>>>>>>>>>>>>>>>> -      // Limit vectors size to 16 bytes on current AMD
>>>>>>>>>>>>>>>>> cpus.
>>>>>>>>>>>>>>>>> +    if (cpu_family() < 0x17 && MaxVectorSize > 16) {
>>>>>>>>>>>>>>>>> +      // Limit vectors size to 16 bytes on AMD cpus < 17h.
>>>>>>>>>>>>>>>>>               FLAG_SET_DEFAULT(MaxVectorSize, 16);
>>>>>>>>>>>>>>>>>             }
>>>>>>>>>>>>>>>>>         #endif // COMPILER2
>>>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>>>> +    // Some defaults for AMD family 17h
>>>>>>>>>>>>>>>>> +    if ( cpu_family() == 0x17 ) {
>>>>>>>>>>>>>>>>> +      // On family 17h processors use XMM and
>>>>>>>>>>>>>>>>> UnalignedLoadStores
>>>>>>>>>>>>>>>>> for
>>>>>>>>>>>>>>>>> Array Copy
>>>>>>>>>>>>>>>>> +      if (supports_sse2() &&
>>>>>>>>>>>>>>>>> FLAG_IS_DEFAULT(UseXMMForArrayCopy))
>>>>>>>>>>>>>>>>> {
>>>>>>>>>>>>>>>>> +        UseXMMForArrayCopy = true;
>>>>>>>>>>>>>>>>> +      }
>>>>>>>>>>>>>>>>> +      if (supports_sse2() &&
>>>>>>>>>>>>>>>>> FLAG_IS_DEFAULT(UseUnalignedLoadStores))
>>>>>>>>>>>>>>>>> {
>>>>>>>>>>>>>>>>> +        UseUnalignedLoadStores = true;
>>>>>>>>>>>>>>>>> +      }
>>>>>>>>>>>>>>>>> +      if (supports_bmi2() &&
>>>>>>>>>>>>>>>>> FLAG_IS_DEFAULT(UseBMI2Instructions)) {
>>>>>>>>>>>>>>>>> +        UseBMI2Instructions = true;
>>>>>>>>>>>>>>>>> +      }
>>>>>>>>>>>>>>>>> +      if (MaxVectorSize > 32) {
>>>>>>>>>>>>>>>>> +        FLAG_SET_DEFAULT(MaxVectorSize, 32);
>>>>>>>>>>>>>>>>> +      }
>>>>>>>>>>>>>>>>> +      if (UseSHA) {
>>>>>>>>>>>>>>>>> +        if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
>>>>>>>>>>>>>>>>> +          FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
>>>>>>>>>>>>>>>>> +        } else if (UseSHA512Intrinsics) {
>>>>>>>>>>>>>>>>> +          warning("Intrinsics for SHA-384 and SHA-512
>>>>>>>>>>>>>>>>> crypto
>>>>>>>>>>>>>>>>> hash
>>>>>>>>>>>>>>>>> functions not available on this CPU.");
>>>>>>>>>>>>>>>>> +          FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
>>>>>>>>>>>>>>>>> +        }
>>>>>>>>>>>>>>>>> +      }
>>>>>>>>>>>>>>>>> +#ifdef COMPILER2
>>>>>>>>>>>>>>>>> +      if (supports_sse4_2()) {
>>>>>>>>>>>>>>>>> +        if (FLAG_IS_DEFAULT(UseFPUForSpilling)) {
>>>>>>>>>>>>>>>>> +          FLAG_SET_DEFAULT(UseFPUForSpilling, true);
>>>>>>>>>>>>>>>>> +        }
>>>>>>>>>>>>>>>>> +      }
>>>>>>>>>>>>>>>>> +#endif
>>>>>>>>>>>>>>>>> +    }
>>>>>>>>>>>>>>>>>           }
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>           if( is_intel() ) { // Intel cpus specific
>>>>>>>>>>>>>>>>> settings
>>>>>>>>>>>>>>>>> diff --git a/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>>>>>>>>> b/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>>>>>>>>> --- a/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>>>>>>>>> +++ b/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>>>>>>>>> @@ -505,6 +505,14 @@
>>>>>>>>>>>>>>>>>               result |= CPU_CLMUL;
>>>>>>>>>>>>>>>>>             if (_cpuid_info.sef_cpuid7_ebx.bits.rtm != 0)
>>>>>>>>>>>>>>>>>               result |= CPU_RTM;
>>>>>>>>>>>>>>>>> +    if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
>>>>>>>>>>>>>>>>> +       result |= CPU_ADX;
>>>>>>>>>>>>>>>>> +    if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
>>>>>>>>>>>>>>>>> +      result |= CPU_BMI2;
>>>>>>>>>>>>>>>>> +    if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
>>>>>>>>>>>>>>>>> +      result |= CPU_SHA;
>>>>>>>>>>>>>>>>> +    if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
>>>>>>>>>>>>>>>>> +      result |= CPU_FMA;
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>             // AMD features.
>>>>>>>>>>>>>>>>>             if (is_amd()) {
>>>>>>>>>>>>>>>>> @@ -515,19 +523,13 @@
>>>>>>>>>>>>>>>>>                 result |= CPU_LZCNT;
>>>>>>>>>>>>>>>>>               if (_cpuid_info.ext_cpuid1_ecx.bits.sse4a !=
>>>>>>>>>>>>>>>>> 0)
>>>>>>>>>>>>>>>>>                 result |= CPU_SSE4A;
>>>>>>>>>>>>>>>>> +      if(_cpuid_info.std_cpuid1_edx.bits.ht != 0)
>>>>>>>>>>>>>>>>> +        result |= CPU_HT;
>>>>>>>>>>>>>>>>>             }
>>>>>>>>>>>>>>>>>             // Intel features.
>>>>>>>>>>>>>>>>>             if(is_intel()) {
>>>>>>>>>>>>>>>>> -      if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
>>>>>>>>>>>>>>>>> -         result |= CPU_ADX;
>>>>>>>>>>>>>>>>> -      if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
>>>>>>>>>>>>>>>>> -        result |= CPU_BMI2;
>>>>>>>>>>>>>>>>> -      if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
>>>>>>>>>>>>>>>>> -        result |= CPU_SHA;
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>> if(_cpuid_info.ext_cpuid1_ecx.bits.lzcnt_intel
>>>>>>>>>>>>>>>>> != 0)
>>>>>>>>>>>>>>>>>                 result |= CPU_LZCNT;
>>>>>>>>>>>>>>>>> -      if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
>>>>>>>>>>>>>>>>> -        result |= CPU_FMA;
>>>>>>>>>>>>>>>>>               // for Intel, ecx.bits.misalignsse bit (bit
>>>>>>>>>>>>>>>>> 8)
>>>>>>>>>>>>>>>>> indicates
>>>>>>>>>>>>>>>>> support for prefetchw
>>>>>>>>>>>>>>>>>               if
>>>>>>>>>>>>>>>>> (_cpuid_info.ext_cpuid1_ecx.bits.misalignsse
>>>>>>>>>>>>>>>>> !=
>>>>>>>>>>>>>>>>> 0) {
>>>>>>>>>>>>>>>>>                 result |= CPU_3DNOW_PREFETCH;
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>> **************************************************************
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>> Thanks,
>>>>>>>>>>>>>>>>> Rohit
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>> On 1/09/2017 1:11 AM, Rohit Arul Raj wrote:
>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>> On Thu, Aug 31, 2017 at 5:59 PM, David Holmes
>>>>>>>>>>>>>>>>>>>> <david.holmes at oracle.com>
>>>>>>>>>>>>>>>>>>>> wrote:
>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>> Hi Rohit,
>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>> On 31/08/2017 7:03 PM, Rohit Arul Raj wrote:
>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>> I would like a volunteer to review this patch
>>>>>>>>>>>>>>>>>>>>>> (openJDK9)
>>>>>>>>>>>>>>>>>>>>>> which
>>>>>>>>>>>>>>>>>>>>>> sets
>>>>>>>>>>>>>>>>>>>>>> flag/ISA defaults for newer AMD 17h (EPYC) processor
>>>>>>>>>>>>>>>>>>>>>> and
>>>>>>>>>>>>>>>>>>>>>> help
>>>>>>>>>>>>>>>>>>>>>> us
>>>>>>>>>>>>>>>>>>>>>> with
>>>>>>>>>>>>>>>>>>>>>> the commit process.
>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>> Webrev:
>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>> https://www.dropbox.com/sh/08bsxaxupg8kbam/AADurTXLGIZ6C-tiIAi_Glyka?dl=0
>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>> Unfortunately patches can not be accepted from systems
>>>>>>>>>>>>>>>>>>>>> outside
>>>>>>>>>>>>>>>>>>>>> the
>>>>>>>>>>>>>>>>>>>>> OpenJDK
>>>>>>>>>>>>>>>>>>>>> infrastructure and ...
>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>> I have also attached the patch (hg diff -g) for
>>>>>>>>>>>>>>>>>>>>>> reference.
>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>> ... unfortunately patches tend to get stripped by the
>>>>>>>>>>>>>>>>>>>>> mail
>>>>>>>>>>>>>>>>>>>>> servers.
>>>>>>>>>>>>>>>>>>>>> If
>>>>>>>>>>>>>>>>>>>>> the
>>>>>>>>>>>>>>>>>>>>> patch is small please include it inline. Otherwise you
>>>>>>>>>>>>>>>>>>>>> will
>>>>>>>>>>>>>>>>>>>>> need
>>>>>>>>>>>>>>>>>>>>> to
>>>>>>>>>>>>>>>>>>>>> find
>>>>>>>>>>>>>>>>>>>>> an
>>>>>>>>>>>>>>>>>>>>> OpenJDK Author who can host it for you on
>>>>>>>>>>>>>>>>>>>>> cr.openjdk.java.net.
>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>> 3) I have done regression testing using jtreg ($make
>>>>>>>>>>>>>>>>>>>>>> default)
>>>>>>>>>>>>>>>>>>>>>> and
>>>>>>>>>>>>>>>>>>>>>> didn't find any regressions.
>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>> Sounds good, but until I see the patch it is hard to
>>>>>>>>>>>>>>>>>>>>> comment
>>>>>>>>>>>>>>>>>>>>> on
>>>>>>>>>>>>>>>>>>>>> testing
>>>>>>>>>>>>>>>>>>>>> requirements.
>>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>> Thanks,
>>>>>>>>>>>>>>>>>>>>> David
>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>> Thanks David,
>>>>>>>>>>>>>>>>>>>> Yes, it's a small patch.
>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>> diff --git a/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>>>>>>>>>>> b/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>>>>>>>>>>> --- a/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>>>>>>>>>>> +++ b/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>>>>>>>>>>> @@ -1051,6 +1051,22 @@
>>>>>>>>>>>>>>>>>>>>                }
>>>>>>>>>>>>>>>>>>>>                FLAG_SET_DEFAULT(UseSSE42Intrinsics,
>>>>>>>>>>>>>>>>>>>> false);
>>>>>>>>>>>>>>>>>>>>              }
>>>>>>>>>>>>>>>>>>>> +    if (supports_sha()) {
>>>>>>>>>>>>>>>>>>>> +      if (FLAG_IS_DEFAULT(UseSHA)) {
>>>>>>>>>>>>>>>>>>>> +        FLAG_SET_DEFAULT(UseSHA, true);
>>>>>>>>>>>>>>>>>>>> +      }
>>>>>>>>>>>>>>>>>>>> +    } else if (UseSHA || UseSHA1Intrinsics ||
>>>>>>>>>>>>>>>>>>>> UseSHA256Intrinsics
>>>>>>>>>>>>>>>>>>>> ||
>>>>>>>>>>>>>>>>>>>> UseSHA512Intrinsics) {
>>>>>>>>>>>>>>>>>>>> +      if (!FLAG_IS_DEFAULT(UseSHA) ||
>>>>>>>>>>>>>>>>>>>> +          !FLAG_IS_DEFAULT(UseSHA1Intrinsics) ||
>>>>>>>>>>>>>>>>>>>> +          !FLAG_IS_DEFAULT(UseSHA256Intrinsics) ||
>>>>>>>>>>>>>>>>>>>> +          !FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
>>>>>>>>>>>>>>>>>>>> +        warning("SHA instructions are not available on
>>>>>>>>>>>>>>>>>>>> this
>>>>>>>>>>>>>>>>>>>> CPU");
>>>>>>>>>>>>>>>>>>>> +      }
>>>>>>>>>>>>>>>>>>>> +      FLAG_SET_DEFAULT(UseSHA, false);
>>>>>>>>>>>>>>>>>>>> +      FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
>>>>>>>>>>>>>>>>>>>> +      FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
>>>>>>>>>>>>>>>>>>>> +      FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
>>>>>>>>>>>>>>>>>>>> +    }
>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>              // some defaults for AMD family 15h
>>>>>>>>>>>>>>>>>>>>              if ( cpu_family() == 0x15 ) {
>>>>>>>>>>>>>>>>>>>> @@ -1072,11 +1088,43 @@
>>>>>>>>>>>>>>>>>>>>              }
>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>          #ifdef COMPILER2
>>>>>>>>>>>>>>>>>>>> -    if (MaxVectorSize > 16) {
>>>>>>>>>>>>>>>>>>>> -      // Limit vectors size to 16 bytes on current AMD
>>>>>>>>>>>>>>>>>>>> cpus.
>>>>>>>>>>>>>>>>>>>> +    if (cpu_family() < 0x17 && MaxVectorSize > 16) {
>>>>>>>>>>>>>>>>>>>> +      // Limit vectors size to 16 bytes on AMD cpus <
>>>>>>>>>>>>>>>>>>>> 17h.
>>>>>>>>>>>>>>>>>>>>                FLAG_SET_DEFAULT(MaxVectorSize, 16);
>>>>>>>>>>>>>>>>>>>>              }
>>>>>>>>>>>>>>>>>>>>          #endif // COMPILER2
>>>>>>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>>>>>>> +    // Some defaults for AMD family 17h
>>>>>>>>>>>>>>>>>>>> +    if ( cpu_family() == 0x17 ) {
>>>>>>>>>>>>>>>>>>>> +      // On family 17h processors use XMM and
>>>>>>>>>>>>>>>>>>>> UnalignedLoadStores
>>>>>>>>>>>>>>>>>>>> for
>>>>>>>>>>>>>>>>>>>> Array Copy
>>>>>>>>>>>>>>>>>>>> +      if (supports_sse2() &&
>>>>>>>>>>>>>>>>>>>> FLAG_IS_DEFAULT(UseXMMForArrayCopy))
>>>>>>>>>>>>>>>>>>>> {
>>>>>>>>>>>>>>>>>>>> +        UseXMMForArrayCopy = true;
>>>>>>>>>>>>>>>>>>>> +      }
>>>>>>>>>>>>>>>>>>>> +      if (supports_sse2() &&
>>>>>>>>>>>>>>>>>>>> FLAG_IS_DEFAULT(UseUnalignedLoadStores))
>>>>>>>>>>>>>>>>>>>> {
>>>>>>>>>>>>>>>>>>>> +        UseUnalignedLoadStores = true;
>>>>>>>>>>>>>>>>>>>> +      }
>>>>>>>>>>>>>>>>>>>> +      if (supports_bmi2() &&
>>>>>>>>>>>>>>>>>>>> FLAG_IS_DEFAULT(UseBMI2Instructions))
>>>>>>>>>>>>>>>>>>>> {
>>>>>>>>>>>>>>>>>>>> +        UseBMI2Instructions = true;
>>>>>>>>>>>>>>>>>>>> +      }
>>>>>>>>>>>>>>>>>>>> +      if (MaxVectorSize > 32) {
>>>>>>>>>>>>>>>>>>>> +        FLAG_SET_DEFAULT(MaxVectorSize, 32);
>>>>>>>>>>>>>>>>>>>> +      }
>>>>>>>>>>>>>>>>>>>> +      if (UseSHA) {
>>>>>>>>>>>>>>>>>>>> +        if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
>>>>>>>>>>>>>>>>>>>> +          FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
>>>>>>>>>>>>>>>>>>>> +        } else if (UseSHA512Intrinsics) {
>>>>>>>>>>>>>>>>>>>> +          warning("Intrinsics for SHA-384 and SHA-512
>>>>>>>>>>>>>>>>>>>> crypto
>>>>>>>>>>>>>>>>>>>> hash
>>>>>>>>>>>>>>>>>>>> functions not available on this CPU.");
>>>>>>>>>>>>>>>>>>>> +          FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
>>>>>>>>>>>>>>>>>>>> +        }
>>>>>>>>>>>>>>>>>>>> +      }
>>>>>>>>>>>>>>>>>>>> +#ifdef COMPILER2
>>>>>>>>>>>>>>>>>>>> +      if (supports_sse4_2()) {
>>>>>>>>>>>>>>>>>>>> +        if (FLAG_IS_DEFAULT(UseFPUForSpilling)) {
>>>>>>>>>>>>>>>>>>>> +          FLAG_SET_DEFAULT(UseFPUForSpilling, true);
>>>>>>>>>>>>>>>>>>>> +        }
>>>>>>>>>>>>>>>>>>>> +      }
>>>>>>>>>>>>>>>>>>>> +#endif
>>>>>>>>>>>>>>>>>>>> +    }
>>>>>>>>>>>>>>>>>>>>            }
>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>            if( is_intel() ) { // Intel cpus specific
>>>>>>>>>>>>>>>>>>>> settings
>>>>>>>>>>>>>>>>>>>> diff --git a/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>>>>>>>>>>>> b/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>>>>>>>>>>>> --- a/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>>>>>>>>>>>> +++ b/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>>>>>>>>>>>> @@ -513,6 +513,16 @@
>>>>>>>>>>>>>>>>>>>>                  result |= CPU_LZCNT;
>>>>>>>>>>>>>>>>>>>>                if (_cpuid_info.ext_cpuid1_ecx.bits.sse4a
>>>>>>>>>>>>>>>>>>>> !=
>>>>>>>>>>>>>>>>>>>> 0)
>>>>>>>>>>>>>>>>>>>>                  result |= CPU_SSE4A;
>>>>>>>>>>>>>>>>>>>> +      if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
>>>>>>>>>>>>>>>>>>>> +        result |= CPU_BMI2;
>>>>>>>>>>>>>>>>>>>> +      if(_cpuid_info.std_cpuid1_edx.bits.ht != 0)
>>>>>>>>>>>>>>>>>>>> +        result |= CPU_HT;
>>>>>>>>>>>>>>>>>>>> +      if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
>>>>>>>>>>>>>>>>>>>> +        result |= CPU_ADX;
>>>>>>>>>>>>>>>>>>>> +      if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
>>>>>>>>>>>>>>>>>>>> +        result |= CPU_SHA;
>>>>>>>>>>>>>>>>>>>> +      if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
>>>>>>>>>>>>>>>>>>>> +        result |= CPU_FMA;
>>>>>>>>>>>>>>>>>>>>              }
>>>>>>>>>>>>>>>>>>>>              // Intel features.
>>>>>>>>>>>>>>>>>>>>              if(is_intel()) {
>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>> Regards,
>>>>>>>>>>>>>>>>>>>> Rohit
>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>
>>>>>>>>>>>>
>>>>>>>>
>>>>> >


More information about the hotspot-dev mailing list