RFR: Newer AMD 17h (EPYC) Processor family defaults

Fri Nov 3 03:30:41 UTC 2017

Hello Vladimir,

Is there any update on pushing these changes to jdk10?

Thanks,
Rohit

On Fri, Sep 22, 2017 at 1:11 PM, Rohit Arul Raj <rohitarulraj at gmail.com>
wrote:

> Thanks Vladimir,
>
> On Wed, Sep 20, 2017 at 10:07 PM, Vladimir Kozlov
> <vladimir.kozlov at oracle.com> wrote:
> >>       __ cmpl(rax, 0x80000000);     // Is cpuid(0x80000001) supported?
> >>       __ jcc(Assembler::belowEqual, done);
> >>       __ cmpl(rax, 0x80000004);     // Is cpuid(0x80000005) supported?
> >> -    __ jccb(Assembler::belowEqual, ext_cpuid1);
> >> +   __ jcc(Assembler::belowEqual, ext_cpuid1);
> >
> >
> > Good. You may need to increase size of the buffer too (to be safe) to 1100:
> >
> > static const int stub_size = 1000;
> >
>
> Please find the updated patch after the requested change.
>
> diff --git a/src/cpu/x86/vm/vm_version_x86.cpp
> b/src/cpu/x86/vm/vm_version_x86.cpp
> --- a/src/cpu/x86/vm/vm_version_x86.cpp
> +++ b/src/cpu/x86/vm/vm_version_x86.cpp
> @@ -46,7 +46,7 @@
>  address VM_Version::_cpuinfo_cont_addr = 0;
>
>  static BufferBlob* stub_blob;
> -static const int stub_size = 1000;
> +static const int stub_size = 1100;
>
>  extern "C" {
>    typedef void (*get_cpu_info_stub_t)(void*);
> @@ -70,7 +70,7 @@
>      bool use_evex = FLAG_IS_DEFAULT(UseAVX) || (UseAVX > 2);
>
>      Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4;
> -    Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7,
> done, wrapup;
> +    Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7,
> ext_cpuid8, done, wrapup;
>      Label legacy_setup, save_restore_except, legacy_save_restore,
> start_simd_check;
>
>      StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub");
> @@ -267,14 +267,30 @@
>      __ cmpl(rax, 0x80000000);     // Is cpuid(0x80000001) supported?
>      __ jcc(Assembler::belowEqual, done);
>      __ cmpl(rax, 0x80000004);     // Is cpuid(0x80000005) supported?
> -    __ jccb(Assembler::belowEqual, ext_cpuid1);
> +    __ jcc(Assembler::belowEqual, ext_cpuid1);
>      __ cmpl(rax, 0x80000006);     // Is cpuid(0x80000007) supported?
>      __ jccb(Assembler::belowEqual, ext_cpuid5);
>      __ cmpl(rax, 0x80000007);     // Is cpuid(0x80000008) supported?
>      __ jccb(Assembler::belowEqual, ext_cpuid7);
> +    __ cmpl(rax, 0x80000008);     // Is cpuid(0x80000009 and above) supported?
> +    __ jccb(Assembler::belowEqual, ext_cpuid8);
> +    __ cmpl(rax, 0x8000001E);     // Is cpuid(0x8000001E) supported?
> +    __ jccb(Assembler::below, ext_cpuid8);
> +    //
> +    // Extended cpuid(0x8000001E)
> +    //
> +    __ movl(rax, 0x8000001E);
> +    __ cpuid();
> +    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1E_offset())));
> +    __ movl(Address(rsi, 0), rax);
> +    __ movl(Address(rsi, 4), rbx);
> +    __ movl(Address(rsi, 8), rcx);
> +    __ movl(Address(rsi,12), rdx);
> +
>      //
>      // Extended cpuid(0x80000008)
>      //
> +    __ bind(ext_cpuid8);
>      __ movl(rax, 0x80000008);
>      __ cpuid();
>      __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid8_offset())));
> @@ -1109,11 +1125,27 @@
>      }
>
>  #ifdef COMPILER2
> -    if (MaxVectorSize > 16) {
> -      // Limit vectors size to 16 bytes on current AMD cpus.
> +    if (cpu_family() < 0x17 && MaxVectorSize > 16) {
> +      // Limit vectors size to 16 bytes on AMD cpus < 17h.
>        FLAG_SET_DEFAULT(MaxVectorSize, 16);
>      }
>  #endif // COMPILER2
> +
> +    // Some defaults for AMD family 17h
> +    if ( cpu_family() == 0x17 ) {
> +      // On family 17h processors use XMM and UnalignedLoadStores for Array Copy
> +      if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
> +        FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
> +      }
> +      if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
> +        FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
> +      }
> +#ifdef COMPILER2
> +      if (supports_sse4_2() && FLAG_IS_DEFAULT(UseFPUForSpilling)) {
> +        FLAG_SET_DEFAULT(UseFPUForSpilling, true);
> +      }
> +#endif
> +    }
>    }
>
>    if( is_intel() ) { // Intel cpus specific settings
> diff --git a/src/cpu/x86/vm/vm_version_x86.hpp
> b/src/cpu/x86/vm/vm_version_x86.hpp
> --- a/src/cpu/x86/vm/vm_version_x86.hpp
> +++ b/src/cpu/x86/vm/vm_version_x86.hpp
> @@ -228,6 +228,15 @@
>      } bits;
>    };
>
> +  union ExtCpuid1EEbx {
> +    uint32_t value;
> +    struct {
> +      uint32_t                  : 8,
> +               threads_per_core : 8,
> +                                : 16;
> +    } bits;
> +  };
> +
>    union XemXcr0Eax {
>      uint32_t value;
>      struct {
> @@ -398,6 +407,12 @@
>      ExtCpuid8Ecx ext_cpuid8_ecx;
>      uint32_t     ext_cpuid8_edx; // reserved
>
> +    // cpuid function 0x8000001E // AMD 17h
> +    uint32_t      ext_cpuid1E_eax;
> +    ExtCpuid1EEbx ext_cpuid1E_ebx; // threads per core (AMD17h)
> +    uint32_t      ext_cpuid1E_ecx;
> +    uint32_t      ext_cpuid1E_edx; // unused currently
> +
>      // extended control register XCR0 (the XFEATURE_ENABLED_MASK register)
>      XemXcr0Eax   xem_xcr0_eax;
>      uint32_t     xem_xcr0_edx; // reserved
> @@ -505,6 +520,14 @@
>        result |= CPU_CLMUL;
>      if (_cpuid_info.sef_cpuid7_ebx.bits.rtm != 0)
>        result |= CPU_RTM;
> +    if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
> +       result |= CPU_ADX;
> +    if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
> +      result |= CPU_BMI2;
> +    if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
> +      result |= CPU_SHA;
> +    if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
> +      result |= CPU_FMA;
>
>      // AMD features.
>      if (is_amd()) {
> @@ -518,16 +541,8 @@
>      }
>      // Intel features.
>      if(is_intel()) {
> -      if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
> -         result |= CPU_ADX;
> -      if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
> -        result |= CPU_BMI2;
> -      if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
> -        result |= CPU_SHA;
>        if(_cpuid_info.ext_cpuid1_ecx.bits.lzcnt_intel != 0)
>          result |= CPU_LZCNT;
> -      if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
> -        result |= CPU_FMA;
>        // for Intel, ecx.bits.misalignsse bit (bit 8) indicates
> support for prefetchw
>        if (_cpuid_info.ext_cpuid1_ecx.bits.misalignsse != 0) {
>          result |= CPU_3DNOW_PREFETCH;
> @@ -590,6 +605,7 @@
>    static ByteSize ext_cpuid5_offset() { return
> byte_offset_of(CpuidInfo, ext_cpuid5_eax); }
>    static ByteSize ext_cpuid7_offset() { return
> byte_offset_of(CpuidInfo, ext_cpuid7_eax); }
>    static ByteSize ext_cpuid8_offset() { return
> byte_offset_of(CpuidInfo, ext_cpuid8_eax); }
> +  static ByteSize ext_cpuid1E_offset() { return
> byte_offset_of(CpuidInfo, ext_cpuid1E_eax); }
>    static ByteSize tpl_cpuidB0_offset() { return
> byte_offset_of(CpuidInfo, tpl_cpuidB0_eax); }
>    static ByteSize tpl_cpuidB1_offset() { return
> byte_offset_of(CpuidInfo, tpl_cpuidB1_eax); }
>    static ByteSize tpl_cpuidB2_offset() { return
> byte_offset_of(CpuidInfo, tpl_cpuidB2_eax); }
> @@ -673,8 +689,12 @@
>      if (is_intel() && supports_processor_topology()) {
>        result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
>      } else if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) {
> -      result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
> -               cores_per_cpu();
> +      if (cpu_family() >= 0x17) {
> +        result = _cpuid_info.ext_cpuid1E_ebx.bits.threads_per_core + 1;
> +      } else {
> +        result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
> +                 cores_per_cpu();
> +      }
>      }
>      return (result == 0 ? 1 : result);
>    }
>
> Regards,
> Rohit
>
> >>
> >> diff --git a/src/cpu/x86/vm/vm_version_x86.cpp
> >> b/src/cpu/x86/vm/vm_version_x86.cpp
> >> --- a/src/cpu/x86/vm/vm_version_x86.cpp
> >> +++ b/src/cpu/x86/vm/vm_version_x86.cpp
> >> @@ -70,7 +70,7 @@
> >>       bool use_evex = FLAG_IS_DEFAULT(UseAVX) || (UseAVX > 2);
> >>
> >>       Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4;
> >> -    Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7,
> >> done, wrapup;
> >> +    Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7,
> >> ext_cpuid8, done, wrapup;
> >>       Label legacy_setup, save_restore_except, legacy_save_restore,
> >> start_simd_check;
> >>
> >>       StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub");
> >> @@ -267,14 +267,30 @@
> >>       __ cmpl(rax, 0x80000000);     // Is cpuid(0x80000001) supported?
> >>       __ jcc(Assembler::belowEqual, done);
> >>       __ cmpl(rax, 0x80000004);     // Is cpuid(0x80000005) supported?
> >> -    __ jccb(Assembler::belowEqual, ext_cpuid1);
> >> +    __ jcc(Assembler::belowEqual, ext_cpuid1);
> >>       __ cmpl(rax, 0x80000006);     // Is cpuid(0x80000007) supported?
> >>       __ jccb(Assembler::belowEqual, ext_cpuid5);
> >>       __ cmpl(rax, 0x80000007);     // Is cpuid(0x80000008) supported?
> >>       __ jccb(Assembler::belowEqual, ext_cpuid7);
> >> +    __ cmpl(rax, 0x80000008);     // Is cpuid(0x80000009 and above)
> >> supported?
> >> +    __ jccb(Assembler::belowEqual, ext_cpuid8);
> >> +    __ cmpl(rax, 0x8000001E);     // Is cpuid(0x8000001E) supported?
> >> +    __ jccb(Assembler::below, ext_cpuid8);
> >> +    //
> >> +    // Extended cpuid(0x8000001E)
> >> +    //
> >> +    __ movl(rax, 0x8000001E);
> >> +    __ cpuid();
> >> +    __ lea(rsi, Address(rbp,
> >> in_bytes(VM_Version::ext_cpuid1E_offset())));
> >> +    __ movl(Address(rsi, 0), rax);
> >> +    __ movl(Address(rsi, 4), rbx);
> >> +    __ movl(Address(rsi, 8), rcx);
> >> +    __ movl(Address(rsi,12), rdx);
> >> +
> >>       //
> >>       // Extended cpuid(0x80000008)
> >>       //
> >> +    __ bind(ext_cpuid8);
> >>       __ movl(rax, 0x80000008);
> >>       __ cpuid();
> >>       __ lea(rsi, Address(rbp,
> >> in_bytes(VM_Version::ext_cpuid8_offset())));
> >> @@ -1109,11 +1125,27 @@
> >>       }
> >>
> >>   #ifdef COMPILER2
> >> -    if (MaxVectorSize > 16) {
> >> -      // Limit vectors size to 16 bytes on current AMD cpus.
> >> +    if (cpu_family() < 0x17 && MaxVectorSize > 16) {
> >> +      // Limit vectors size to 16 bytes on AMD cpus < 17h.
> >>         FLAG_SET_DEFAULT(MaxVectorSize, 16);
> >>       }
> >>   #endif // COMPILER2
> >> +
> >> +    // Some defaults for AMD family 17h
> >> +    if ( cpu_family() == 0x17 ) {
> >> +      // On family 17h processors use XMM and UnalignedLoadStores for
> >> Array Copy
> >> +      if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
> >> +        FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
> >> +      }
> >> +      if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores))
> {
> >> +        FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
> >> +      }
> >> +#ifdef COMPILER2
> >> +      if (supports_sse4_2() && FLAG_IS_DEFAULT(UseFPUForSpilling)) {
> >> +        FLAG_SET_DEFAULT(UseFPUForSpilling, true);
> >> +      }
> >> +#endif
> >> +    }
> >>     }
> >>
> >>     if( is_intel() ) { // Intel cpus specific settings
> >> diff --git a/src/cpu/x86/vm/vm_version_x86.hpp
> >> b/src/cpu/x86/vm/vm_version_x86.hpp
> >> --- a/src/cpu/x86/vm/vm_version_x86.hpp
> >> +++ b/src/cpu/x86/vm/vm_version_x86.hpp
> >> @@ -228,6 +228,15 @@
> >>       } bits;
> >>     };
> >>
> >> +  union ExtCpuid1EEbx {
> >> +    uint32_t value;
> >> +    struct {
> >> +      uint32_t                  : 8,
> >> +               threads_per_core : 8,
> >> +                                : 16;
> >> +    } bits;
> >> +  };
> >> +
> >>     union XemXcr0Eax {
> >>       uint32_t value;
> >>       struct {
> >> @@ -398,6 +407,12 @@
> >>       ExtCpuid8Ecx ext_cpuid8_ecx;
> >>       uint32_t     ext_cpuid8_edx; // reserved
> >>
> >> +    // cpuid function 0x8000001E // AMD 17h
> >> +    uint32_t      ext_cpuid1E_eax;
> >> +    ExtCpuid1EEbx ext_cpuid1E_ebx; // threads per core (AMD17h)
> >> +    uint32_t      ext_cpuid1E_ecx;
> >> +    uint32_t      ext_cpuid1E_edx; // unused currently
> >> +
> >>       // extended control register XCR0 (the XFEATURE_ENABLED_MASK
> >> register)
> >>       XemXcr0Eax   xem_xcr0_eax;
> >>       uint32_t     xem_xcr0_edx; // reserved
> >> @@ -505,6 +520,14 @@
> >>         result |= CPU_CLMUL;
> >>       if (_cpuid_info.sef_cpuid7_ebx.bits.rtm != 0)
> >>         result |= CPU_RTM;
> >> +    if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
> >> +       result |= CPU_ADX;
> >> +    if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
> >> +      result |= CPU_BMI2;
> >> +    if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
> >> +      result |= CPU_SHA;
> >> +    if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
> >> +      result |= CPU_FMA;
> >>
> >>       // AMD features.
> >>       if (is_amd()) {
> >> @@ -518,16 +541,8 @@
> >>       }
> >>       // Intel features.
> >>       if(is_intel()) {
> >> -      if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
> >> -         result |= CPU_ADX;
> >> -      if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
> >> -        result |= CPU_BMI2;
> >> -      if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
> >> -        result |= CPU_SHA;
> >>         if(_cpuid_info.ext_cpuid1_ecx.bits.lzcnt_intel != 0)
> >>           result |= CPU_LZCNT;
> >> -      if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
> >> -        result |= CPU_FMA;
> >>         // for Intel, ecx.bits.misalignsse bit (bit 8) indicates
> >> support for prefetchw
> >>         if (_cpuid_info.ext_cpuid1_ecx.bits.misalignsse != 0) {
> >>           result |= CPU_3DNOW_PREFETCH;
> >> @@ -590,6 +605,7 @@
> >>     static ByteSize ext_cpuid5_offset() { return
> >> byte_offset_of(CpuidInfo, ext_cpuid5_eax); }
> >>     static ByteSize ext_cpuid7_offset() { return
> >> byte_offset_of(CpuidInfo, ext_cpuid7_eax); }
> >>     static ByteSize ext_cpuid8_offset() { return
> >> byte_offset_of(CpuidInfo, ext_cpuid8_eax); }
> >> +  static ByteSize ext_cpuid1E_offset() { return
> >> byte_offset_of(CpuidInfo, ext_cpuid1E_eax); }
> >>     static ByteSize tpl_cpuidB0_offset() { return
> >> byte_offset_of(CpuidInfo, tpl_cpuidB0_eax); }
> >>     static ByteSize tpl_cpuidB1_offset() { return
> >> byte_offset_of(CpuidInfo, tpl_cpuidB1_eax); }
> >>     static ByteSize tpl_cpuidB2_offset() { return
> >> byte_offset_of(CpuidInfo, tpl_cpuidB2_eax); }
> >> @@ -673,8 +689,12 @@
> >>       if (is_intel() && supports_processor_topology()) {
> >>         result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
> >>       } else if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) {
> >> -      result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
> >> -               cores_per_cpu();
> >> +      if (cpu_family() >= 0x17) {
> >> +        result = _cpuid_info.ext_cpuid1E_ebx.bits.threads_per_core +
> 1;
> >> +      } else {
> >> +        result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
> >> +                 cores_per_cpu();
> >> +      }
> >>       }
> >>       return (result == 0 ? 1 : result);
> >>     }
> >>
> >> Please let me know your comments.
> >> Thanks for your review.
> >>
> >> Regards,
> >> Rohit
> >>
> >>>
> >>>
> >>> On 9/11/17 9:52 PM, Rohit Arul Raj wrote:
> >>>>
> >>>>
> >>>> Hello David,
> >>>>
> >>>>>>
> >>>>>>
> >>>>>> 1. ExtCpuid1EEx
> >>>>>>
> >>>>>> Should this be ExtCpuid1EEbx? (I see the naming here is somewhat
> >>>>>> inconsistent - and potentially confusing: I would have preferred to
> >>>>>> see
> >>>>>> things like ExtCpuid_1E_Ebx, to make it clear.)
> >>>>>
> >>>>>
> >>>>>
> >>>>> Yes, I can change it accordingly.
> >>>>>
> >>>>
> >>>> I have attached the updated, re-tested patch as per your comments
> above.
> >>>>
> >>>> diff --git a/src/cpu/x86/vm/vm_version_x86.cpp
> >>>> b/src/cpu/x86/vm/vm_version_x86.cpp
> >>>> --- a/src/cpu/x86/vm/vm_version_x86.cpp
> >>>> +++ b/src/cpu/x86/vm/vm_version_x86.cpp
> >>>> @@ -70,7 +70,7 @@
> >>>>        bool use_evex = FLAG_IS_DEFAULT(UseAVX) || (UseAVX > 2);
> >>>>
> >>>>        Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4;
> >>>> -    Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7,
> >>>> done, wrapup;
> >>>> +    Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7,
> >>>> ext_cpuid8, done, wrapup;
> >>>>        Label legacy_setup, save_restore_except, legacy_save_restore,
> >>>> start_simd_check;
> >>>>
> >>>>        StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub");
> >>>> @@ -272,9 +272,23 @@
> >>>>        __ jccb(Assembler::belowEqual, ext_cpuid5);
> >>>>        __ cmpl(rax, 0x80000007);     // Is cpuid(0x80000008)
> supported?
> >>>>        __ jccb(Assembler::belowEqual, ext_cpuid7);
> >>>> +    __ cmpl(rax, 0x80000008);     // Is cpuid(0x8000001E) supported?
> >>>> +    __ jccb(Assembler::belowEqual, ext_cpuid8);
> >>>> +    //
> >>>> +    // Extended cpuid(0x8000001E)
> >>>> +    //
> >>>> +    __ movl(rax, 0x8000001E);
> >>>> +    __ cpuid();
> >>>> +    __ lea(rsi, Address(rbp,
> >>>> in_bytes(VM_Version::ext_cpuid_1E_offset())));
> >>>> +    __ movl(Address(rsi, 0), rax);
> >>>> +    __ movl(Address(rsi, 4), rbx);
> >>>> +    __ movl(Address(rsi, 8), rcx);
> >>>> +    __ movl(Address(rsi,12), rdx);
> >>>> +
> >>>>        //
> >>>>        // Extended cpuid(0x80000008)
> >>>>        //
> >>>> +    __ bind(ext_cpuid8);
> >>>>        __ movl(rax, 0x80000008);
> >>>>        __ cpuid();
> >>>>        __ lea(rsi, Address(rbp,
> >>>> in_bytes(VM_Version::ext_cpuid8_offset())));
> >>>> @@ -1109,11 +1123,27 @@
> >>>>        }
> >>>>
> >>>>    #ifdef COMPILER2
> >>>> -    if (MaxVectorSize > 16) {
> >>>> -      // Limit vectors size to 16 bytes on current AMD cpus.
> >>>> +    if (cpu_family() < 0x17 && MaxVectorSize > 16) {
> >>>> +      // Limit vectors size to 16 bytes on AMD cpus < 17h.
> >>>>          FLAG_SET_DEFAULT(MaxVectorSize, 16);
> >>>>        }
> >>>>    #endif // COMPILER2
> >>>> +
> >>>> +    // Some defaults for AMD family 17h
> >>>> +    if ( cpu_family() == 0x17 ) {
> >>>> +      // On family 17h processors use XMM and UnalignedLoadStores for
> >>>> Array Copy
> >>>> +      if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
> >>>> +        FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
> >>>> +      }
> >>>> +      if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores))
> {
> >>>> +        FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
> >>>> +      }
> >>>> +#ifdef COMPILER2
> >>>> +      if (supports_sse4_2() && FLAG_IS_DEFAULT(UseFPUForSpilling)) {
> >>>> +        FLAG_SET_DEFAULT(UseFPUForSpilling, true);
> >>>> +      }
> >>>> +#endif
> >>>> +    }
> >>>>      }
> >>>>
> >>>>      if( is_intel() ) { // Intel cpus specific settings
> >>>> diff --git a/src/cpu/x86/vm/vm_version_x86.hpp
> >>>> b/src/cpu/x86/vm/vm_version_x86.hpp
> >>>> --- a/src/cpu/x86/vm/vm_version_x86.hpp
> >>>> +++ b/src/cpu/x86/vm/vm_version_x86.hpp
> >>>> @@ -228,6 +228,15 @@
> >>>>        } bits;
> >>>>      };
> >>>>
> >>>> +  union ExtCpuid_1E_Ebx {
> >>>> +    uint32_t value;
> >>>> +    struct {
> >>>> +      uint32_t                  : 8,
> >>>> +               threads_per_core : 8,
> >>>> +                                : 16;
> >>>> +    } bits;
> >>>> +  };
> >>>> +
> >>>>      union XemXcr0Eax {
> >>>>        uint32_t value;
> >>>>        struct {
> >>>> @@ -398,6 +407,12 @@
> >>>>        ExtCpuid8Ecx ext_cpuid8_ecx;
> >>>>        uint32_t     ext_cpuid8_edx; // reserved
> >>>>
> >>>> +    // cpuid function 0x8000001E // AMD 17h
> >>>> +    uint32_t        ext_cpuid_1E_eax;
> >>>> +    ExtCpuid_1E_Ebx ext_cpuid_1E_ebx; // threads per core (AMD17h)
> >>>> +    uint32_t        ext_cpuid_1E_ecx;
> >>>> +    uint32_t        ext_cpuid_1E_edx; // unused currently
> >>>> +
> >>>>        // extended control register XCR0 (the XFEATURE_ENABLED_MASK
> >>>> register)
> >>>>        XemXcr0Eax   xem_xcr0_eax;
> >>>>        uint32_t     xem_xcr0_edx; // reserved
> >>>> @@ -505,6 +520,14 @@
> >>>>          result |= CPU_CLMUL;
> >>>>        if (_cpuid_info.sef_cpuid7_ebx.bits.rtm != 0)
> >>>>          result |= CPU_RTM;
> >>>> +    if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
> >>>> +       result |= CPU_ADX;
> >>>> +    if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
> >>>> +      result |= CPU_BMI2;
> >>>> +    if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
> >>>> +      result |= CPU_SHA;
> >>>> +    if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
> >>>> +      result |= CPU_FMA;
> >>>>
> >>>>        // AMD features.
> >>>>        if (is_amd()) {
> >>>> @@ -518,16 +541,8 @@
> >>>>        }
> >>>>        // Intel features.
> >>>>        if(is_intel()) {
> >>>> -      if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
> >>>> -         result |= CPU_ADX;
> >>>> -      if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
> >>>> -        result |= CPU_BMI2;
> >>>> -      if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
> >>>> -        result |= CPU_SHA;
> >>>>          if(_cpuid_info.ext_cpuid1_ecx.bits.lzcnt_intel != 0)
> >>>>            result |= CPU_LZCNT;
> >>>> -      if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
> >>>> -        result |= CPU_FMA;
> >>>>          // for Intel, ecx.bits.misalignsse bit (bit 8) indicates
> >>>> support for prefetchw
> >>>>          if (_cpuid_info.ext_cpuid1_ecx.bits.misalignsse != 0) {
> >>>>            result |= CPU_3DNOW_PREFETCH;
> >>>> @@ -590,6 +605,7 @@
> >>>>      static ByteSize ext_cpuid5_offset() { return
> >>>> byte_offset_of(CpuidInfo, ext_cpuid5_eax); }
> >>>>      static ByteSize ext_cpuid7_offset() { return
> >>>> byte_offset_of(CpuidInfo, ext_cpuid7_eax); }
> >>>>      static ByteSize ext_cpuid8_offset() { return
> >>>> byte_offset_of(CpuidInfo, ext_cpuid8_eax); }
> >>>> +  static ByteSize ext_cpuid_1E_offset() { return
> >>>> byte_offset_of(CpuidInfo, ext_cpuid_1E_eax); }
> >>>>      static ByteSize tpl_cpuidB0_offset() { return
> >>>> byte_offset_of(CpuidInfo, tpl_cpuidB0_eax); }
> >>>>      static ByteSize tpl_cpuidB1_offset() { return
> >>>> byte_offset_of(CpuidInfo, tpl_cpuidB1_eax); }
> >>>>      static ByteSize tpl_cpuidB2_offset() { return
> >>>> byte_offset_of(CpuidInfo, tpl_cpuidB2_eax); }
> >>>> @@ -673,8 +689,11 @@
> >>>>        if (is_intel() && supports_processor_topology()) {
> >>>>          result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
> >>>>        } else if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) {
> >>>> -      result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
> >>>> -               cores_per_cpu();
> >>>> +      if (cpu_family() >= 0x17)
> >>>> +        result = _cpuid_info.ext_cpuid_1E_ebx.bits.threads_per_core
> +
> >>>> 1;
> >>>> +      else
> >>>> +        result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
> >>>> +                 cores_per_cpu();
> >>>>        }
> >>>>        return (result == 0 ? 1 : result);
> >>>>      }
> >>>>
> >>>>
> >>>> Please let me know your comments
> >>>>
> >>>> Thanks for your time.
> >>>>
> >>>> Regards,
> >>>> Rohit
> >>>>
> >>>>
> >>>>>> Thanks,
> >>>>>> David
> >>>>>> -----
> >>>>>>
> >>>>>>
> >>>>>>> Reference:
> >>>>>>>
> >>>>>>>
> >>>>>>> https://support.amd.com/TechDocs/54945_PPR_Family_17h_Models_00h-0Fh.pdf
> >>>>>>> [Pg 82]
> >>>>>>>
> >>>>>>>        CPUID_Fn8000001E_EBX [Core Identifiers] (CoreId)
> >>>>>>>          15:8 ThreadsPerCore: threads per core. Read-only. Reset:
> >>>>>>> XXh.
> >>>>>>> The number of threads per core is ThreadsPerCore+1.
> >>>>>>>
> >>>>>>> diff --git a/src/cpu/x86/vm/vm_version_x86.cpp
> >>>>>>> b/src/cpu/x86/vm/vm_version_x86.cpp
> >>>>>>> --- a/src/cpu/x86/vm/vm_version_x86.cpp
> >>>>>>> +++ b/src/cpu/x86/vm/vm_version_x86.cpp
> >>>>>>> @@ -70,7 +70,7 @@
> >>>>>>>         bool use_evex = FLAG_IS_DEFAULT(UseAVX) || (UseAVX > 2);
> >>>>>>>
> >>>>>>>         Label detect_486, cpu486, detect_586, std_cpuid1,
> std_cpuid4;
> >>>>>>> -    Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5,
> ext_cpuid7,
> >>>>>>> done, wrapup;
> >>>>>>> +    Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5,
> ext_cpuid7,
> >>>>>>> ext_cpuid8, done, wrapup;
> >>>>>>>         Label legacy_setup, save_restore_except,
> legacy_save_restore,
> >>>>>>> start_simd_check;
> >>>>>>>
> >>>>>>>         StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub");
> >>>>>>> @@ -272,9 +272,23 @@
> >>>>>>>         __ jccb(Assembler::belowEqual, ext_cpuid5);
> >>>>>>>         __ cmpl(rax, 0x80000007);     // Is cpuid(0x80000008)
> >>>>>>> supported?
> >>>>>>>         __ jccb(Assembler::belowEqual, ext_cpuid7);
> >>>>>>> +    __ cmpl(rax, 0x80000008);     // Is cpuid(0x8000001E)
> supported?
> >>>>>>> +    __ jccb(Assembler::belowEqual, ext_cpuid8);
> >>>>>>> +    //
> >>>>>>> +    // Extended cpuid(0x8000001E)
> >>>>>>> +    //
> >>>>>>> +    __ movl(rax, 0x8000001E);
> >>>>>>> +    __ cpuid();
> >>>>>>> +    __ lea(rsi, Address(rbp,
> >>>>>>> in_bytes(VM_Version::ext_cpuid1E_offset())));
> >>>>>>> +    __ movl(Address(rsi, 0), rax);
> >>>>>>> +    __ movl(Address(rsi, 4), rbx);
> >>>>>>> +    __ movl(Address(rsi, 8), rcx);
> >>>>>>> +    __ movl(Address(rsi,12), rdx);
> >>>>>>> +
> >>>>>>>         //
> >>>>>>>         // Extended cpuid(0x80000008)
> >>>>>>>         //
> >>>>>>> +    __ bind(ext_cpuid8);
> >>>>>>>         __ movl(rax, 0x80000008);
> >>>>>>>         __ cpuid();
> >>>>>>>         __ lea(rsi, Address(rbp,
> >>>>>>> in_bytes(VM_Version::ext_cpuid8_offset())));
> >>>>>>> @@ -1109,11 +1123,27 @@
> >>>>>>>         }
> >>>>>>>
> >>>>>>>     #ifdef COMPILER2
> >>>>>>> -    if (MaxVectorSize > 16) {
> >>>>>>> -      // Limit vectors size to 16 bytes on current AMD cpus.
> >>>>>>> +    if (cpu_family() < 0x17 && MaxVectorSize > 16) {
> >>>>>>> +      // Limit vectors size to 16 bytes on AMD cpus < 17h.
> >>>>>>>           FLAG_SET_DEFAULT(MaxVectorSize, 16);
> >>>>>>>         }
> >>>>>>>     #endif // COMPILER2
> >>>>>>> +
> >>>>>>> +    // Some defaults for AMD family 17h
> >>>>>>> +    if ( cpu_family() == 0x17 ) {
> >>>>>>> +      // On family 17h processors use XMM and UnalignedLoadStores
> >>>>>>> for
> >>>>>>> Array Copy
> >>>>>>> +      if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy))
> {
> >>>>>>> +        FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
> >>>>>>> +      }
> >>>>>>> +      if (supports_sse2() &&
> >>>>>>> FLAG_IS_DEFAULT(UseUnalignedLoadStores))
> >>>>>>> {
> >>>>>>> +        FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
> >>>>>>> +      }
> >>>>>>> +#ifdef COMPILER2
> >>>>>>> +      if (supports_sse4_2() && FLAG_IS_DEFAULT(UseFPUForSpilling))
> {
> >>>>>>> +        FLAG_SET_DEFAULT(UseFPUForSpilling, true);
> >>>>>>> +      }
> >>>>>>> +#endif
> >>>>>>> +    }
> >>>>>>>       }
> >>>>>>>
> >>>>>>>       if( is_intel() ) { // Intel cpus specific settings
> >>>>>>> diff --git a/src/cpu/x86/vm/vm_version_x86.hpp
> >>>>>>> b/src/cpu/x86/vm/vm_version_x86.hpp
> >>>>>>> --- a/src/cpu/x86/vm/vm_version_x86.hpp
> >>>>>>> +++ b/src/cpu/x86/vm/vm_version_x86.hpp
> >>>>>>> @@ -228,6 +228,15 @@
> >>>>>>>         } bits;
> >>>>>>>       };
> >>>>>>>
> >>>>>>> +  union ExtCpuid1EEx {
> >>>>>>> +    uint32_t value;
> >>>>>>> +    struct {
> >>>>>>> +      uint32_t                  : 8,
> >>>>>>> +               threads_per_core : 8,
> >>>>>>> +                                : 16;
> >>>>>>> +    } bits;
> >>>>>>> +  };
> >>>>>>> +
> >>>>>>>       union XemXcr0Eax {
> >>>>>>>         uint32_t value;
> >>>>>>>         struct {
> >>>>>>> @@ -398,6 +407,12 @@
> >>>>>>>         ExtCpuid8Ecx ext_cpuid8_ecx;
> >>>>>>>         uint32_t     ext_cpuid8_edx; // reserved
> >>>>>>>
> >>>>>>> +    // cpuid function 0x8000001E // AMD 17h
> >>>>>>> +    uint32_t     ext_cpuid1E_eax;
> >>>>>>> +    ExtCpuid1EEx ext_cpuid1E_ebx; // threads per core (AMD17h)
> >>>>>>> +    uint32_t     ext_cpuid1E_ecx;
> >>>>>>> +    uint32_t     ext_cpuid1E_edx; // unused currently
> >>>>>>> +
> >>>>>>>         // extended control register XCR0 (the
> XFEATURE_ENABLED_MASK
> >>>>>>> register)
> >>>>>>>         XemXcr0Eax   xem_xcr0_eax;
> >>>>>>>         uint32_t     xem_xcr0_edx; // reserved
> >>>>>>> @@ -505,6 +520,14 @@
> >>>>>>>           result |= CPU_CLMUL;
> >>>>>>>         if (_cpuid_info.sef_cpuid7_ebx.bits.rtm != 0)
> >>>>>>>           result |= CPU_RTM;
> >>>>>>> +    if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
> >>>>>>> +       result |= CPU_ADX;
> >>>>>>> +    if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
> >>>>>>> +      result |= CPU_BMI2;
> >>>>>>> +    if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
> >>>>>>> +      result |= CPU_SHA;
> >>>>>>> +    if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
> >>>>>>> +      result |= CPU_FMA;
> >>>>>>>
> >>>>>>>         // AMD features.
> >>>>>>>         if (is_amd()) {
> >>>>>>> @@ -518,16 +541,8 @@
> >>>>>>>         }
> >>>>>>>         // Intel features.
> >>>>>>>         if(is_intel()) {
> >>>>>>> -      if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
> >>>>>>> -         result |= CPU_ADX;
> >>>>>>> -      if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
> >>>>>>> -        result |= CPU_BMI2;
> >>>>>>> -      if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
> >>>>>>> -        result |= CPU_SHA;
> >>>>>>>           if(_cpuid_info.ext_cpuid1_ecx.bits.lzcnt_intel != 0)
> >>>>>>>             result |= CPU_LZCNT;
> >>>>>>> -      if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
> >>>>>>> -        result |= CPU_FMA;
> >>>>>>>           // for Intel, ecx.bits.misalignsse bit (bit 8) indicates
> >>>>>>> support for prefetchw
> >>>>>>>           if (_cpuid_info.ext_cpuid1_ecx.bits.misalignsse != 0) {
> >>>>>>>             result |= CPU_3DNOW_PREFETCH;
> >>>>>>> @@ -590,6 +605,7 @@
> >>>>>>>       static ByteSize ext_cpuid5_offset() { return
> >>>>>>> byte_offset_of(CpuidInfo, ext_cpuid5_eax); }
> >>>>>>>       static ByteSize ext_cpuid7_offset() { return
> >>>>>>> byte_offset_of(CpuidInfo, ext_cpuid7_eax); }
> >>>>>>>       static ByteSize ext_cpuid8_offset() { return
> >>>>>>> byte_offset_of(CpuidInfo, ext_cpuid8_eax); }
> >>>>>>> +  static ByteSize ext_cpuid1E_offset() { return
> >>>>>>> byte_offset_of(CpuidInfo, ext_cpuid1E_eax); }
> >>>>>>>       static ByteSize tpl_cpuidB0_offset() { return
> >>>>>>> byte_offset_of(CpuidInfo, tpl_cpuidB0_eax); }
> >>>>>>>       static ByteSize tpl_cpuidB1_offset() { return
> >>>>>>> byte_offset_of(CpuidInfo, tpl_cpuidB1_eax); }
> >>>>>>>       static ByteSize tpl_cpuidB2_offset() { return
> >>>>>>> byte_offset_of(CpuidInfo, tpl_cpuidB2_eax); }
> >>>>>>> @@ -673,8 +689,11 @@
> >>>>>>>         if (is_intel() && supports_processor_topology()) {
> >>>>>>>           result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
> >>>>>>>         } else if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) {
> >>>>>>> -      result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
> >>>>>>> -               cores_per_cpu();
> >>>>>>> +      if (cpu_family() >= 0x17)
> >>>>>>> +        result = _cpuid_info.ext_cpuid1E_ebx.bits.threads_per_core
> +
> >>>>>>> 1;
> >>>>>>> +      else
> >>>>>>> +        result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu
> /
> >>>>>>> +                 cores_per_cpu();
> >>>>>>>         }
> >>>>>>>         return (result == 0 ? 1 : result);
> >>>>>>>       }
> >>>>>>>
> >>>>>>> I have attached the patch for review.
> >>>>>>> Please let me know your comments.
> >>>>>>>
> >>>>>>> Thanks,
> >>>>>>> Rohit
> >>>>>>>
> >>>>>>>> Thanks,
> >>>>>>>> Vladimir
> >>>>>>>>
> >>>>>>>>
> >>>>>>>>>
> >>>>>>>>> src/cpu/x86/vm/vm_version_x86.cpp
> >>>>>>>>>
> >>>>>>>>> No comments on AMD specific changes.
> >>>>>>>>>
> >>>>>>>>> Thanks,
> >>>>>>>>> David
> >>>>>>>>> -----
> >>>>>>>>>
> >>>>>>>>> On 5/09/2017 3:43 PM, David Holmes wrote:
> >>>>>>>>>>
> >>>>>>>>>>
> >>>>>>>>>>
> >>>>>>>>>>
> >>>>>>>>>> On 5/09/2017 3:29 PM, Rohit Arul Raj wrote:
> >>>>>>>>>>>
> >>>>>>>>>>>
> >>>>>>>>>>>
> >>>>>>>>>>>
> >>>>>>>>>>> Hello David,
> >>>>>>>>>>>
> >>>>>>>>>>> On Tue, Sep 5, 2017 at 10:31 AM, David Holmes
> >>>>>>>>>>> <david.holmes at oracle.com>
> >>>>>>>>>>> wrote:
> >>>>>>>>>>>>
> >>>>>>>>>>>>
> >>>>>>>>>>>>
> >>>>>>>>>>>>
> >>>>>>>>>>>> Hi Rohit,
> >>>>>>>>>>>>
> >>>>>>>>>>>> I was unable to apply your patch to latest jdk10/hs/hotspot
> >>>>>>>>>>>> repo.
> >>>>>>>>>>>>
> >>>>>>>>>>>
> >>>>>>>>>>> I checked out the latest jdk10/hs/hotspot [parent:
> >>>>>>>>>>> 13548:1a9c2e07a826]
> >>>>>>>>>>> and was able to apply the patch
> >>>>>>>>>>> [epyc-amd17h-defaults-3Sept.patch]
> >>>>>>>>>>> without any issues.
> >>>>>>>>>>> Can you share the error message that you are getting?
> >>>>>>>>>>
> >>>>>>>>>>
> >>>>>>>>>>
> >>>>>>>>>>
> >>>>>>>>>>
> >>>>>>>>>> I was getting this:
> >>>>>>>>>>
> >>>>>>>>>> applying hotspot.patch
> >>>>>>>>>> patching file src/cpu/x86/vm/vm_version_x86.cpp
> >>>>>>>>>> Hunk #1 FAILED at 1108
> >>>>>>>>>> 1 out of 1 hunks FAILED -- saving rejects to file
> >>>>>>>>>> src/cpu/x86/vm/vm_version_x86.cpp.rej
> >>>>>>>>>> patching file src/cpu/x86/vm/vm_version_x86.hpp
> >>>>>>>>>> Hunk #2 FAILED at 522
> >>>>>>>>>> 1 out of 2 hunks FAILED -- saving rejects to file
> >>>>>>>>>> src/cpu/x86/vm/vm_version_x86.hpp.rej
> >>>>>>>>>> abort: patch failed to apply
> >>>>>>>>>>
> >>>>>>>>>> but I started again and this time it applied fine, so not sure
> >>>>>>>>>> what
> >>>>>>>>>> was
> >>>>>>>>>> going on there.
> >>>>>>>>>>
> >>>>>>>>>> Cheers,
> >>>>>>>>>> David
> >>>>>>>>>>
> >>>>>>>>>>> Regards,
> >>>>>>>>>>> Rohit
> >>>>>>>>>>>
> >>>>>>>>>>>
> >>>>>>>>>>>>
> >>>>>>>>>>>>
> >>>>>>>>>>>> On 4/09/2017 2:42 AM, Rohit Arul Raj wrote:
> >>>>>>>>>>>>>
> >>>>>>>>>>>>>
> >>>>>>>>>>>>>
> >>>>>>>>>>>>>
> >>>>>>>>>>>>>
> >>>>>>>>>>>>> Hello Vladimir,
> >>>>>>>>>>>>>
> >>>>>>>>>>>>> On Sat, Sep 2, 2017 at 11:25 PM, Vladimir Kozlov
> >>>>>>>>>>>>> <vladimir.kozlov at oracle.com> wrote:
> >>>>>>>>>>>>>>
> >>>>>>>>>>>>>>
> >>>>>>>>>>>>>>
> >>>>>>>>>>>>>>
> >>>>>>>>>>>>>>
> >>>>>>>>>>>>>> Hi Rohit,
> >>>>>>>>>>>>>>
> >>>>>>>>>>>>>> On 9/2/17 1:16 AM, Rohit Arul Raj wrote:
> >>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>> Hello Vladimir,
> >>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>> Changes look good. Only question I have is about
> >>>>>>>>>>>>>>>> MaxVectorSize.
> >>>>>>>>>>>>>>>> It
> >>>>>>>>>>>>>>>> is
> >>>>>>>>>>>>>>>> set
> >>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>> 16 only in presence of AVX:
> >>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>> http://hg.openjdk.java.net/jdk10/hs/hotspot/file/046eab27258f/src/cpu/x86/vm/vm_version_x86.cpp#l945
> >>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>> Does that code work for AMD 17h too?
> >>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>> Thanks for pointing that out. Yes, the code works fine for
> >>>>>>>>>>>>>>> AMD
> >>>>>>>>>>>>>>> 17h.
> >>>>>>>>>>>>>>> So
> >>>>>>>>>>>>>>> I have removed the surplus check for MaxVectorSize from my
> >>>>>>>>>>>>>>> patch.
> >>>>>>>>>>>>>>> I
> >>>>>>>>>>>>>>> have updated, re-tested and attached the patch.
> >>>>>>>>>>>>>>
> >>>>>>>>>>>>>>
> >>>>>>>>>>>>>>
> >>>>>>>>>>>>>>
> >>>>>>>>>>>>>>
> >>>>>>>>>>>>>>
> >>>>>>>>>>>>>>
> >>>>>>>>>>>>>> Which check did you remove?
> >>>>>>>>>>>>>>
> >>>>>>>>>>>>>
> >>>>>>>>>>>>> My older patch had the below mentioned check which was
> required
> >>>>>>>>>>>>> on
> >>>>>>>>>>>>> JDK9 where the default MaxVectorSize was 64. It has been
> >>>>>>>>>>>>> handled
> >>>>>>>>>>>>> better in openJDK10. So this check is not required anymore.
> >>>>>>>>>>>>>
> >>>>>>>>>>>>> +    // Some defaults for AMD family 17h
> >>>>>>>>>>>>> +    if ( cpu_family() == 0x17 ) {
> >>>>>>>>>>>>> ...
> >>>>>>>>>>>>> ...
> >>>>>>>>>>>>> +      if (MaxVectorSize > 32) {
> >>>>>>>>>>>>> +        FLAG_SET_DEFAULT(MaxVectorSize, 32);
> >>>>>>>>>>>>> +      }
> >>>>>>>>>>>>> ..
> >>>>>>>>>>>>> ..
> >>>>>>>>>>>>> +      }
> >>>>>>>>>>>>>
> >>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>> I have one query regarding the setting of UseSHA flag:
> >>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>> http://hg.openjdk.java.net/jdk10/hs/hotspot/file/046eab27258f/src/cpu/x86/vm/vm_version_x86.cpp#l821
> >>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>> AMD 17h has support for SHA.
> >>>>>>>>>>>>>>> AMD 15h doesn't have  support for SHA. Still "UseSHA" flag
> >>>>>>>>>>>>>>> gets
> >>>>>>>>>>>>>>> enabled for it based on the availability of BMI2 and AVX2.
> Is
> >>>>>>>>>>>>>>> there
> >>>>>>>>>>>>>>> an
> >>>>>>>>>>>>>>> underlying reason for this? I have handled this in the
> patch
> >>>>>>>>>>>>>>> but
> >>>>>>>>>>>>>>> just
> >>>>>>>>>>>>>>> wanted to confirm.
> >>>>>>>>>>>>>>
> >>>>>>>>>>>>>>
> >>>>>>>>>>>>>>
> >>>>>>>>>>>>>>
> >>>>>>>>>>>>>>
> >>>>>>>>>>>>>>
> >>>>>>>>>>>>>>
> >>>>>>>>>>>>>> It was done with next changes which use only AVX2 and BMI2
> >>>>>>>>>>>>>> instructions
> >>>>>>>>>>>>>> to
> >>>>>>>>>>>>>> calculate SHA-256:
> >>>>>>>>>>>>>>
> >>>>>>>>>>>>>> http://hg.openjdk.java.net/jdk10/hs/hotspot/rev/6a17c49de974
> >>>>>>>>>>>>>>
> >>>>>>>>>>>>>> I don't know if AMD 15h supports these instructions and can
> >>>>>>>>>>>>>> execute
> >>>>>>>>>>>>>> that
> >>>>>>>>>>>>>> code. You need to test it.
> >>>>>>>>>>>>>>
> >>>>>>>>>>>>>
> >>>>>>>>>>>>> Ok, got it. Since AMD15h has support for AVX2 and BMI2
> >>>>>>>>>>>>> instructions,
> >>>>>>>>>>>>> it should work.
> >>>>>>>>>>>>> Confirmed by running following sanity tests:
> >>>>>>>>>>>>>
> >>>>>>>>>>>>>
> >>>>>>>>>>>>>
> >>>>>>>>>>>>> ./hotspot/test/compiler/intrinsics/sha/sanity/TestSHA1Intrinsics.java
> >>>>>>>>>>>>>
> >>>>>>>>>>>>>
> >>>>>>>>>>>>>
> >>>>>>>>>>>>>
> >>>>>>>>>>>>> ./hotspot/test/compiler/intrinsics/sha/sanity/TestSHA512Intrinsics.java
> >>>>>>>>>>>>>
> >>>>>>>>>>>>>
> >>>>>>>>>>>>>
> >>>>>>>>>>>>>
> >>>>>>>>>>>>> ./hotspot/test/compiler/intrinsics/sha/sanity/TestSHA256Intrinsics.java
> >>>>>>>>>>>>>
> >>>>>>>>>>>>> So I have removed those SHA checks from my patch too.
> >>>>>>>>>>>>>
> >>>>>>>>>>>>> Please find attached updated, re-tested patch.
> >>>>>>>>>>>>>
> >>>>>>>>>>>>> diff --git a/src/cpu/x86/vm/vm_version_x86.cpp
> >>>>>>>>>>>>> b/src/cpu/x86/vm/vm_version_x86.cpp
> >>>>>>>>>>>>> --- a/src/cpu/x86/vm/vm_version_x86.cpp
> >>>>>>>>>>>>> +++ b/src/cpu/x86/vm/vm_version_x86.cpp
> >>>>>>>>>>>>> @@ -1109,11 +1109,27 @@
> >>>>>>>>>>>>>           }
> >>>>>>>>>>>>>
> >>>>>>>>>>>>>       #ifdef COMPILER2
> >>>>>>>>>>>>> -    if (MaxVectorSize > 16) {
> >>>>>>>>>>>>> -      // Limit vectors size to 16 bytes on current AMD cpus.
> >>>>>>>>>>>>> +    if (cpu_family() < 0x17 && MaxVectorSize > 16) {
> >>>>>>>>>>>>> +      // Limit vectors size to 16 bytes on AMD cpus < 17h.
> >>>>>>>>>>>>>             FLAG_SET_DEFAULT(MaxVectorSize, 16);
> >>>>>>>>>>>>>           }
> >>>>>>>>>>>>>       #endif // COMPILER2
> >>>>>>>>>>>>> +
> >>>>>>>>>>>>> +    // Some defaults for AMD family 17h
> >>>>>>>>>>>>> +    if ( cpu_family() == 0x17 ) {
> >>>>>>>>>>>>> +      // On family 17h processors use XMM and
> >>>>>>>>>>>>> UnalignedLoadStores
> >>>>>>>>>>>>> for
> >>>>>>>>>>>>> Array Copy
> >>>>>>>>>>>>> +      if (supports_sse2() &&
> >>>>>>>>>>>>> FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
> >>>>>>>>>>>>> +        FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
> >>>>>>>>>>>>> +      }
> >>>>>>>>>>>>> +      if (supports_sse2() &&
> >>>>>>>>>>>>> FLAG_IS_DEFAULT(UseUnalignedLoadStores))
> >>>>>>>>>>>>> {
> >>>>>>>>>>>>> +        FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
> >>>>>>>>>>>>> +      }
> >>>>>>>>>>>>> +#ifdef COMPILER2
> >>>>>>>>>>>>> +      if (supports_sse4_2() &&
> >>>>>>>>>>>>> FLAG_IS_DEFAULT(UseFPUForSpilling))
> >>>>>>>>>>>>> {
> >>>>>>>>>>>>> +        FLAG_SET_DEFAULT(UseFPUForSpilling, true);
> >>>>>>>>>>>>> +      }
> >>>>>>>>>>>>> +#endif
> >>>>>>>>>>>>> +    }
> >>>>>>>>>>>>>         }
> >>>>>>>>>>>>>
> >>>>>>>>>>>>>         if( is_intel() ) { // Intel cpus specific settings
> >>>>>>>>>>>>> diff --git a/src/cpu/x86/vm/vm_version_x86.hpp
> >>>>>>>>>>>>> b/src/cpu/x86/vm/vm_version_x86.hpp
> >>>>>>>>>>>>> --- a/src/cpu/x86/vm/vm_version_x86.hpp
> >>>>>>>>>>>>> +++ b/src/cpu/x86/vm/vm_version_x86.hpp
> >>>>>>>>>>>>> @@ -505,6 +505,14 @@
> >>>>>>>>>>>>>             result |= CPU_CLMUL;
> >>>>>>>>>>>>>           if (_cpuid_info.sef_cpuid7_ebx.bits.rtm != 0)
> >>>>>>>>>>>>>             result |= CPU_RTM;
> >>>>>>>>>>>>> +    if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
> >>>>>>>>>>>>> +       result |= CPU_ADX;
> >>>>>>>>>>>>> +    if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
> >>>>>>>>>>>>> +      result |= CPU_BMI2;
> >>>>>>>>>>>>> +    if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
> >>>>>>>>>>>>> +      result |= CPU_SHA;
> >>>>>>>>>>>>> +    if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
> >>>>>>>>>>>>> +      result |= CPU_FMA;
> >>>>>>>>>>>>>
> >>>>>>>>>>>>>           // AMD features.
> >>>>>>>>>>>>>           if (is_amd()) {
> >>>>>>>>>>>>> @@ -515,19 +523,13 @@
> >>>>>>>>>>>>>               result |= CPU_LZCNT;
> >>>>>>>>>>>>>             if (_cpuid_info.ext_cpuid1_ecx.bits.sse4a != 0)
> >>>>>>>>>>>>>               result |= CPU_SSE4A;
> >>>>>>>>>>>>> +      if(_cpuid_info.std_cpuid1_edx.bits.ht != 0)
> >>>>>>>>>>>>> +        result |= CPU_HT;
> >>>>>>>>>>>>>           }
> >>>>>>>>>>>>>           // Intel features.
> >>>>>>>>>>>>>           if(is_intel()) {
> >>>>>>>>>>>>> -      if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
> >>>>>>>>>>>>> -         result |= CPU_ADX;
> >>>>>>>>>>>>> -      if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
> >>>>>>>>>>>>> -        result |= CPU_BMI2;
> >>>>>>>>>>>>> -      if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
> >>>>>>>>>>>>> -        result |= CPU_SHA;
> >>>>>>>>>>>>>             if(_cpuid_info.ext_cpuid1_ecx.bits.lzcnt_intel
> !=
> >>>>>>>>>>>>> 0)
> >>>>>>>>>>>>>               result |= CPU_LZCNT;
> >>>>>>>>>>>>> -      if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
> >>>>>>>>>>>>> -        result |= CPU_FMA;
> >>>>>>>>>>>>>             // for Intel, ecx.bits.misalignsse bit (bit 8)
> >>>>>>>>>>>>> indicates
> >>>>>>>>>>>>> support for prefetchw
> >>>>>>>>>>>>>             if (_cpuid_info.ext_cpuid1_ecx.bits.misalignsse
> !=
> >>>>>>>>>>>>> 0)
> >>>>>>>>>>>>> {
> >>>>>>>>>>>>>               result |= CPU_3DNOW_PREFETCH;
> >>>>>>>>>>>>>
> >>>>>>>>>>>>> Please let me know your comments.
> >>>>>>>>>>>>>
> >>>>>>>>>>>>> Thanks for your time.
> >>>>>>>>>>>>> Rohit
> >>>>>>>>>>>>>
> >>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>> Thanks for taking time to review the code.
> >>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>> diff --git a/src/cpu/x86/vm/vm_version_x86.cpp
> >>>>>>>>>>>>>>> b/src/cpu/x86/vm/vm_version_x86.cpp
> >>>>>>>>>>>>>>> --- a/src/cpu/x86/vm/vm_version_x86.cpp
> >>>>>>>>>>>>>>> +++ b/src/cpu/x86/vm/vm_version_x86.cpp
> >>>>>>>>>>>>>>> @@ -1088,6 +1088,22 @@
> >>>>>>>>>>>>>>>              }
> >>>>>>>>>>>>>>>              FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
> >>>>>>>>>>>>>>>            }
> >>>>>>>>>>>>>>> +    if (supports_sha()) {
> >>>>>>>>>>>>>>> +      if (FLAG_IS_DEFAULT(UseSHA)) {
> >>>>>>>>>>>>>>> +        FLAG_SET_DEFAULT(UseSHA, true);
> >>>>>>>>>>>>>>> +      }
> >>>>>>>>>>>>>>> +    } else if (UseSHA || UseSHA1Intrinsics ||
> >>>>>>>>>>>>>>> UseSHA256Intrinsics
> >>>>>>>>>>>>>>> ||
> >>>>>>>>>>>>>>> UseSHA512Intrinsics) {
> >>>>>>>>>>>>>>> +      if (!FLAG_IS_DEFAULT(UseSHA) ||
> >>>>>>>>>>>>>>> +          !FLAG_IS_DEFAULT(UseSHA1Intrinsics) ||
> >>>>>>>>>>>>>>> +          !FLAG_IS_DEFAULT(UseSHA256Intrinsics) ||
> >>>>>>>>>>>>>>> +          !FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
> >>>>>>>>>>>>>>> +        warning("SHA instructions are not available on
> this
> >>>>>>>>>>>>>>> CPU");
> >>>>>>>>>>>>>>> +      }
> >>>>>>>>>>>>>>> +      FLAG_SET_DEFAULT(UseSHA, false);
> >>>>>>>>>>>>>>> +      FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
> >>>>>>>>>>>>>>> +      FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
> >>>>>>>>>>>>>>> +      FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
> >>>>>>>>>>>>>>> +    }
> >>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>            // some defaults for AMD family 15h
> >>>>>>>>>>>>>>>            if ( cpu_family() == 0x15 ) {
> >>>>>>>>>>>>>>> @@ -1109,11 +1125,40 @@
> >>>>>>>>>>>>>>>            }
> >>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>        #ifdef COMPILER2
> >>>>>>>>>>>>>>> -    if (MaxVectorSize > 16) {
> >>>>>>>>>>>>>>> -      // Limit vectors size to 16 bytes on current AMD
> cpus.
> >>>>>>>>>>>>>>> +    if (cpu_family() < 0x17 && MaxVectorSize > 16) {
> >>>>>>>>>>>>>>> +      // Limit vectors size to 16 bytes on AMD cpus < 17h.
> >>>>>>>>>>>>>>>              FLAG_SET_DEFAULT(MaxVectorSize, 16);
> >>>>>>>>>>>>>>>            }
> >>>>>>>>>>>>>>>        #endif // COMPILER2
> >>>>>>>>>>>>>>> +
> >>>>>>>>>>>>>>> +    // Some defaults for AMD family 17h
> >>>>>>>>>>>>>>> +    if ( cpu_family() == 0x17 ) {
> >>>>>>>>>>>>>>> +      // On family 17h processors use XMM and
> >>>>>>>>>>>>>>> UnalignedLoadStores
> >>>>>>>>>>>>>>> for
> >>>>>>>>>>>>>>> Array Copy
> >>>>>>>>>>>>>>> +      if (supports_sse2() &&
> >>>>>>>>>>>>>>> FLAG_IS_DEFAULT(UseXMMForArrayCopy))
> >>>>>>>>>>>>>>> {
> >>>>>>>>>>>>>>> +        FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
> >>>>>>>>>>>>>>> +      }
> >>>>>>>>>>>>>>> +      if (supports_sse2() &&
> >>>>>>>>>>>>>>> FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
> >>>>>>>>>>>>>>> +        FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
> >>>>>>>>>>>>>>> +      }
> >>>>>>>>>>>>>>> +      if (supports_bmi2() &&
> >>>>>>>>>>>>>>> FLAG_IS_DEFAULT(UseBMI2Instructions))
> >>>>>>>>>>>>>>> {
> >>>>>>>>>>>>>>> +        FLAG_SET_DEFAULT(UseBMI2Instructions, true);
> >>>>>>>>>>>>>>> +      }
> >>>>>>>>>>>>>>> +      if (UseSHA) {
> >>>>>>>>>>>>>>> +        if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
> >>>>>>>>>>>>>>> +          FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
> >>>>>>>>>>>>>>> +        } else if (UseSHA512Intrinsics) {
> >>>>>>>>>>>>>>> +          warning("Intrinsics for SHA-384 and SHA-512
> crypto
> >>>>>>>>>>>>>>> hash
> >>>>>>>>>>>>>>> functions not available on this CPU.");
> >>>>>>>>>>>>>>> +          FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
> >>>>>>>>>>>>>>> +        }
> >>>>>>>>>>>>>>> +      }
> >>>>>>>>>>>>>>> +#ifdef COMPILER2
> >>>>>>>>>>>>>>> +      if (supports_sse4_2()) {
> >>>>>>>>>>>>>>> +        if (FLAG_IS_DEFAULT(UseFPUForSpilling)) {
> >>>>>>>>>>>>>>> +          FLAG_SET_DEFAULT(UseFPUForSpilling, true);
> >>>>>>>>>>>>>>> +        }
> >>>>>>>>>>>>>>> +      }
> >>>>>>>>>>>>>>> +#endif
> >>>>>>>>>>>>>>> +    }
> >>>>>>>>>>>>>>>          }
> >>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>          if( is_intel() ) { // Intel cpus specific settings
> >>>>>>>>>>>>>>> diff --git a/src/cpu/x86/vm/vm_version_x86.hpp
> >>>>>>>>>>>>>>> b/src/cpu/x86/vm/vm_version_x86.hpp
> >>>>>>>>>>>>>>> --- a/src/cpu/x86/vm/vm_version_x86.hpp
> >>>>>>>>>>>>>>> +++ b/src/cpu/x86/vm/vm_version_x86.hpp
> >>>>>>>>>>>>>>> @@ -505,6 +505,14 @@
> >>>>>>>>>>>>>>>              result |= CPU_CLMUL;
> >>>>>>>>>>>>>>>            if (_cpuid_info.sef_cpuid7_ebx.bits.rtm != 0)
> >>>>>>>>>>>>>>>              result |= CPU_RTM;
> >>>>>>>>>>>>>>> +    if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
> >>>>>>>>>>>>>>> +       result |= CPU_ADX;
> >>>>>>>>>>>>>>> +    if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
> >>>>>>>>>>>>>>> +      result |= CPU_BMI2;
> >>>>>>>>>>>>>>> +    if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
> >>>>>>>>>>>>>>> +      result |= CPU_SHA;
> >>>>>>>>>>>>>>> +    if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
> >>>>>>>>>>>>>>> +      result |= CPU_FMA;
> >>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>            // AMD features.
> >>>>>>>>>>>>>>>            if (is_amd()) {
> >>>>>>>>>>>>>>> @@ -515,19 +523,13 @@
> >>>>>>>>>>>>>>>                result |= CPU_LZCNT;
> >>>>>>>>>>>>>>>              if (_cpuid_info.ext_cpuid1_ecx.bits.sse4a !=
> 0)
> >>>>>>>>>>>>>>>                result |= CPU_SSE4A;
> >>>>>>>>>>>>>>> +      if(_cpuid_info.std_cpuid1_edx.bits.ht != 0)
> >>>>>>>>>>>>>>> +        result |= CPU_HT;
> >>>>>>>>>>>>>>>            }
> >>>>>>>>>>>>>>>            // Intel features.
> >>>>>>>>>>>>>>>            if(is_intel()) {
> >>>>>>>>>>>>>>> -      if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
> >>>>>>>>>>>>>>> -         result |= CPU_ADX;
> >>>>>>>>>>>>>>> -      if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
> >>>>>>>>>>>>>>> -        result |= CPU_BMI2;
> >>>>>>>>>>>>>>> -      if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
> >>>>>>>>>>>>>>> -        result |= CPU_SHA;
> >>>>>>>>>>>>>>>              if(_cpuid_info.ext_cpuid1_ecx.
> bits.lzcnt_intel
> >>>>>>>>>>>>>>> !=
> >>>>>>>>>>>>>>> 0)
> >>>>>>>>>>>>>>>                result |= CPU_LZCNT;
> >>>>>>>>>>>>>>> -      if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
> >>>>>>>>>>>>>>> -        result |= CPU_FMA;
> >>>>>>>>>>>>>>>              // for Intel, ecx.bits.misalignsse bit (bit 8)
> >>>>>>>>>>>>>>> indicates
> >>>>>>>>>>>>>>> support for prefetchw
> >>>>>>>>>>>>>>>              if (_cpuid_info.ext_cpuid1_ecx.
> bits.misalignsse
> >>>>>>>>>>>>>>> !=
> >>>>>>>>>>>>>>> 0) {
> >>>>>>>>>>>>>>>                result |= CPU_3DNOW_PREFETCH;
> >>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>> Regards,
> >>>>>>>>>>>>>>> Rohit
> >>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>> On 9/1/17 8:04 AM, Rohit Arul Raj wrote:
> >>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>> On Fri, Sep 1, 2017 at 10:27 AM, Rohit Arul Raj
> >>>>>>>>>>>>>>>>> <rohitarulraj at gmail.com>
> >>>>>>>>>>>>>>>>> wrote:
> >>>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>> On Fri, Sep 1, 2017 at 3:01 AM, David Holmes
> >>>>>>>>>>>>>>>>>> <david.holmes at oracle.com>
> >>>>>>>>>>>>>>>>>> wrote:
> >>>>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>>> Hi Rohit,
> >>>>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>>> I think the patch needs updating for jdk10 as I already
> >>>>>>>>>>>>>>>>>>> see
> >>>>>>>>>>>>>>>>>>> a
> >>>>>>>>>>>>>>>>>>> lot of
> >>>>>>>>>>>>>>>>>>> logic
> >>>>>>>>>>>>>>>>>>> around UseSHA in vm_version_x86.cpp.
> >>>>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>>> Thanks,
> >>>>>>>>>>>>>>>>>>> David
> >>>>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>> Thanks David, I will update the patch wrt JDK10 source
> >>>>>>>>>>>>>>>>>> base,
> >>>>>>>>>>>>>>>>>> test
> >>>>>>>>>>>>>>>>>> and
> >>>>>>>>>>>>>>>>>> resubmit for review.
> >>>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>> Regards,
> >>>>>>>>>>>>>>>>>> Rohit
> >>>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>> Hi All,
> >>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>> I have updated the patch wrt openjdk10/hotspot (parent:
> >>>>>>>>>>>>>>>>> 13519:71337910df60), did regression testing using jtreg
> >>>>>>>>>>>>>>>>> ($make
> >>>>>>>>>>>>>>>>> default) and didn't find any regressions.
> >>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>> Can anyone please volunteer to review this patch  which
> >>>>>>>>>>>>>>>>> sets
> >>>>>>>>>>>>>>>>> flag/ISA
> >>>>>>>>>>>>>>>>> defaults for newer AMD 17h (EPYC) processor?
> >>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>> ************************* Patch
> >>>>>>>>>>>>>>>>> ****************************
> >>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>> diff --git a/src/cpu/x86/vm/vm_version_x86.cpp
> >>>>>>>>>>>>>>>>> b/src/cpu/x86/vm/vm_version_x86.cpp
> >>>>>>>>>>>>>>>>> --- a/src/cpu/x86/vm/vm_version_x86.cpp
> >>>>>>>>>>>>>>>>> +++ b/src/cpu/x86/vm/vm_version_x86.cpp
> >>>>>>>>>>>>>>>>> @@ -1088,6 +1088,22 @@
> >>>>>>>>>>>>>>>>>               }
> >>>>>>>>>>>>>>>>>               FLAG_SET_DEFAULT(UseSSE42Intrinsics,
> false);
> >>>>>>>>>>>>>>>>>             }
> >>>>>>>>>>>>>>>>> +    if (supports_sha()) {
> >>>>>>>>>>>>>>>>> +      if (FLAG_IS_DEFAULT(UseSHA)) {
> >>>>>>>>>>>>>>>>> +        FLAG_SET_DEFAULT(UseSHA, true);
> >>>>>>>>>>>>>>>>> +      }
> >>>>>>>>>>>>>>>>> +    } else if (UseSHA || UseSHA1Intrinsics ||
> >>>>>>>>>>>>>>>>> UseSHA256Intrinsics
> >>>>>>>>>>>>>>>>> ||
> >>>>>>>>>>>>>>>>> UseSHA512Intrinsics) {
> >>>>>>>>>>>>>>>>> +      if (!FLAG_IS_DEFAULT(UseSHA) ||
> >>>>>>>>>>>>>>>>> +          !FLAG_IS_DEFAULT(UseSHA1Intrinsics) ||
> >>>>>>>>>>>>>>>>> +          !FLAG_IS_DEFAULT(UseSHA256Intrinsics) ||
> >>>>>>>>>>>>>>>>> +          !FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
> >>>>>>>>>>>>>>>>> +        warning("SHA instructions are not available on
> >>>>>>>>>>>>>>>>> this
> >>>>>>>>>>>>>>>>> CPU");
> >>>>>>>>>>>>>>>>> +      }
> >>>>>>>>>>>>>>>>> +      FLAG_SET_DEFAULT(UseSHA, false);
> >>>>>>>>>>>>>>>>> +      FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
> >>>>>>>>>>>>>>>>> +      FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
> >>>>>>>>>>>>>>>>> +      FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
> >>>>>>>>>>>>>>>>> +    }
> >>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>             // some defaults for AMD family 15h
> >>>>>>>>>>>>>>>>>             if ( cpu_family() == 0x15 ) {
> >>>>>>>>>>>>>>>>> @@ -1109,11 +1125,43 @@
> >>>>>>>>>>>>>>>>>             }
> >>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>         #ifdef COMPILER2
> >>>>>>>>>>>>>>>>> -    if (MaxVectorSize > 16) {
> >>>>>>>>>>>>>>>>> -      // Limit vectors size to 16 bytes on current AMD
> >>>>>>>>>>>>>>>>> cpus.
> >>>>>>>>>>>>>>>>> +    if (cpu_family() < 0x17 && MaxVectorSize > 16) {
> >>>>>>>>>>>>>>>>> +      // Limit vectors size to 16 bytes on AMD cpus <
> 17h.
> >>>>>>>>>>>>>>>>>               FLAG_SET_DEFAULT(MaxVectorSize, 16);
> >>>>>>>>>>>>>>>>>             }
> >>>>>>>>>>>>>>>>>         #endif // COMPILER2
> >>>>>>>>>>>>>>>>> +
> >>>>>>>>>>>>>>>>> +    // Some defaults for AMD family 17h
> >>>>>>>>>>>>>>>>> +    if ( cpu_family() == 0x17 ) {
> >>>>>>>>>>>>>>>>> +      // On family 17h processors use XMM and
> >>>>>>>>>>>>>>>>> UnalignedLoadStores
> >>>>>>>>>>>>>>>>> for
> >>>>>>>>>>>>>>>>> Array Copy
> >>>>>>>>>>>>>>>>> +      if (supports_sse2() &&
> >>>>>>>>>>>>>>>>> FLAG_IS_DEFAULT(UseXMMForArrayCopy))
> >>>>>>>>>>>>>>>>> {
> >>>>>>>>>>>>>>>>> +        UseXMMForArrayCopy = true;
> >>>>>>>>>>>>>>>>> +      }
> >>>>>>>>>>>>>>>>> +      if (supports_sse2() &&
> >>>>>>>>>>>>>>>>> FLAG_IS_DEFAULT(UseUnalignedLoadStores))
> >>>>>>>>>>>>>>>>> {
> >>>>>>>>>>>>>>>>> +        UseUnalignedLoadStores = true;
> >>>>>>>>>>>>>>>>> +      }
> >>>>>>>>>>>>>>>>> +      if (supports_bmi2() &&
> >>>>>>>>>>>>>>>>> FLAG_IS_DEFAULT(UseBMI2Instructions)) {
> >>>>>>>>>>>>>>>>> +        UseBMI2Instructions = true;
> >>>>>>>>>>>>>>>>> +      }
> >>>>>>>>>>>>>>>>> +      if (MaxVectorSize > 32) {
> >>>>>>>>>>>>>>>>> +        FLAG_SET_DEFAULT(MaxVectorSize, 32);
> >>>>>>>>>>>>>>>>> +      }
> >>>>>>>>>>>>>>>>> +      if (UseSHA) {
> >>>>>>>>>>>>>>>>> +        if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
> >>>>>>>>>>>>>>>>> +          FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
> >>>>>>>>>>>>>>>>> +        } else if (UseSHA512Intrinsics) {
> >>>>>>>>>>>>>>>>> +          warning("Intrinsics for SHA-384 and SHA-512
> >>>>>>>>>>>>>>>>> crypto
> >>>>>>>>>>>>>>>>> hash
> >>>>>>>>>>>>>>>>> functions not available on this CPU.");
> >>>>>>>>>>>>>>>>> +          FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
> >>>>>>>>>>>>>>>>> +        }
> >>>>>>>>>>>>>>>>> +      }
> >>>>>>>>>>>>>>>>> +#ifdef COMPILER2
> >>>>>>>>>>>>>>>>> +      if (supports_sse4_2()) {
> >>>>>>>>>>>>>>>>> +        if (FLAG_IS_DEFAULT(UseFPUForSpilling)) {
> >>>>>>>>>>>>>>>>> +          FLAG_SET_DEFAULT(UseFPUForSpilling, true);
> >>>>>>>>>>>>>>>>> +        }
> >>>>>>>>>>>>>>>>> +      }
> >>>>>>>>>>>>>>>>> +#endif
> >>>>>>>>>>>>>>>>> +    }
> >>>>>>>>>>>>>>>>>           }
> >>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>           if( is_intel() ) { // Intel cpus specific
> >>>>>>>>>>>>>>>>> settings
> >>>>>>>>>>>>>>>>> diff --git a/src/cpu/x86/vm/vm_version_x86.hpp
> >>>>>>>>>>>>>>>>> b/src/cpu/x86/vm/vm_version_x86.hpp
> >>>>>>>>>>>>>>>>> --- a/src/cpu/x86/vm/vm_version_x86.hpp
> >>>>>>>>>>>>>>>>> +++ b/src/cpu/x86/vm/vm_version_x86.hpp
> >>>>>>>>>>>>>>>>> @@ -505,6 +505,14 @@
> >>>>>>>>>>>>>>>>>               result |= CPU_CLMUL;
> >>>>>>>>>>>>>>>>>             if (_cpuid_info.sef_cpuid7_ebx.bits.rtm !=
> 0)
> >>>>>>>>>>>>>>>>>               result |= CPU_RTM;
> >>>>>>>>>>>>>>>>> +    if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
> >>>>>>>>>>>>>>>>> +       result |= CPU_ADX;
> >>>>>>>>>>>>>>>>> +    if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
> >>>>>>>>>>>>>>>>> +      result |= CPU_BMI2;
> >>>>>>>>>>>>>>>>> +    if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
> >>>>>>>>>>>>>>>>> +      result |= CPU_SHA;
> >>>>>>>>>>>>>>>>> +    if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
> >>>>>>>>>>>>>>>>> +      result |= CPU_FMA;
> >>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>             // AMD features.
> >>>>>>>>>>>>>>>>>             if (is_amd()) {
> >>>>>>>>>>>>>>>>> @@ -515,19 +523,13 @@
> >>>>>>>>>>>>>>>>>                 result |= CPU_LZCNT;
> >>>>>>>>>>>>>>>>>               if (_cpuid_info.ext_cpuid1_ecx.bits.sse4a
> !=
> >>>>>>>>>>>>>>>>> 0)
> >>>>>>>>>>>>>>>>>                 result |= CPU_SSE4A;
> >>>>>>>>>>>>>>>>> +      if(_cpuid_info.std_cpuid1_edx.bits.ht != 0)
> >>>>>>>>>>>>>>>>> +        result |= CPU_HT;
> >>>>>>>>>>>>>>>>>             }
> >>>>>>>>>>>>>>>>>             // Intel features.
> >>>>>>>>>>>>>>>>>             if(is_intel()) {
> >>>>>>>>>>>>>>>>> -      if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
> >>>>>>>>>>>>>>>>> -         result |= CPU_ADX;
> >>>>>>>>>>>>>>>>> -      if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
> >>>>>>>>>>>>>>>>> -        result |= CPU_BMI2;
> >>>>>>>>>>>>>>>>> -      if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
> >>>>>>>>>>>>>>>>> -        result |= CPU_SHA;
> >>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>> if(_cpuid_info.ext_cpuid1_ecx.bits.lzcnt_intel
> >>>>>>>>>>>>>>>>> != 0)
> >>>>>>>>>>>>>>>>>                 result |= CPU_LZCNT;
> >>>>>>>>>>>>>>>>> -      if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
> >>>>>>>>>>>>>>>>> -        result |= CPU_FMA;
> >>>>>>>>>>>>>>>>>               // for Intel, ecx.bits.misalignsse bit (bit
> >>>>>>>>>>>>>>>>> 8)
> >>>>>>>>>>>>>>>>> indicates
> >>>>>>>>>>>>>>>>> support for prefetchw
> >>>>>>>>>>>>>>>>>               if
> >>>>>>>>>>>>>>>>> (_cpuid_info.ext_cpuid1_ecx.bits.misalignsse
> >>>>>>>>>>>>>>>>> !=
> >>>>>>>>>>>>>>>>> 0) {
> >>>>>>>>>>>>>>>>>                 result |= CPU_3DNOW_PREFETCH;
> >>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>> ******************************
> ********************************
> >>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>> Thanks,
> >>>>>>>>>>>>>>>>> Rohit
> >>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>>> On 1/09/2017 1:11 AM, Rohit Arul Raj wrote:
> >>>>>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>>>> On Thu, Aug 31, 2017 at 5:59 PM, David Holmes
> >>>>>>>>>>>>>>>>>>>> <david.holmes at oracle.com>
> >>>>>>>>>>>>>>>>>>>> wrote:
> >>>>>>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>>>>> Hi Rohit,
> >>>>>>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>>>>> On 31/08/2017 7:03 PM, Rohit Arul Raj wrote:
> >>>>>>>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>>>>>> I would like a volunteer to review this patch
> >>>>>>>>>>>>>>>>>>>>>> (openJDK9)
> >>>>>>>>>>>>>>>>>>>>>> which
> >>>>>>>>>>>>>>>>>>>>>> sets
> >>>>>>>>>>>>>>>>>>>>>> flag/ISA defaults for newer AMD 17h (EPYC) processor
> >>>>>>>>>>>>>>>>>>>>>> and
> >>>>>>>>>>>>>>>>>>>>>> help
> >>>>>>>>>>>>>>>>>>>>>> us
> >>>>>>>>>>>>>>>>>>>>>> with
> >>>>>>>>>>>>>>>>>>>>>> the commit process.
> >>>>>>>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>>>>>> Webrev:
> >>>>>>>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>>>>>> https://www.dropbox.com/sh/08bsxaxupg8kbam/AADurTXLGIZ6C-tiIAi_Glyka?dl=0
> >>>>>>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>>>>> Unfortunately patches can not be accepted from
> systems
> >>>>>>>>>>>>>>>>>>>>> outside
> >>>>>>>>>>>>>>>>>>>>> the
> >>>>>>>>>>>>>>>>>>>>> OpenJDK
> >>>>>>>>>>>>>>>>>>>>> infrastructure and ...
> >>>>>>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>>>>>> I have also attached the patch (hg diff -g) for
> >>>>>>>>>>>>>>>>>>>>>> reference.
> >>>>>>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>>>>> ... unfortunately patches tend to get stripped by the
> >>>>>>>>>>>>>>>>>>>>> mail
> >>>>>>>>>>>>>>>>>>>>> servers.
> >>>>>>>>>>>>>>>>>>>>> If
> >>>>>>>>>>>>>>>>>>>>> the
> >>>>>>>>>>>>>>>>>>>>> patch is small please include it inline. Otherwise
> you
> >>>>>>>>>>>>>>>>>>>>> will
> >>>>>>>>>>>>>>>>>>>>> need
> >>>>>>>>>>>>>>>>>>>>> to
> >>>>>>>>>>>>>>>>>>>>> find
> >>>>>>>>>>>>>>>>>>>>> an
> >>>>>>>>>>>>>>>>>>>>> OpenJDK Author who can host it for you on
> >>>>>>>>>>>>>>>>>>>>> cr.openjdk.java.net.
> >>>>>>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>>>>>> 3) I have done regression testing using jtreg ($make
> >>>>>>>>>>>>>>>>>>>>>> default)
> >>>>>>>>>>>>>>>>>>>>>> and
> >>>>>>>>>>>>>>>>>>>>>> didn't find any regressions.
> >>>>>>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>>>>> Sounds good, but until I see the patch it is hard to
> >>>>>>>>>>>>>>>>>>>>> comment
> >>>>>>>>>>>>>>>>>>>>> on
> >>>>>>>>>>>>>>>>>>>>> testing
> >>>>>>>>>>>>>>>>>>>>> requirements.
> >>>>>>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>>>>> Thanks,
> >>>>>>>>>>>>>>>>>>>>> David
> >>>>>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>>>> Thanks David,
> >>>>>>>>>>>>>>>>>>>> Yes, it's a small patch.
> >>>>>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>>>> diff --git a/src/cpu/x86/vm/vm_version_x86.cpp
> >>>>>>>>>>>>>>>>>>>> b/src/cpu/x86/vm/vm_version_x86.cpp
> >>>>>>>>>>>>>>>>>>>> --- a/src/cpu/x86/vm/vm_version_x86.cpp
> >>>>>>>>>>>>>>>>>>>> +++ b/src/cpu/x86/vm/vm_version_x86.cpp
> >>>>>>>>>>>>>>>>>>>> @@ -1051,6 +1051,22 @@
> >>>>>>>>>>>>>>>>>>>>                }
> >>>>>>>>>>>>>>>>>>>>                FLAG_SET_DEFAULT(UseSSE42Intrinsics,
> >>>>>>>>>>>>>>>>>>>> false);
> >>>>>>>>>>>>>>>>>>>>              }
> >>>>>>>>>>>>>>>>>>>> +    if (supports_sha()) {
> >>>>>>>>>>>>>>>>>>>> +      if (FLAG_IS_DEFAULT(UseSHA)) {
> >>>>>>>>>>>>>>>>>>>> +        FLAG_SET_DEFAULT(UseSHA, true);
> >>>>>>>>>>>>>>>>>>>> +      }
> >>>>>>>>>>>>>>>>>>>> +    } else if (UseSHA || UseSHA1Intrinsics ||
> >>>>>>>>>>>>>>>>>>>> UseSHA256Intrinsics
> >>>>>>>>>>>>>>>>>>>> ||
> >>>>>>>>>>>>>>>>>>>> UseSHA512Intrinsics) {
> >>>>>>>>>>>>>>>>>>>> +      if (!FLAG_IS_DEFAULT(UseSHA) ||
> >>>>>>>>>>>>>>>>>>>> +          !FLAG_IS_DEFAULT(UseSHA1Intrinsics) ||
> >>>>>>>>>>>>>>>>>>>> +          !FLAG_IS_DEFAULT(UseSHA256Intrinsics) ||
> >>>>>>>>>>>>>>>>>>>> +          !FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
> >>>>>>>>>>>>>>>>>>>> +        warning("SHA instructions are not available on this CPU");
> >>>>>>>>>>>>>>>>>>>> +      }
> >>>>>>>>>>>>>>>>>>>> +      FLAG_SET_DEFAULT(UseSHA, false);
> >>>>>>>>>>>>>>>>>>>> +      FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
> >>>>>>>>>>>>>>>>>>>> +      FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
> >>>>>>>>>>>>>>>>>>>> +      FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
> >>>>>>>>>>>>>>>>>>>> +    }
> >>>>>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>>>>              // some defaults for AMD family 15h
> >>>>>>>>>>>>>>>>>>>>              if ( cpu_family() == 0x15 ) {
> >>>>>>>>>>>>>>>>>>>> @@ -1072,11 +1088,43 @@
> >>>>>>>>>>>>>>>>>>>>              }
> >>>>>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>>>>          #ifdef COMPILER2
> >>>>>>>>>>>>>>>>>>>> -    if (MaxVectorSize > 16) {
> >>>>>>>>>>>>>>>>>>>> -      // Limit vectors size to 16 bytes on current AMD cpus.
> >>>>>>>>>>>>>>>>>>>> +    if (cpu_family() < 0x17 && MaxVectorSize > 16) {
> >>>>>>>>>>>>>>>>>>>> +      // Limit vectors size to 16 bytes on AMD cpus <
> >>>>>>>>>>>>>>>>>>>> 17h.
> >>>>>>>>>>>>>>>>>>>>                FLAG_SET_DEFAULT(MaxVectorSize, 16);
> >>>>>>>>>>>>>>>>>>>>              }
> >>>>>>>>>>>>>>>>>>>>          #endif // COMPILER2
> >>>>>>>>>>>>>>>>>>>> +
> >>>>>>>>>>>>>>>>>>>> +    // Some defaults for AMD family 17h
> >>>>>>>>>>>>>>>>>>>> +    if ( cpu_family() == 0x17 ) {
> >>>>>>>>>>>>>>>>>>>> +      // On family 17h processors use XMM and
> >>>>>>>>>>>>>>>>>>>> UnalignedLoadStores
> >>>>>>>>>>>>>>>>>>>> for
> >>>>>>>>>>>>>>>>>>>> Array Copy
> >>>>>>>>>>>>>>>>>>>> +      if (supports_sse2() &&
> >>>>>>>>>>>>>>>>>>>> FLAG_IS_DEFAULT(UseXMMForArrayCopy))
> >>>>>>>>>>>>>>>>>>>> {
> >>>>>>>>>>>>>>>>>>>> +        UseXMMForArrayCopy = true;
> >>>>>>>>>>>>>>>>>>>> +      }
> >>>>>>>>>>>>>>>>>>>> +      if (supports_sse2() &&
> >>>>>>>>>>>>>>>>>>>> FLAG_IS_DEFAULT(UseUnalignedLoadStores))
> >>>>>>>>>>>>>>>>>>>> {
> >>>>>>>>>>>>>>>>>>>> +        UseUnalignedLoadStores = true;
> >>>>>>>>>>>>>>>>>>>> +      }
> >>>>>>>>>>>>>>>>>>>> +      if (supports_bmi2() &&
> >>>>>>>>>>>>>>>>>>>> FLAG_IS_DEFAULT(UseBMI2Instructions))
> >>>>>>>>>>>>>>>>>>>> {
> >>>>>>>>>>>>>>>>>>>> +        UseBMI2Instructions = true;
> >>>>>>>>>>>>>>>>>>>> +      }
> >>>>>>>>>>>>>>>>>>>> +      if (MaxVectorSize > 32) {
> >>>>>>>>>>>>>>>>>>>> +        FLAG_SET_DEFAULT(MaxVectorSize, 32);
> >>>>>>>>>>>>>>>>>>>> +      }
> >>>>>>>>>>>>>>>>>>>> +      if (UseSHA) {
> >>>>>>>>>>>>>>>>>>>> +        if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
> >>>>>>>>>>>>>>>>>>>> +          FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
> >>>>>>>>>>>>>>>>>>>> +        } else if (UseSHA512Intrinsics) {
> >>>>>>>>>>>>>>>>>>>> +          warning("Intrinsics for SHA-384 and SHA-512
> >>>>>>>>>>>>>>>>>>>> crypto
> >>>>>>>>>>>>>>>>>>>> hash
> >>>>>>>>>>>>>>>>>>>> functions not available on this CPU.");
> >>>>>>>>>>>>>>>>>>>> +          FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
> >>>>>>>>>>>>>>>>>>>> +        }
> >>>>>>>>>>>>>>>>>>>> +      }
> >>>>>>>>>>>>>>>>>>>> +#ifdef COMPILER2
> >>>>>>>>>>>>>>>>>>>> +      if (supports_sse4_2()) {
> >>>>>>>>>>>>>>>>>>>> +        if (FLAG_IS_DEFAULT(UseFPUForSpilling)) {
> >>>>>>>>>>>>>>>>>>>> +          FLAG_SET_DEFAULT(UseFPUForSpilling, true);
> >>>>>>>>>>>>>>>>>>>> +        }
> >>>>>>>>>>>>>>>>>>>> +      }
> >>>>>>>>>>>>>>>>>>>> +#endif
> >>>>>>>>>>>>>>>>>>>> +    }
> >>>>>>>>>>>>>>>>>>>>            }
> >>>>>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>>>>            if( is_intel() ) { // Intel cpus specific
> >>>>>>>>>>>>>>>>>>>> settings
> >>>>>>>>>>>>>>>>>>>> diff --git a/src/cpu/x86/vm/vm_version_x86.hpp
> >>>>>>>>>>>>>>>>>>>> b/src/cpu/x86/vm/vm_version_x86.hpp
> >>>>>>>>>>>>>>>>>>>> --- a/src/cpu/x86/vm/vm_version_x86.hpp
> >>>>>>>>>>>>>>>>>>>> +++ b/src/cpu/x86/vm/vm_version_x86.hpp
> >>>>>>>>>>>>>>>>>>>> @@ -513,6 +513,16 @@
> >>>>>>>>>>>>>>>>>>>>                  result |= CPU_LZCNT;
> >>>>>>>>>>>>>>>>>>>>                if (_cpuid_info.ext_cpuid1_ecx.bits.sse4a != 0)
> >>>>>>>>>>>>>>>>>>>>                  result |= CPU_SSE4A;
> >>>>>>>>>>>>>>>>>>>> +      if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
> >>>>>>>>>>>>>>>>>>>> +        result |= CPU_BMI2;
> >>>>>>>>>>>>>>>>>>>> +      if(_cpuid_info.std_cpuid1_edx.bits.ht != 0)
> >>>>>>>>>>>>>>>>>>>> +        result |= CPU_HT;
> >>>>>>>>>>>>>>>>>>>> +      if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
> >>>>>>>>>>>>>>>>>>>> +        result |= CPU_ADX;
> >>>>>>>>>>>>>>>>>>>> +      if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
> >>>>>>>>>>>>>>>>>>>> +        result |= CPU_SHA;
> >>>>>>>>>>>>>>>>>>>> +      if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
> >>>>>>>>>>>>>>>>>>>> +        result |= CPU_FMA;
> >>>>>>>>>>>>>>>>>>>>              }
> >>>>>>>>>>>>>>>>>>>>              // Intel features.
> >>>>>>>>>>>>>>>>>>>>              if(is_intel()) {
> >>>>>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>>>> Regards,
> >>>>>>>>>>>>>>>>>>>> Rohit
> >>>>>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>>>
> >>>>>>>>>>>>>>
> >>>>>>>>>>>>
> >>>>>>>>
> >>>>> >
>