RFR: Newer AMD 17h (EPYC) Processor family defaults
Rohit Arul Raj
rohitarulraj at gmail.com
Fri Nov 3 07:21:33 UTC 2017
Vladimir, Thanks for the info.
Regards,
Rohit
On Fri, Nov 3, 2017 at 10:33 AM, Vladimir Kozlov <vladimir.kozlov at oracle.com
> wrote:
> It was pushed long ago - Oct 3, see:
>
> http://hg.openjdk.java.net/jdk10/hs/rev/fde01e0fccb4
> https://bugs.openjdk.java.net/browse/JDK-8187219
>
> But we have two repositories, jdk10/jdk10 master and jdk10/hs, for Hotspot
> changes. We have not yet promoted changes from jdk10/hs to jdk10/jdk10.
> Testing is currently under way to do the promotion, but I cannot say in which
> jdk 10 build it will happen. Look for the push notification in JDK-8187219.
>
> Regards,
> Vladimir
>
> On 11/2/17 8:30 PM, Rohit Arul Raj wrote:
>
>> Hello Vladimir,
>>
>> Is there any update on pushing these changes to jdk10?
>>
>> Thanks,
>> Rohit
>>
>> On Fri, Sep 22, 2017 at 1:11 PM, Rohit Arul Raj <rohitarulraj at gmail.com>
>> wrote:
>>
>> Thanks Vladimir,
>>>
>>> On Wed, Sep 20, 2017 at 10:07 PM, Vladimir Kozlov
>>> <vladimir.kozlov at oracle.com> wrote:
>>>
>>>> __ cmpl(rax, 0x80000000); // Is cpuid(0x80000001) supported?
>>>>> __ jcc(Assembler::belowEqual, done);
>>>>> __ cmpl(rax, 0x80000004); // Is cpuid(0x80000005) supported?
>>>>> - __ jccb(Assembler::belowEqual, ext_cpuid1);
>>>>> + __ jcc(Assembler::belowEqual, ext_cpuid1);
>>>>>
>>>>
>>>>
>>>> Good. You may need to increase the size of the buffer too (to be safe) to
>>>> 1100:
>>>>
>>>> static const int stub_size = 1000;
>>>>
>>>>
>>> Please find the updated patch after the requested change.
>>>
>>> diff --git a/src/cpu/x86/vm/vm_version_x86.cpp
>>> b/src/cpu/x86/vm/vm_version_x86.cpp
>>> --- a/src/cpu/x86/vm/vm_version_x86.cpp
>>> +++ b/src/cpu/x86/vm/vm_version_x86.cpp
>>> @@ -46,7 +46,7 @@
>>> address VM_Version::_cpuinfo_cont_addr = 0;
>>>
>>> static BufferBlob* stub_blob;
>>> -static const int stub_size = 1000;
>>> +static const int stub_size = 1100;
>>>
>>> extern "C" {
>>> typedef void (*get_cpu_info_stub_t)(void*);
>>> @@ -70,7 +70,7 @@
>>> bool use_evex = FLAG_IS_DEFAULT(UseAVX) || (UseAVX > 2);
>>>
>>> Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4;
>>> - Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7,
>>> done, wrapup;
>>> + Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7,
>>> ext_cpuid8, done, wrapup;
>>> Label legacy_setup, save_restore_except, legacy_save_restore,
>>> start_simd_check;
>>>
>>> StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub");
>>> @@ -267,14 +267,30 @@
>>> __ cmpl(rax, 0x80000000); // Is cpuid(0x80000001) supported?
>>> __ jcc(Assembler::belowEqual, done);
>>> __ cmpl(rax, 0x80000004); // Is cpuid(0x80000005) supported?
>>> - __ jccb(Assembler::belowEqual, ext_cpuid1);
>>> + __ jcc(Assembler::belowEqual, ext_cpuid1);
>>> __ cmpl(rax, 0x80000006); // Is cpuid(0x80000007) supported?
>>> __ jccb(Assembler::belowEqual, ext_cpuid5);
>>> __ cmpl(rax, 0x80000007); // Is cpuid(0x80000008) supported?
>>> __ jccb(Assembler::belowEqual, ext_cpuid7);
>>> + __ cmpl(rax, 0x80000008); // Is cpuid(0x80000009 and above)
>>> supported?
>>> + __ jccb(Assembler::belowEqual, ext_cpuid8);
>>> + __ cmpl(rax, 0x8000001E); // Is cpuid(0x8000001E) supported?
>>> + __ jccb(Assembler::below, ext_cpuid8);
>>> + //
>>> + // Extended cpuid(0x8000001E)
>>> + //
>>> + __ movl(rax, 0x8000001E);
>>> + __ cpuid();
>>> + __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_
>>> cpuid1E_offset())));
>>> + __ movl(Address(rsi, 0), rax);
>>> + __ movl(Address(rsi, 4), rbx);
>>> + __ movl(Address(rsi, 8), rcx);
>>> + __ movl(Address(rsi,12), rdx);
>>> +
>>> //
>>> // Extended cpuid(0x80000008)
>>> //
>>> + __ bind(ext_cpuid8);
>>> __ movl(rax, 0x80000008);
>>> __ cpuid();
>>> __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid
>>> 8_offset())));
>>> @@ -1109,11 +1125,27 @@
>>> }
>>>
>>> #ifdef COMPILER2
>>> - if (MaxVectorSize > 16) {
>>> - // Limit vectors size to 16 bytes on current AMD cpus.
>>> + if (cpu_family() < 0x17 && MaxVectorSize > 16) {
>>> + // Limit vectors size to 16 bytes on AMD cpus < 17h.
>>> FLAG_SET_DEFAULT(MaxVectorSize, 16);
>>> }
>>> #endif // COMPILER2
>>> +
>>> + // Some defaults for AMD family 17h
>>> + if ( cpu_family() == 0x17 ) {
>>> + // On family 17h processors use XMM and UnalignedLoadStores for
>>> Array Copy
>>> + if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
>>> + FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
>>> + }
>>> + if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
>>> + FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
>>> + }
>>> +#ifdef COMPILER2
>>> + if (supports_sse4_2() && FLAG_IS_DEFAULT(UseFPUForSpilling)) {
>>> + FLAG_SET_DEFAULT(UseFPUForSpilling, true);
>>> + }
>>> +#endif
>>> + }
>>> }
>>>
>>> if( is_intel() ) { // Intel cpus specific settings
>>> diff --git a/src/cpu/x86/vm/vm_version_x86.hpp
>>> b/src/cpu/x86/vm/vm_version_x86.hpp
>>> --- a/src/cpu/x86/vm/vm_version_x86.hpp
>>> +++ b/src/cpu/x86/vm/vm_version_x86.hpp
>>> @@ -228,6 +228,15 @@
>>> } bits;
>>> };
>>>
>>> + union ExtCpuid1EEbx {
>>> + uint32_t value;
>>> + struct {
>>> + uint32_t : 8,
>>> + threads_per_core : 8,
>>> + : 16;
>>> + } bits;
>>> + };
>>> +
>>> union XemXcr0Eax {
>>> uint32_t value;
>>> struct {
>>> @@ -398,6 +407,12 @@
>>> ExtCpuid8Ecx ext_cpuid8_ecx;
>>> uint32_t ext_cpuid8_edx; // reserved
>>>
>>> + // cpuid function 0x8000001E // AMD 17h
>>> + uint32_t ext_cpuid1E_eax;
>>> + ExtCpuid1EEbx ext_cpuid1E_ebx; // threads per core (AMD17h)
>>> + uint32_t ext_cpuid1E_ecx;
>>> + uint32_t ext_cpuid1E_edx; // unused currently
>>> +
>>> // extended control register XCR0 (the XFEATURE_ENABLED_MASK
>>> register)
>>> XemXcr0Eax xem_xcr0_eax;
>>> uint32_t xem_xcr0_edx; // reserved
>>> @@ -505,6 +520,14 @@
>>> result |= CPU_CLMUL;
>>> if (_cpuid_info.sef_cpuid7_ebx.bits.rtm != 0)
>>> result |= CPU_RTM;
>>> + if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
>>> + result |= CPU_ADX;
>>> + if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
>>> + result |= CPU_BMI2;
>>> + if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
>>> + result |= CPU_SHA;
>>> + if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
>>> + result |= CPU_FMA;
>>>
>>> // AMD features.
>>> if (is_amd()) {
>>> @@ -518,16 +541,8 @@
>>> }
>>> // Intel features.
>>> if(is_intel()) {
>>> - if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
>>> - result |= CPU_ADX;
>>> - if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
>>> - result |= CPU_BMI2;
>>> - if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
>>> - result |= CPU_SHA;
>>> if(_cpuid_info.ext_cpuid1_ecx.bits.lzcnt_intel != 0)
>>> result |= CPU_LZCNT;
>>> - if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
>>> - result |= CPU_FMA;
>>> // for Intel, ecx.bits.misalignsse bit (bit 8) indicates
>>> support for prefetchw
>>> if (_cpuid_info.ext_cpuid1_ecx.bits.misalignsse != 0) {
>>> result |= CPU_3DNOW_PREFETCH;
>>> @@ -590,6 +605,7 @@
>>> static ByteSize ext_cpuid5_offset() { return
>>> byte_offset_of(CpuidInfo, ext_cpuid5_eax); }
>>> static ByteSize ext_cpuid7_offset() { return
>>> byte_offset_of(CpuidInfo, ext_cpuid7_eax); }
>>> static ByteSize ext_cpuid8_offset() { return
>>> byte_offset_of(CpuidInfo, ext_cpuid8_eax); }
>>> + static ByteSize ext_cpuid1E_offset() { return
>>> byte_offset_of(CpuidInfo, ext_cpuid1E_eax); }
>>> static ByteSize tpl_cpuidB0_offset() { return
>>> byte_offset_of(CpuidInfo, tpl_cpuidB0_eax); }
>>> static ByteSize tpl_cpuidB1_offset() { return
>>> byte_offset_of(CpuidInfo, tpl_cpuidB1_eax); }
>>> static ByteSize tpl_cpuidB2_offset() { return
>>> byte_offset_of(CpuidInfo, tpl_cpuidB2_eax); }
>>> @@ -673,8 +689,12 @@
>>> if (is_intel() && supports_processor_topology()) {
>>> result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
>>> } else if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) {
>>> - result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
>>> - cores_per_cpu();
>>> + if (cpu_family() >= 0x17) {
>>> + result = _cpuid_info.ext_cpuid1E_ebx.bits.threads_per_core + 1;
>>> + } else {
>>> + result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
>>> + cores_per_cpu();
>>> + }
>>> }
>>> return (result == 0 ? 1 : result);
>>> }
>>>
>>> Regards,
>>> Rohit
>>>
>>>
>>>>> diff --git a/src/cpu/x86/vm/vm_version_x86.cpp
>>>>> b/src/cpu/x86/vm/vm_version_x86.cpp
>>>>> --- a/src/cpu/x86/vm/vm_version_x86.cpp
>>>>> +++ b/src/cpu/x86/vm/vm_version_x86.cpp
>>>>> @@ -70,7 +70,7 @@
>>>>> bool use_evex = FLAG_IS_DEFAULT(UseAVX) || (UseAVX > 2);
>>>>>
>>>>> Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4;
>>>>> - Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7,
>>>>> done, wrapup;
>>>>> + Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7,
>>>>> ext_cpuid8, done, wrapup;
>>>>> Label legacy_setup, save_restore_except, legacy_save_restore,
>>>>> start_simd_check;
>>>>>
>>>>> StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub");
>>>>> @@ -267,14 +267,30 @@
>>>>> __ cmpl(rax, 0x80000000); // Is cpuid(0x80000001) supported?
>>>>> __ jcc(Assembler::belowEqual, done);
>>>>> __ cmpl(rax, 0x80000004); // Is cpuid(0x80000005) supported?
>>>>> - __ jccb(Assembler::belowEqual, ext_cpuid1);
>>>>> + __ jcc(Assembler::belowEqual, ext_cpuid1);
>>>>> __ cmpl(rax, 0x80000006); // Is cpuid(0x80000007) supported?
>>>>> __ jccb(Assembler::belowEqual, ext_cpuid5);
>>>>> __ cmpl(rax, 0x80000007); // Is cpuid(0x80000008) supported?
>>>>> __ jccb(Assembler::belowEqual, ext_cpuid7);
>>>>> + __ cmpl(rax, 0x80000008); // Is cpuid(0x80000009 and above)
>>>>> supported?
>>>>> + __ jccb(Assembler::belowEqual, ext_cpuid8);
>>>>> + __ cmpl(rax, 0x8000001E); // Is cpuid(0x8000001E) supported?
>>>>> + __ jccb(Assembler::below, ext_cpuid8);
>>>>> + //
>>>>> + // Extended cpuid(0x8000001E)
>>>>> + //
>>>>> + __ movl(rax, 0x8000001E);
>>>>> + __ cpuid();
>>>>> + __ lea(rsi, Address(rbp,
>>>>> in_bytes(VM_Version::ext_cpuid1E_offset())));
>>>>> + __ movl(Address(rsi, 0), rax);
>>>>> + __ movl(Address(rsi, 4), rbx);
>>>>> + __ movl(Address(rsi, 8), rcx);
>>>>> + __ movl(Address(rsi,12), rdx);
>>>>> +
>>>>> //
>>>>> // Extended cpuid(0x80000008)
>>>>> //
>>>>> + __ bind(ext_cpuid8);
>>>>> __ movl(rax, 0x80000008);
>>>>> __ cpuid();
>>>>> __ lea(rsi, Address(rbp,
>>>>> in_bytes(VM_Version::ext_cpuid8_offset())));
>>>>> @@ -1109,11 +1125,27 @@
>>>>> }
>>>>>
>>>>> #ifdef COMPILER2
>>>>> - if (MaxVectorSize > 16) {
>>>>> - // Limit vectors size to 16 bytes on current AMD cpus.
>>>>> + if (cpu_family() < 0x17 && MaxVectorSize > 16) {
>>>>> + // Limit vectors size to 16 bytes on AMD cpus < 17h.
>>>>> FLAG_SET_DEFAULT(MaxVectorSize, 16);
>>>>> }
>>>>> #endif // COMPILER2
>>>>> +
>>>>> + // Some defaults for AMD family 17h
>>>>> + if ( cpu_family() == 0x17 ) {
>>>>> + // On family 17h processors use XMM and UnalignedLoadStores for
>>>>> Array Copy
>>>>> + if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
>>>>> + FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
>>>>> + }
>>>>> + if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores))
>>>>>
>>>> {
>>>
>>>> + FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
>>>>> + }
>>>>> +#ifdef COMPILER2
>>>>> + if (supports_sse4_2() && FLAG_IS_DEFAULT(UseFPUForSpilling)) {
>>>>> + FLAG_SET_DEFAULT(UseFPUForSpilling, true);
>>>>> + }
>>>>> +#endif
>>>>> + }
>>>>> }
>>>>>
>>>>> if( is_intel() ) { // Intel cpus specific settings
>>>>> diff --git a/src/cpu/x86/vm/vm_version_x86.hpp
>>>>> b/src/cpu/x86/vm/vm_version_x86.hpp
>>>>> --- a/src/cpu/x86/vm/vm_version_x86.hpp
>>>>> +++ b/src/cpu/x86/vm/vm_version_x86.hpp
>>>>> @@ -228,6 +228,15 @@
>>>>> } bits;
>>>>> };
>>>>>
>>>>> + union ExtCpuid1EEbx {
>>>>> + uint32_t value;
>>>>> + struct {
>>>>> + uint32_t : 8,
>>>>> + threads_per_core : 8,
>>>>> + : 16;
>>>>> + } bits;
>>>>> + };
>>>>> +
>>>>> union XemXcr0Eax {
>>>>> uint32_t value;
>>>>> struct {
>>>>> @@ -398,6 +407,12 @@
>>>>> ExtCpuid8Ecx ext_cpuid8_ecx;
>>>>> uint32_t ext_cpuid8_edx; // reserved
>>>>>
>>>>> + // cpuid function 0x8000001E // AMD 17h
>>>>> + uint32_t ext_cpuid1E_eax;
>>>>> + ExtCpuid1EEbx ext_cpuid1E_ebx; // threads per core (AMD17h)
>>>>> + uint32_t ext_cpuid1E_ecx;
>>>>> + uint32_t ext_cpuid1E_edx; // unused currently
>>>>> +
>>>>> // extended control register XCR0 (the XFEATURE_ENABLED_MASK
>>>>> register)
>>>>> XemXcr0Eax xem_xcr0_eax;
>>>>> uint32_t xem_xcr0_edx; // reserved
>>>>> @@ -505,6 +520,14 @@
>>>>> result |= CPU_CLMUL;
>>>>> if (_cpuid_info.sef_cpuid7_ebx.bits.rtm != 0)
>>>>> result |= CPU_RTM;
>>>>> + if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
>>>>> + result |= CPU_ADX;
>>>>> + if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
>>>>> + result |= CPU_BMI2;
>>>>> + if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
>>>>> + result |= CPU_SHA;
>>>>> + if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
>>>>> + result |= CPU_FMA;
>>>>>
>>>>> // AMD features.
>>>>> if (is_amd()) {
>>>>> @@ -518,16 +541,8 @@
>>>>> }
>>>>> // Intel features.
>>>>> if(is_intel()) {
>>>>> - if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
>>>>> - result |= CPU_ADX;
>>>>> - if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
>>>>> - result |= CPU_BMI2;
>>>>> - if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
>>>>> - result |= CPU_SHA;
>>>>> if(_cpuid_info.ext_cpuid1_ecx.bits.lzcnt_intel != 0)
>>>>> result |= CPU_LZCNT;
>>>>> - if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
>>>>> - result |= CPU_FMA;
>>>>> // for Intel, ecx.bits.misalignsse bit (bit 8) indicates
>>>>> support for prefetchw
>>>>> if (_cpuid_info.ext_cpuid1_ecx.bits.misalignsse != 0) {
>>>>> result |= CPU_3DNOW_PREFETCH;
>>>>> @@ -590,6 +605,7 @@
>>>>> static ByteSize ext_cpuid5_offset() { return
>>>>> byte_offset_of(CpuidInfo, ext_cpuid5_eax); }
>>>>> static ByteSize ext_cpuid7_offset() { return
>>>>> byte_offset_of(CpuidInfo, ext_cpuid7_eax); }
>>>>> static ByteSize ext_cpuid8_offset() { return
>>>>> byte_offset_of(CpuidInfo, ext_cpuid8_eax); }
>>>>> + static ByteSize ext_cpuid1E_offset() { return
>>>>> byte_offset_of(CpuidInfo, ext_cpuid1E_eax); }
>>>>> static ByteSize tpl_cpuidB0_offset() { return
>>>>> byte_offset_of(CpuidInfo, tpl_cpuidB0_eax); }
>>>>> static ByteSize tpl_cpuidB1_offset() { return
>>>>> byte_offset_of(CpuidInfo, tpl_cpuidB1_eax); }
>>>>> static ByteSize tpl_cpuidB2_offset() { return
>>>>> byte_offset_of(CpuidInfo, tpl_cpuidB2_eax); }
>>>>> @@ -673,8 +689,12 @@
>>>>> if (is_intel() && supports_processor_topology()) {
>>>>> result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
>>>>> } else if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) {
>>>>> - result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
>>>>> - cores_per_cpu();
>>>>> + if (cpu_family() >= 0x17) {
>>>>> + result = _cpuid_info.ext_cpuid1E_ebx.bits.threads_per_core +
>>>>>
>>>> 1;
>>>
>>>> + } else {
>>>>> + result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
>>>>> + cores_per_cpu();
>>>>> + }
>>>>> }
>>>>> return (result == 0 ? 1 : result);
>>>>> }
>>>>>
>>>>> Please let me know your comments.
>>>>> Thanks for your review.
>>>>>
>>>>> Regards,
>>>>> Rohit
>>>>>
>>>>>
>>>>>>
>>>>>> On 9/11/17 9:52 PM, Rohit Arul Raj wrote:
>>>>>>
>>>>>>>
>>>>>>>
>>>>>>> Hello David,
>>>>>>>
>>>>>>>
>>>>>>>>>
>>>>>>>>> 1. ExtCpuid1EEx
>>>>>>>>>
>>>>>>>>> Should this be ExtCpuid1EEbx? (I see the naming here is somewhat
>>>>>>>>> inconsistent - and potentially confusing: I would have preferred to
>>>>>>>>> see
>>>>>>>>> things like ExtCpuid_1E_Ebx, to make it clear.)
>>>>>>>>>
>>>>>>>>
>>>>>>>>
>>>>>>>>
>>>>>>>> Yes, I can change it accordingly.
>>>>>>>>
>>>>>>>>
>>>>>>> I have attached the updated, re-tested patch as per your comments
>>>>>>>
>>>>>> above.
>>>
>>>>
>>>>>>> diff --git a/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>> b/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>> --- a/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>> +++ b/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>> @@ -70,7 +70,7 @@
>>>>>>> bool use_evex = FLAG_IS_DEFAULT(UseAVX) || (UseAVX > 2);
>>>>>>>
>>>>>>> Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4;
>>>>>>> - Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7,
>>>>>>> done, wrapup;
>>>>>>> + Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7,
>>>>>>> ext_cpuid8, done, wrapup;
>>>>>>> Label legacy_setup, save_restore_except, legacy_save_restore,
>>>>>>> start_simd_check;
>>>>>>>
>>>>>>> StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub");
>>>>>>> @@ -272,9 +272,23 @@
>>>>>>> __ jccb(Assembler::belowEqual, ext_cpuid5);
>>>>>>> __ cmpl(rax, 0x80000007); // Is cpuid(0x80000008)
>>>>>>>
>>>>>> supported?
>>>
>>>> __ jccb(Assembler::belowEqual, ext_cpuid7);
>>>>>>> + __ cmpl(rax, 0x80000008); // Is cpuid(0x8000001E) supported?
>>>>>>> + __ jccb(Assembler::belowEqual, ext_cpuid8);
>>>>>>> + //
>>>>>>> + // Extended cpuid(0x8000001E)
>>>>>>> + //
>>>>>>> + __ movl(rax, 0x8000001E);
>>>>>>> + __ cpuid();
>>>>>>> + __ lea(rsi, Address(rbp,
>>>>>>> in_bytes(VM_Version::ext_cpuid_1E_offset())));
>>>>>>> + __ movl(Address(rsi, 0), rax);
>>>>>>> + __ movl(Address(rsi, 4), rbx);
>>>>>>> + __ movl(Address(rsi, 8), rcx);
>>>>>>> + __ movl(Address(rsi,12), rdx);
>>>>>>> +
>>>>>>> //
>>>>>>> // Extended cpuid(0x80000008)
>>>>>>> //
>>>>>>> + __ bind(ext_cpuid8);
>>>>>>> __ movl(rax, 0x80000008);
>>>>>>> __ cpuid();
>>>>>>> __ lea(rsi, Address(rbp,
>>>>>>> in_bytes(VM_Version::ext_cpuid8_offset())));
>>>>>>> @@ -1109,11 +1123,27 @@
>>>>>>> }
>>>>>>>
>>>>>>> #ifdef COMPILER2
>>>>>>> - if (MaxVectorSize > 16) {
>>>>>>> - // Limit vectors size to 16 bytes on current AMD cpus.
>>>>>>> + if (cpu_family() < 0x17 && MaxVectorSize > 16) {
>>>>>>> + // Limit vectors size to 16 bytes on AMD cpus < 17h.
>>>>>>> FLAG_SET_DEFAULT(MaxVectorSize, 16);
>>>>>>> }
>>>>>>> #endif // COMPILER2
>>>>>>> +
>>>>>>> + // Some defaults for AMD family 17h
>>>>>>> + if ( cpu_family() == 0x17 ) {
>>>>>>> + // On family 17h processors use XMM and UnalignedLoadStores
>>>>>>> for
>>>>>>> Array Copy
>>>>>>> + if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
>>>>>>> + FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
>>>>>>> + }
>>>>>>> + if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLo
>>>>>>> adStores))
>>>>>>>
>>>>>> {
>>>
>>>> + FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
>>>>>>> + }
>>>>>>> +#ifdef COMPILER2
>>>>>>> + if (supports_sse4_2() && FLAG_IS_DEFAULT(UseFPUForSpilling))
>>>>>>> {
>>>>>>> + FLAG_SET_DEFAULT(UseFPUForSpilling, true);
>>>>>>> + }
>>>>>>> +#endif
>>>>>>> + }
>>>>>>> }
>>>>>>>
>>>>>>> if( is_intel() ) { // Intel cpus specific settings
>>>>>>> diff --git a/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>> b/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>> --- a/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>> +++ b/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>> @@ -228,6 +228,15 @@
>>>>>>> } bits;
>>>>>>> };
>>>>>>>
>>>>>>> + union ExtCpuid_1E_Ebx {
>>>>>>> + uint32_t value;
>>>>>>> + struct {
>>>>>>> + uint32_t : 8,
>>>>>>> + threads_per_core : 8,
>>>>>>> + : 16;
>>>>>>> + } bits;
>>>>>>> + };
>>>>>>> +
>>>>>>> union XemXcr0Eax {
>>>>>>> uint32_t value;
>>>>>>> struct {
>>>>>>> @@ -398,6 +407,12 @@
>>>>>>> ExtCpuid8Ecx ext_cpuid8_ecx;
>>>>>>> uint32_t ext_cpuid8_edx; // reserved
>>>>>>>
>>>>>>> + // cpuid function 0x8000001E // AMD 17h
>>>>>>> + uint32_t ext_cpuid_1E_eax;
>>>>>>> + ExtCpuid_1E_Ebx ext_cpuid_1E_ebx; // threads per core (AMD17h)
>>>>>>> + uint32_t ext_cpuid_1E_ecx;
>>>>>>> + uint32_t ext_cpuid_1E_edx; // unused currently
>>>>>>> +
>>>>>>> // extended control register XCR0 (the XFEATURE_ENABLED_MASK
>>>>>>> register)
>>>>>>> XemXcr0Eax xem_xcr0_eax;
>>>>>>> uint32_t xem_xcr0_edx; // reserved
>>>>>>> @@ -505,6 +520,14 @@
>>>>>>> result |= CPU_CLMUL;
>>>>>>> if (_cpuid_info.sef_cpuid7_ebx.bits.rtm != 0)
>>>>>>> result |= CPU_RTM;
>>>>>>> + if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
>>>>>>> + result |= CPU_ADX;
>>>>>>> + if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
>>>>>>> + result |= CPU_BMI2;
>>>>>>> + if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
>>>>>>> + result |= CPU_SHA;
>>>>>>> + if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
>>>>>>> + result |= CPU_FMA;
>>>>>>>
>>>>>>> // AMD features.
>>>>>>> if (is_amd()) {
>>>>>>> @@ -518,16 +541,8 @@
>>>>>>> }
>>>>>>> // Intel features.
>>>>>>> if(is_intel()) {
>>>>>>> - if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
>>>>>>> - result |= CPU_ADX;
>>>>>>> - if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
>>>>>>> - result |= CPU_BMI2;
>>>>>>> - if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
>>>>>>> - result |= CPU_SHA;
>>>>>>> if(_cpuid_info.ext_cpuid1_ecx.bits.lzcnt_intel != 0)
>>>>>>> result |= CPU_LZCNT;
>>>>>>> - if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
>>>>>>> - result |= CPU_FMA;
>>>>>>> // for Intel, ecx.bits.misalignsse bit (bit 8) indicates
>>>>>>> support for prefetchw
>>>>>>> if (_cpuid_info.ext_cpuid1_ecx.bits.misalignsse != 0) {
>>>>>>> result |= CPU_3DNOW_PREFETCH;
>>>>>>> @@ -590,6 +605,7 @@
>>>>>>> static ByteSize ext_cpuid5_offset() { return
>>>>>>> byte_offset_of(CpuidInfo, ext_cpuid5_eax); }
>>>>>>> static ByteSize ext_cpuid7_offset() { return
>>>>>>> byte_offset_of(CpuidInfo, ext_cpuid7_eax); }
>>>>>>> static ByteSize ext_cpuid8_offset() { return
>>>>>>> byte_offset_of(CpuidInfo, ext_cpuid8_eax); }
>>>>>>> + static ByteSize ext_cpuid_1E_offset() { return
>>>>>>> byte_offset_of(CpuidInfo, ext_cpuid_1E_eax); }
>>>>>>> static ByteSize tpl_cpuidB0_offset() { return
>>>>>>> byte_offset_of(CpuidInfo, tpl_cpuidB0_eax); }
>>>>>>> static ByteSize tpl_cpuidB1_offset() { return
>>>>>>> byte_offset_of(CpuidInfo, tpl_cpuidB1_eax); }
>>>>>>> static ByteSize tpl_cpuidB2_offset() { return
>>>>>>> byte_offset_of(CpuidInfo, tpl_cpuidB2_eax); }
>>>>>>> @@ -673,8 +689,11 @@
>>>>>>> if (is_intel() && supports_processor_topology()) {
>>>>>>> result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
>>>>>>> } else if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) {
>>>>>>> - result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
>>>>>>> - cores_per_cpu();
>>>>>>> + if (cpu_family() >= 0x17)
>>>>>>> + result = _cpuid_info.ext_cpuid_1E_ebx.bits.threads_per_core
>>>>>>>
>>>>>> +
>>>
>>>> 1;
>>>>>>> + else
>>>>>>> + result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
>>>>>>> + cores_per_cpu();
>>>>>>> }
>>>>>>> return (result == 0 ? 1 : result);
>>>>>>> }
>>>>>>>
>>>>>>>
>>>>>>> Please let me know your comments
>>>>>>>
>>>>>>> Thanks for your time.
>>>>>>>
>>>>>>> Regards,
>>>>>>> Rohit
>>>>>>>
>>>>>>>
>>>>>>> Thanks,
>>>>>>>>> David
>>>>>>>>> -----
>>>>>>>>>
>>>>>>>>>
>>>>>>>>> Reference:
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>> https://support.amd.com/TechDocs/54945_PPR_Family_17h_Models_00h-0Fh.pdf
>>>>>>>>>> [Pg 82]
>>>>>>>>>>
>>>>>>>>>> CPUID_Fn8000001E_EBX [Core Identifiers] (CoreId)
>>>>>>>>>> 15:8 ThreadsPerCore: threads per core. Read-only. Reset:
>>>>>>>>>> XXh.
>>>>>>>>>> The number of threads per core is ThreadsPerCore+1.
>>>>>>>>>>
>>>>>>>>>> diff --git a/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>> b/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>> --- a/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>> +++ b/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>> @@ -70,7 +70,7 @@
>>>>>>>>>> bool use_evex = FLAG_IS_DEFAULT(UseAVX) || (UseAVX > 2);
>>>>>>>>>>
>>>>>>>>>> Label detect_486, cpu486, detect_586, std_cpuid1,
>>>>>>>>>>
>>>>>>>>> std_cpuid4;
>>>
>>>> - Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5,
>>>>>>>>>>
>>>>>>>>> ext_cpuid7,
>>>
>>>> done, wrapup;
>>>>>>>>>> + Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5,
>>>>>>>>>>
>>>>>>>>> ext_cpuid7,
>>>
>>>> ext_cpuid8, done, wrapup;
>>>>>>>>>> Label legacy_setup, save_restore_except,
>>>>>>>>>>
>>>>>>>>> legacy_save_restore,
>>>
>>>> start_simd_check;
>>>>>>>>>>
>>>>>>>>>> StubCodeMark mark(this, "VM_Version",
>>>>>>>>>> "get_cpu_info_stub");
>>>>>>>>>> @@ -272,9 +272,23 @@
>>>>>>>>>> __ jccb(Assembler::belowEqual, ext_cpuid5);
>>>>>>>>>> __ cmpl(rax, 0x80000007); // Is cpuid(0x80000008)
>>>>>>>>>> supported?
>>>>>>>>>> __ jccb(Assembler::belowEqual, ext_cpuid7);
>>>>>>>>>> + __ cmpl(rax, 0x80000008); // Is cpuid(0x8000001E)
>>>>>>>>>>
>>>>>>>>> supported?
>>>
>>>> + __ jccb(Assembler::belowEqual, ext_cpuid8);
>>>>>>>>>> + //
>>>>>>>>>> + // Extended cpuid(0x8000001E)
>>>>>>>>>> + //
>>>>>>>>>> + __ movl(rax, 0x8000001E);
>>>>>>>>>> + __ cpuid();
>>>>>>>>>> + __ lea(rsi, Address(rbp,
>>>>>>>>>> in_bytes(VM_Version::ext_cpuid1E_offset())));
>>>>>>>>>> + __ movl(Address(rsi, 0), rax);
>>>>>>>>>> + __ movl(Address(rsi, 4), rbx);
>>>>>>>>>> + __ movl(Address(rsi, 8), rcx);
>>>>>>>>>> + __ movl(Address(rsi,12), rdx);
>>>>>>>>>> +
>>>>>>>>>> //
>>>>>>>>>> // Extended cpuid(0x80000008)
>>>>>>>>>> //
>>>>>>>>>> + __ bind(ext_cpuid8);
>>>>>>>>>> __ movl(rax, 0x80000008);
>>>>>>>>>> __ cpuid();
>>>>>>>>>> __ lea(rsi, Address(rbp,
>>>>>>>>>> in_bytes(VM_Version::ext_cpuid8_offset())));
>>>>>>>>>> @@ -1109,11 +1123,27 @@
>>>>>>>>>> }
>>>>>>>>>>
>>>>>>>>>> #ifdef COMPILER2
>>>>>>>>>> - if (MaxVectorSize > 16) {
>>>>>>>>>> - // Limit vectors size to 16 bytes on current AMD cpus.
>>>>>>>>>> + if (cpu_family() < 0x17 && MaxVectorSize > 16) {
>>>>>>>>>> + // Limit vectors size to 16 bytes on AMD cpus < 17h.
>>>>>>>>>> FLAG_SET_DEFAULT(MaxVectorSize, 16);
>>>>>>>>>> }
>>>>>>>>>> #endif // COMPILER2
>>>>>>>>>> +
>>>>>>>>>> + // Some defaults for AMD family 17h
>>>>>>>>>> + if ( cpu_family() == 0x17 ) {
>>>>>>>>>> + // On family 17h processors use XMM and UnalignedLoadStores
>>>>>>>>>> for
>>>>>>>>>> Array Copy
>>>>>>>>>> + if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArray
>>>>>>>>>> Copy))
>>>>>>>>>>
>>>>>>>>> {
>>>
>>>> + FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
>>>>>>>>>> + }
>>>>>>>>>> + if (supports_sse2() &&
>>>>>>>>>> FLAG_IS_DEFAULT(UseUnalignedLoadStores))
>>>>>>>>>> {
>>>>>>>>>> + FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
>>>>>>>>>> + }
>>>>>>>>>> +#ifdef COMPILER2
>>>>>>>>>> + if (supports_sse4_2() && FLAG_IS_DEFAULT(UseFPUForSpill
>>>>>>>>>> ing))
>>>>>>>>>>
>>>>>>>>> {
>>>
>>>> + FLAG_SET_DEFAULT(UseFPUForSpilling, true);
>>>>>>>>>> + }
>>>>>>>>>> +#endif
>>>>>>>>>> + }
>>>>>>>>>> }
>>>>>>>>>>
>>>>>>>>>> if( is_intel() ) { // Intel cpus specific settings
>>>>>>>>>> diff --git a/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>> b/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>> --- a/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>> +++ b/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>> @@ -228,6 +228,15 @@
>>>>>>>>>> } bits;
>>>>>>>>>> };
>>>>>>>>>>
>>>>>>>>>> + union ExtCpuid1EEx {
>>>>>>>>>> + uint32_t value;
>>>>>>>>>> + struct {
>>>>>>>>>> + uint32_t : 8,
>>>>>>>>>> + threads_per_core : 8,
>>>>>>>>>> + : 16;
>>>>>>>>>> + } bits;
>>>>>>>>>> + };
>>>>>>>>>> +
>>>>>>>>>> union XemXcr0Eax {
>>>>>>>>>> uint32_t value;
>>>>>>>>>> struct {
>>>>>>>>>> @@ -398,6 +407,12 @@
>>>>>>>>>> ExtCpuid8Ecx ext_cpuid8_ecx;
>>>>>>>>>> uint32_t ext_cpuid8_edx; // reserved
>>>>>>>>>>
>>>>>>>>>> + // cpuid function 0x8000001E // AMD 17h
>>>>>>>>>> + uint32_t ext_cpuid1E_eax;
>>>>>>>>>> + ExtCpuid1EEx ext_cpuid1E_ebx; // threads per core (AMD17h)
>>>>>>>>>> + uint32_t ext_cpuid1E_ecx;
>>>>>>>>>> + uint32_t ext_cpuid1E_edx; // unused currently
>>>>>>>>>> +
>>>>>>>>>> // extended control register XCR0 (the
>>>>>>>>>>
>>>>>>>>> XFEATURE_ENABLED_MASK
>>>
>>>> register)
>>>>>>>>>> XemXcr0Eax xem_xcr0_eax;
>>>>>>>>>> uint32_t xem_xcr0_edx; // reserved
>>>>>>>>>> @@ -505,6 +520,14 @@
>>>>>>>>>> result |= CPU_CLMUL;
>>>>>>>>>> if (_cpuid_info.sef_cpuid7_ebx.bits.rtm != 0)
>>>>>>>>>> result |= CPU_RTM;
>>>>>>>>>> + if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
>>>>>>>>>> + result |= CPU_ADX;
>>>>>>>>>> + if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
>>>>>>>>>> + result |= CPU_BMI2;
>>>>>>>>>> + if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
>>>>>>>>>> + result |= CPU_SHA;
>>>>>>>>>> + if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
>>>>>>>>>> + result |= CPU_FMA;
>>>>>>>>>>
>>>>>>>>>> // AMD features.
>>>>>>>>>> if (is_amd()) {
>>>>>>>>>> @@ -518,16 +541,8 @@
>>>>>>>>>> }
>>>>>>>>>> // Intel features.
>>>>>>>>>> if(is_intel()) {
>>>>>>>>>> - if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
>>>>>>>>>> - result |= CPU_ADX;
>>>>>>>>>> - if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
>>>>>>>>>> - result |= CPU_BMI2;
>>>>>>>>>> - if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
>>>>>>>>>> - result |= CPU_SHA;
>>>>>>>>>> if(_cpuid_info.ext_cpuid1_ecx.bits.lzcnt_intel != 0)
>>>>>>>>>> result |= CPU_LZCNT;
>>>>>>>>>> - if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
>>>>>>>>>> - result |= CPU_FMA;
>>>>>>>>>> // for Intel, ecx.bits.misalignsse bit (bit 8)
>>>>>>>>>> indicates
>>>>>>>>>> support for prefetchw
>>>>>>>>>> if (_cpuid_info.ext_cpuid1_ecx.bits.misalignsse != 0)
>>>>>>>>>> {
>>>>>>>>>> result |= CPU_3DNOW_PREFETCH;
>>>>>>>>>> @@ -590,6 +605,7 @@
>>>>>>>>>> static ByteSize ext_cpuid5_offset() { return
>>>>>>>>>> byte_offset_of(CpuidInfo, ext_cpuid5_eax); }
>>>>>>>>>> static ByteSize ext_cpuid7_offset() { return
>>>>>>>>>> byte_offset_of(CpuidInfo, ext_cpuid7_eax); }
>>>>>>>>>> static ByteSize ext_cpuid8_offset() { return
>>>>>>>>>> byte_offset_of(CpuidInfo, ext_cpuid8_eax); }
>>>>>>>>>> + static ByteSize ext_cpuid1E_offset() { return
>>>>>>>>>> byte_offset_of(CpuidInfo, ext_cpuid1E_eax); }
>>>>>>>>>> static ByteSize tpl_cpuidB0_offset() { return
>>>>>>>>>> byte_offset_of(CpuidInfo, tpl_cpuidB0_eax); }
>>>>>>>>>> static ByteSize tpl_cpuidB1_offset() { return
>>>>>>>>>> byte_offset_of(CpuidInfo, tpl_cpuidB1_eax); }
>>>>>>>>>> static ByteSize tpl_cpuidB2_offset() { return
>>>>>>>>>> byte_offset_of(CpuidInfo, tpl_cpuidB2_eax); }
>>>>>>>>>> @@ -673,8 +689,11 @@
>>>>>>>>>> if (is_intel() && supports_processor_topology()) {
>>>>>>>>>> result = _cpuid_info.tpl_cpuidB0_ebx.bi
>>>>>>>>>> ts.logical_cpus;
>>>>>>>>>> } else if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) {
>>>>>>>>>> - result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
>>>>>>>>>> - cores_per_cpu();
>>>>>>>>>> + if (cpu_family() >= 0x17)
>>>>>>>>>> + result = _cpuid_info.ext_cpuid1E_ebx.bi
>>>>>>>>>> ts.threads_per_core
>>>>>>>>>>
>>>>>>>>> +
>>>
>>>> 1;
>>>>>>>>>> + else
>>>>>>>>>> + result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu
>>>>>>>>>>
>>>>>>>>> /
>>>
>>>> + cores_per_cpu();
>>>>>>>>>> }
>>>>>>>>>> return (result == 0 ? 1 : result);
>>>>>>>>>> }
>>>>>>>>>>
>>>>>>>>>> I have attached the patch for review.
>>>>>>>>>> Please let me know your comments.
>>>>>>>>>>
>>>>>>>>>> Thanks,
>>>>>>>>>> Rohit
>>>>>>>>>>
>>>>>>>>>> Thanks,
>>>>>>>>>>> Vladimir
>>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>>> src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>>>
>>>>>>>>>>>> No comments on AMD specific changes.
>>>>>>>>>>>>
>>>>>>>>>>>> Thanks,
>>>>>>>>>>>> David
>>>>>>>>>>>> -----
>>>>>>>>>>>>
>>>>>>>>>>>> On 5/09/2017 3:43 PM, David Holmes wrote:
>>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>>> On 5/09/2017 3:29 PM, Rohit Arul Raj wrote:
>>>>>>>>>>>>>
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> Hello David,
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> On Tue, Sep 5, 2017 at 10:31 AM, David Holmes
>>>>>>>>>>>>>> <david.holmes at oracle.com>
>>>>>>>>>>>>>> wrote:
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> Hi Rohit,
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> I was unable to apply your patch to latest jdk10/hs/hotspot
>>>>>>>>>>>>>>> repo.
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>> I checked out the latest jdk10/hs/hotspot [parent:
>>>>>>>>>>>>>> 13548:1a9c2e07a826]
>>>>>>>>>>>>>> and was able to apply the patch
>>>>>>>>>>>>>> [epyc-amd17h-defaults-3Sept.patch]
>>>>>>>>>>>>>> without any issues.
>>>>>>>>>>>>>> Can you share the error message that you are getting?
>>>>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>>> I was getting this:
>>>>>>>>>>>>>
>>>>>>>>>>>>> applying hotspot.patch
>>>>>>>>>>>>> patching file src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>>>> Hunk #1 FAILED at 1108
>>>>>>>>>>>>> 1 out of 1 hunks FAILED -- saving rejects to file
>>>>>>>>>>>>> src/cpu/x86/vm/vm_version_x86.cpp.rej
>>>>>>>>>>>>> patching file src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>>>>> Hunk #2 FAILED at 522
>>>>>>>>>>>>> 1 out of 2 hunks FAILED -- saving rejects to file
>>>>>>>>>>>>> src/cpu/x86/vm/vm_version_x86.hpp.rej
>>>>>>>>>>>>> abort: patch failed to apply
>>>>>>>>>>>>>
>>>>>>>>>>>>> but I started again and this time it applied fine, so not sure
>>>>>>>>>>>>> what
>>>>>>>>>>>>> was
>>>>>>>>>>>>> going on there.
>>>>>>>>>>>>>
>>>>>>>>>>>>> Cheers,
>>>>>>>>>>>>> David
>>>>>>>>>>>>>
>>>>>>>>>>>>> Regards,
>>>>>>>>>>>>>> Rohit
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> On 4/09/2017 2:42 AM, Rohit Arul Raj wrote:
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> Hello Vladimir,
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> On Sat, Sep 2, 2017 at 11:25 PM, Vladimir Kozlov
>>>>>>>>>>>>>>>> <vladimir.kozlov at oracle.com> wrote:
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>> Hi Rohit,
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>> On 9/2/17 1:16 AM, Rohit Arul Raj wrote:
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>> Hello Vladimir,
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>> Changes look good. Only question I have is about
>>>>>>>>>>>>>>>>>>> MaxVectorSize.
>>>>>>>>>>>>>>>>>>> It
>>>>>>>>>>>>>>>>>>> is
>>>>>>>>>>>>>>>>>>> set
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>> 16 only in presence of AVX:
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>> http://hg.openjdk.java.net/jdk10/hs/hotspot/file/
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>> 046eab27258f/src/cpu/x86/vm/vm_version_x86.cpp#l945
>>>
>>>>
>>>>>>>>>>>>>>>>>>> Does that code work for AMD 17h too?
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>> Thanks for pointing that out. Yes, the code works fine for
>>>>>>>>>>>>>>>>>> AMD
>>>>>>>>>>>>>>>>>> 17h.
>>>>>>>>>>>>>>>>>> So
>>>>>>>>>>>>>>>>>> I have removed the surplus check for MaxVectorSize from my
>>>>>>>>>>>>>>>>>> patch.
>>>>>>>>>>>>>>>>>> I
>>>>>>>>>>>>>>>>>> have updated, re-tested and attached the patch.
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>> Which check did you remove?
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> My older patch had the below mentioned check which was
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> required
>>>
>>>> on
>>>>>>>>>>>>>>>> JDK9 where the default MaxVectorSize was 64. It has been
>>>>>>>>>>>>>>>> handled
>>>>>>>>>>>>>>>> better in openJDK10. So this check is not required anymore.
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> + // Some defaults for AMD family 17h
>>>>>>>>>>>>>>>> + if ( cpu_family() == 0x17 ) {
>>>>>>>>>>>>>>>> ...
>>>>>>>>>>>>>>>> ...
>>>>>>>>>>>>>>>> + if (MaxVectorSize > 32) {
>>>>>>>>>>>>>>>> + FLAG_SET_DEFAULT(MaxVectorSize, 32);
>>>>>>>>>>>>>>>> + }
>>>>>>>>>>>>>>>> ..
>>>>>>>>>>>>>>>> ..
>>>>>>>>>>>>>>>> + }
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>> I have one query regarding the setting of UseSHA flag:
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>> http://hg.openjdk.java.net/jdk10/hs/hotspot/file/
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>> 046eab27258f/src/cpu/x86/vm/vm_version_x86.cpp#l821
>>>
>>>>
>>>>>>>>>>>>>>>>>> AMD 17h has support for SHA.
>>>>>>>>>>>>>>>>>> AMD 15h doesn't have support for SHA. Still "UseSHA" flag
>>>>>>>>>>>>>>>>>> gets
>>>>>>>>>>>>>>>>>> enabled for it based on the availability of BMI2 and AVX2.
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>> Is
>>>
>>>> there
>>>>>>>>>>>>>>>>>> an
>>>>>>>>>>>>>>>>>> underlying reason for this? I have handled this in the
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>> patch
>>>
>>>> but
>>>>>>>>>>>>>>>>>> just
>>>>>>>>>>>>>>>>>> wanted to confirm.
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>> It was done with the following changes, which use only AVX2 and BMI2
>>>>>>>>>>>>>>>>> instructions
>>>>>>>>>>>>>>>>> to
>>>>>>>>>>>>>>>>> calculate SHA-256:
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>> http://hg.openjdk.java.net/jdk10/hs/hotspot/rev/
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> 6a17c49de974
>>>
>>>>
>>>>>>>>>>>>>>>>> I don't know if AMD 15h supports these instructions and can
>>>>>>>>>>>>>>>>> execute
>>>>>>>>>>>>>>>>> that
>>>>>>>>>>>>>>>>> code. You need to test it.
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> Ok, got it. Since AMD15h has support for AVX2 and BMI2
>>>>>>>>>>>>>>>> instructions,
>>>>>>>>>>>>>>>> it should work.
>>>>>>>>>>>>>>>> Confirmed by running the following sanity tests:
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> ./hotspot/test/compiler/intrinsics/sha/sanity/
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> TestSHA1Intrinsics.java
>>>
>>>>
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> ./hotspot/test/compiler/intrinsics/sha/sanity/
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> TestSHA512Intrinsics.java
>>>
>>>>
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> ./hotspot/test/compiler/intrinsics/sha/sanity/
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> TestSHA256Intrinsics.java
>>>
>>>>
>>>>>>>>>>>>>>>> So I have removed those SHA checks from my patch too.
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> Please find attached updated, re-tested patch.
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> diff --git a/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>>>>>>> b/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>>>>>>> --- a/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>>>>>>> +++ b/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>>>>>>> @@ -1109,11 +1109,27 @@
>>>>>>>>>>>>>>>> }
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> #ifdef COMPILER2
>>>>>>>>>>>>>>>> - if (MaxVectorSize > 16) {
>>>>>>>>>>>>>>>> - // Limit vectors size to 16 bytes on current AMD
>>>>>>>>>>>>>>>> cpus.
>>>>>>>>>>>>>>>> + if (cpu_family() < 0x17 && MaxVectorSize > 16) {
>>>>>>>>>>>>>>>> + // Limit vectors size to 16 bytes on AMD cpus < 17h.
>>>>>>>>>>>>>>>> FLAG_SET_DEFAULT(MaxVectorSize, 16);
>>>>>>>>>>>>>>>> }
>>>>>>>>>>>>>>>> #endif // COMPILER2
>>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>>> + // Some defaults for AMD family 17h
>>>>>>>>>>>>>>>> + if ( cpu_family() == 0x17 ) {
>>>>>>>>>>>>>>>> + // On family 17h processors use XMM and
>>>>>>>>>>>>>>>> UnalignedLoadStores
>>>>>>>>>>>>>>>> for
>>>>>>>>>>>>>>>> Array Copy
>>>>>>>>>>>>>>>> + if (supports_sse2() &&
>>>>>>>>>>>>>>>> FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
>>>>>>>>>>>>>>>> + FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
>>>>>>>>>>>>>>>> + }
>>>>>>>>>>>>>>>> + if (supports_sse2() &&
>>>>>>>>>>>>>>>> FLAG_IS_DEFAULT(UseUnalignedLoadStores))
>>>>>>>>>>>>>>>> {
>>>>>>>>>>>>>>>> + FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
>>>>>>>>>>>>>>>> + }
>>>>>>>>>>>>>>>> +#ifdef COMPILER2
>>>>>>>>>>>>>>>> + if (supports_sse4_2() &&
>>>>>>>>>>>>>>>> FLAG_IS_DEFAULT(UseFPUForSpilling))
>>>>>>>>>>>>>>>> {
>>>>>>>>>>>>>>>> + FLAG_SET_DEFAULT(UseFPUForSpilling, true);
>>>>>>>>>>>>>>>> + }
>>>>>>>>>>>>>>>> +#endif
>>>>>>>>>>>>>>>> + }
>>>>>>>>>>>>>>>> }
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> if( is_intel() ) { // Intel cpus specific settings
>>>>>>>>>>>>>>>> diff --git a/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>>>>>>>> b/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>>>>>>>> --- a/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>>>>>>>> +++ b/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>>>>>>>> @@ -505,6 +505,14 @@
>>>>>>>>>>>>>>>> result |= CPU_CLMUL;
>>>>>>>>>>>>>>>> if (_cpuid_info.sef_cpuid7_ebx.bits.rtm != 0)
>>>>>>>>>>>>>>>> result |= CPU_RTM;
>>>>>>>>>>>>>>>> + if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
>>>>>>>>>>>>>>>> + result |= CPU_ADX;
>>>>>>>>>>>>>>>> + if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
>>>>>>>>>>>>>>>> + result |= CPU_BMI2;
>>>>>>>>>>>>>>>> + if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
>>>>>>>>>>>>>>>> + result |= CPU_SHA;
>>>>>>>>>>>>>>>> + if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
>>>>>>>>>>>>>>>> + result |= CPU_FMA;
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> // AMD features.
>>>>>>>>>>>>>>>> if (is_amd()) {
>>>>>>>>>>>>>>>> @@ -515,19 +523,13 @@
>>>>>>>>>>>>>>>> result |= CPU_LZCNT;
>>>>>>>>>>>>>>>> if (_cpuid_info.ext_cpuid1_ecx.bits.sse4a !=
>>>>>>>>>>>>>>>> 0)
>>>>>>>>>>>>>>>> result |= CPU_SSE4A;
>>>>>>>>>>>>>>>> + if(_cpuid_info.std_cpuid1_edx.bits.ht != 0)
>>>>>>>>>>>>>>>> + result |= CPU_HT;
>>>>>>>>>>>>>>>> }
>>>>>>>>>>>>>>>> // Intel features.
>>>>>>>>>>>>>>>> if(is_intel()) {
>>>>>>>>>>>>>>>> - if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
>>>>>>>>>>>>>>>> - result |= CPU_ADX;
>>>>>>>>>>>>>>>> - if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
>>>>>>>>>>>>>>>> - result |= CPU_BMI2;
>>>>>>>>>>>>>>>> - if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
>>>>>>>>>>>>>>>> - result |= CPU_SHA;
>>>>>>>>>>>>>>>> if(_cpuid_info.ext_cpuid1_ecx.bits.lzcnt_intel
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> !=
>>>
>>>> 0)
>>>>>>>>>>>>>>>> result |= CPU_LZCNT;
>>>>>>>>>>>>>>>> - if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
>>>>>>>>>>>>>>>> - result |= CPU_FMA;
>>>>>>>>>>>>>>>> // for Intel, ecx.bits.misalignsse bit (bit 8)
>>>>>>>>>>>>>>>> indicates
>>>>>>>>>>>>>>>> support for prefetchw
>>>>>>>>>>>>>>>> if (_cpuid_info.ext_cpuid1_ecx.bits.misalignsse
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> !=
>>>
>>>> 0)
>>>>>>>>>>>>>>>> {
>>>>>>>>>>>>>>>> result |= CPU_3DNOW_PREFETCH;
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> Please let me know your comments.
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> Thanks for your time.
>>>>>>>>>>>>>>>> Rohit
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>> Thanks for taking time to review the code.
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>> diff --git a/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>>>>>>>>> b/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>>>>>>>>> --- a/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>>>>>>>>> +++ b/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>>>>>>>>> @@ -1088,6 +1088,22 @@
>>>>>>>>>>>>>>>>>> }
>>>>>>>>>>>>>>>>>> FLAG_SET_DEFAULT(UseSSE42Intrinsics,
>>>>>>>>>>>>>>>>>> false);
>>>>>>>>>>>>>>>>>> }
>>>>>>>>>>>>>>>>>> + if (supports_sha()) {
>>>>>>>>>>>>>>>>>> + if (FLAG_IS_DEFAULT(UseSHA)) {
>>>>>>>>>>>>>>>>>> + FLAG_SET_DEFAULT(UseSHA, true);
>>>>>>>>>>>>>>>>>> + }
>>>>>>>>>>>>>>>>>> + } else if (UseSHA || UseSHA1Intrinsics ||
>>>>>>>>>>>>>>>>>> UseSHA256Intrinsics
>>>>>>>>>>>>>>>>>> ||
>>>>>>>>>>>>>>>>>> UseSHA512Intrinsics) {
>>>>>>>>>>>>>>>>>> + if (!FLAG_IS_DEFAULT(UseSHA) ||
>>>>>>>>>>>>>>>>>> + !FLAG_IS_DEFAULT(UseSHA1Intrinsics) ||
>>>>>>>>>>>>>>>>>> + !FLAG_IS_DEFAULT(UseSHA256Intrinsics) ||
>>>>>>>>>>>>>>>>>> + !FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
>>>>>>>>>>>>>>>>>> + warning("SHA instructions are not available on
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>> this
>>>
>>>> CPU");
>>>>>>>>>>>>>>>>>> + }
>>>>>>>>>>>>>>>>>> + FLAG_SET_DEFAULT(UseSHA, false);
>>>>>>>>>>>>>>>>>> + FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
>>>>>>>>>>>>>>>>>> + FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
>>>>>>>>>>>>>>>>>> + FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
>>>>>>>>>>>>>>>>>> + }
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>> // some defaults for AMD family 15h
>>>>>>>>>>>>>>>>>> if ( cpu_family() == 0x15 ) {
>>>>>>>>>>>>>>>>>> @@ -1109,11 +1125,40 @@
>>>>>>>>>>>>>>>>>> }
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>> #ifdef COMPILER2
>>>>>>>>>>>>>>>>>> - if (MaxVectorSize > 16) {
>>>>>>>>>>>>>>>>>> - // Limit vectors size to 16 bytes on current AMD
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>> cpus.
>>>
>>>> + if (cpu_family() < 0x17 && MaxVectorSize > 16) {
>>>>>>>>>>>>>>>>>> + // Limit vectors size to 16 bytes on AMD cpus <
>>>>>>>>>>>>>>>>>> 17h.
>>>>>>>>>>>>>>>>>> FLAG_SET_DEFAULT(MaxVectorSize, 16);
>>>>>>>>>>>>>>>>>> }
>>>>>>>>>>>>>>>>>> #endif // COMPILER2
>>>>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>>>>> + // Some defaults for AMD family 17h
>>>>>>>>>>>>>>>>>> + if ( cpu_family() == 0x17 ) {
>>>>>>>>>>>>>>>>>> + // On family 17h processors use XMM and
>>>>>>>>>>>>>>>>>> UnalignedLoadStores
>>>>>>>>>>>>>>>>>> for
>>>>>>>>>>>>>>>>>> Array Copy
>>>>>>>>>>>>>>>>>> + if (supports_sse2() &&
>>>>>>>>>>>>>>>>>> FLAG_IS_DEFAULT(UseXMMForArrayCopy))
>>>>>>>>>>>>>>>>>> {
>>>>>>>>>>>>>>>>>> + FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
>>>>>>>>>>>>>>>>>> + }
>>>>>>>>>>>>>>>>>> + if (supports_sse2() &&
>>>>>>>>>>>>>>>>>> FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
>>>>>>>>>>>>>>>>>> + FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
>>>>>>>>>>>>>>>>>> + }
>>>>>>>>>>>>>>>>>> + if (supports_bmi2() &&
>>>>>>>>>>>>>>>>>> FLAG_IS_DEFAULT(UseBMI2Instructions))
>>>>>>>>>>>>>>>>>> {
>>>>>>>>>>>>>>>>>> + FLAG_SET_DEFAULT(UseBMI2Instructions, true);
>>>>>>>>>>>>>>>>>> + }
>>>>>>>>>>>>>>>>>> + if (UseSHA) {
>>>>>>>>>>>>>>>>>> + if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
>>>>>>>>>>>>>>>>>> + FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
>>>>>>>>>>>>>>>>>> + } else if (UseSHA512Intrinsics) {
>>>>>>>>>>>>>>>>>> + warning("Intrinsics for SHA-384 and SHA-512
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>> crypto
>>>
>>>> hash
>>>>>>>>>>>>>>>>>> functions not available on this CPU.");
>>>>>>>>>>>>>>>>>> + FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
>>>>>>>>>>>>>>>>>> + }
>>>>>>>>>>>>>>>>>> + }
>>>>>>>>>>>>>>>>>> +#ifdef COMPILER2
>>>>>>>>>>>>>>>>>> + if (supports_sse4_2()) {
>>>>>>>>>>>>>>>>>> + if (FLAG_IS_DEFAULT(UseFPUForSpilling)) {
>>>>>>>>>>>>>>>>>> + FLAG_SET_DEFAULT(UseFPUForSpilling, true);
>>>>>>>>>>>>>>>>>> + }
>>>>>>>>>>>>>>>>>> + }
>>>>>>>>>>>>>>>>>> +#endif
>>>>>>>>>>>>>>>>>> + }
>>>>>>>>>>>>>>>>>> }
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>> if( is_intel() ) { // Intel cpus specific
>>>>>>>>>>>>>>>>>> settings
>>>>>>>>>>>>>>>>>> diff --git a/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>>>>>>>>>> b/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>>>>>>>>>> --- a/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>>>>>>>>>> +++ b/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>>>>>>>>>> @@ -505,6 +505,14 @@
>>>>>>>>>>>>>>>>>> result |= CPU_CLMUL;
>>>>>>>>>>>>>>>>>> if (_cpuid_info.sef_cpuid7_ebx.bits.rtm != 0)
>>>>>>>>>>>>>>>>>> result |= CPU_RTM;
>>>>>>>>>>>>>>>>>> + if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
>>>>>>>>>>>>>>>>>> + result |= CPU_ADX;
>>>>>>>>>>>>>>>>>> + if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
>>>>>>>>>>>>>>>>>> + result |= CPU_BMI2;
>>>>>>>>>>>>>>>>>> + if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
>>>>>>>>>>>>>>>>>> + result |= CPU_SHA;
>>>>>>>>>>>>>>>>>> + if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
>>>>>>>>>>>>>>>>>> + result |= CPU_FMA;
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>> // AMD features.
>>>>>>>>>>>>>>>>>> if (is_amd()) {
>>>>>>>>>>>>>>>>>> @@ -515,19 +523,13 @@
>>>>>>>>>>>>>>>>>> result |= CPU_LZCNT;
>>>>>>>>>>>>>>>>>> if (_cpuid_info.ext_cpuid1_ecx.bits.sse4a
>>>>>>>>>>>>>>>>>> !=
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>> 0)
>>>
>>>> result |= CPU_SSE4A;
>>>>>>>>>>>>>>>>>> + if(_cpuid_info.std_cpuid1_edx.bits.ht != 0)
>>>>>>>>>>>>>>>>>> + result |= CPU_HT;
>>>>>>>>>>>>>>>>>> }
>>>>>>>>>>>>>>>>>> // Intel features.
>>>>>>>>>>>>>>>>>> if(is_intel()) {
>>>>>>>>>>>>>>>>>> - if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
>>>>>>>>>>>>>>>>>> - result |= CPU_ADX;
>>>>>>>>>>>>>>>>>> - if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
>>>>>>>>>>>>>>>>>> - result |= CPU_BMI2;
>>>>>>>>>>>>>>>>>> - if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
>>>>>>>>>>>>>>>>>> - result |= CPU_SHA;
>>>>>>>>>>>>>>>>>> if(_cpuid_info.ext_cpuid1_ecx.
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>> bits.lzcnt_intel
>>>
>>>> !=
>>>>>>>>>>>>>>>>>> 0)
>>>>>>>>>>>>>>>>>> result |= CPU_LZCNT;
>>>>>>>>>>>>>>>>>> - if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
>>>>>>>>>>>>>>>>>> - result |= CPU_FMA;
>>>>>>>>>>>>>>>>>> // for Intel, ecx.bits.misalignsse bit (bit
>>>>>>>>>>>>>>>>>> 8)
>>>>>>>>>>>>>>>>>> indicates
>>>>>>>>>>>>>>>>>> support for prefetchw
>>>>>>>>>>>>>>>>>> if (_cpuid_info.ext_cpuid1_ecx.
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>> bits.misalignsse
>>
>>
More information about the hotspot-dev
mailing list