RFR: Newer AMD 17h (EPYC) Processor family defaults
Rohit Arul Raj
rohitarulraj at gmail.com
Thu Sep 14 18:40:52 UTC 2017
Hello Vladimir,
> CPUID check for 0x8000001E should be explicit. Otherwise the code will be
> executed for all above 0x80000008.
>
> + __ cmpl(rax, 0x80000008); // Is cpuid(0x80000009 and above)
> supported?
> + __ jccb(Assembler::belowEqual, ext_cpuid8);
> + __ cmpl(rax, 0x8000001E); // Is cpuid(0x8000001E) supported?
> + __ jccb(Assembler::notEqual, ext_cpuid8);
> + //
> + // Extended cpuid(0x8000001E)
> + //
>
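AMD 17h also supports CPUID 0x8000001F, so rax (the highest supported extended
function) is 0x8000001F rather than 0x8000001E. With a notEqual test the jump
to ext_cpuid8 would always be taken and the 0x8000001E block would never run.
I have therefore changed the condition in the last statement to "below":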
+ __ cmpl(rax, 0x80000008); // Is cpuid(0x80000009 and above) supported?
+ __ jccb(Assembler::belowEqual, ext_cpuid8);
+ __ cmpl(rax, 0x8000001E); // Is cpuid(0x8000001E) supported?
+ __ jccb(Assembler::below, ext_cpuid8);
Is this OK?
After making the above changes, I got a "Short forward jump exceeds
8-bit offset" error while building OpenJDK.
#
# A fatal error has been detected by the Java Runtime Environment:
#
# Internal Error (macroAssembler_x86.hpp:116), pid=7786, tid=7787
# guarantee(this->is8bit(imm8)) failed: Short forward jump exceeds 8-bit offset
So I have replaced the short jump (jccb) with a near jump (jcc) in the check
for CPUID 0x80000005:
__ cmpl(rax, 0x80000000); // Is cpuid(0x80000001) supported?
__ jcc(Assembler::belowEqual, done);
__ cmpl(rax, 0x80000004); // Is cpuid(0x80000005) supported?
- __ jccb(Assembler::belowEqual, ext_cpuid1);
+ __ jcc(Assembler::belowEqual, ext_cpuid1);
I have attached the updated, re-tested patch.
diff --git a/src/cpu/x86/vm/vm_version_x86.cpp
b/src/cpu/x86/vm/vm_version_x86.cpp
--- a/src/cpu/x86/vm/vm_version_x86.cpp
+++ b/src/cpu/x86/vm/vm_version_x86.cpp
@@ -70,7 +70,7 @@
bool use_evex = FLAG_IS_DEFAULT(UseAVX) || (UseAVX > 2);
Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4;
- Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7,
done, wrapup;
+ Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7,
ext_cpuid8, done, wrapup;
Label legacy_setup, save_restore_except, legacy_save_restore,
start_simd_check;
StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub");
@@ -267,14 +267,30 @@
__ cmpl(rax, 0x80000000); // Is cpuid(0x80000001) supported?
__ jcc(Assembler::belowEqual, done);
__ cmpl(rax, 0x80000004); // Is cpuid(0x80000005) supported?
- __ jccb(Assembler::belowEqual, ext_cpuid1);
+ __ jcc(Assembler::belowEqual, ext_cpuid1);
__ cmpl(rax, 0x80000006); // Is cpuid(0x80000007) supported?
__ jccb(Assembler::belowEqual, ext_cpuid5);
__ cmpl(rax, 0x80000007); // Is cpuid(0x80000008) supported?
__ jccb(Assembler::belowEqual, ext_cpuid7);
+ __ cmpl(rax, 0x80000008); // Is cpuid(0x80000009 and above) supported?
+ __ jccb(Assembler::belowEqual, ext_cpuid8);
+ __ cmpl(rax, 0x8000001E); // Is cpuid(0x8000001E) supported?
+ __ jccb(Assembler::below, ext_cpuid8);
+ //
+ // Extended cpuid(0x8000001E)
+ //
+ __ movl(rax, 0x8000001E);
+ __ cpuid();
+ __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1E_offset())));
+ __ movl(Address(rsi, 0), rax);
+ __ movl(Address(rsi, 4), rbx);
+ __ movl(Address(rsi, 8), rcx);
+ __ movl(Address(rsi,12), rdx);
+
//
// Extended cpuid(0x80000008)
//
+ __ bind(ext_cpuid8);
__ movl(rax, 0x80000008);
__ cpuid();
__ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid8_offset())));
@@ -1109,11 +1125,27 @@
}
#ifdef COMPILER2
- if (MaxVectorSize > 16) {
- // Limit vectors size to 16 bytes on current AMD cpus.
+ if (cpu_family() < 0x17 && MaxVectorSize > 16) {
+ // Limit vectors size to 16 bytes on AMD cpus < 17h.
FLAG_SET_DEFAULT(MaxVectorSize, 16);
}
#endif // COMPILER2
+
+ // Some defaults for AMD family 17h
+ if ( cpu_family() == 0x17 ) {
+ // On family 17h processors use XMM and UnalignedLoadStores for
Array Copy
+ if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
+ FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
+ }
+ if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
+ FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
+ }
+#ifdef COMPILER2
+ if (supports_sse4_2() && FLAG_IS_DEFAULT(UseFPUForSpilling)) {
+ FLAG_SET_DEFAULT(UseFPUForSpilling, true);
+ }
+#endif
+ }
}
if( is_intel() ) { // Intel cpus specific settings
diff --git a/src/cpu/x86/vm/vm_version_x86.hpp
b/src/cpu/x86/vm/vm_version_x86.hpp
--- a/src/cpu/x86/vm/vm_version_x86.hpp
+++ b/src/cpu/x86/vm/vm_version_x86.hpp
@@ -228,6 +228,15 @@
} bits;
};
+ union ExtCpuid1EEbx {
+ uint32_t value;
+ struct {
+ uint32_t : 8,
+ threads_per_core : 8,
+ : 16;
+ } bits;
+ };
+
union XemXcr0Eax {
uint32_t value;
struct {
@@ -398,6 +407,12 @@
ExtCpuid8Ecx ext_cpuid8_ecx;
uint32_t ext_cpuid8_edx; // reserved
+ // cpuid function 0x8000001E // AMD 17h
+ uint32_t ext_cpuid1E_eax;
+ ExtCpuid1EEbx ext_cpuid1E_ebx; // threads per core (AMD17h)
+ uint32_t ext_cpuid1E_ecx;
+ uint32_t ext_cpuid1E_edx; // unused currently
+
// extended control register XCR0 (the XFEATURE_ENABLED_MASK register)
XemXcr0Eax xem_xcr0_eax;
uint32_t xem_xcr0_edx; // reserved
@@ -505,6 +520,14 @@
result |= CPU_CLMUL;
if (_cpuid_info.sef_cpuid7_ebx.bits.rtm != 0)
result |= CPU_RTM;
+ if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
+ result |= CPU_ADX;
+ if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
+ result |= CPU_BMI2;
+ if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
+ result |= CPU_SHA;
+ if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
+ result |= CPU_FMA;
// AMD features.
if (is_amd()) {
@@ -518,16 +541,8 @@
}
// Intel features.
if(is_intel()) {
- if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
- result |= CPU_ADX;
- if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
- result |= CPU_BMI2;
- if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
- result |= CPU_SHA;
if(_cpuid_info.ext_cpuid1_ecx.bits.lzcnt_intel != 0)
result |= CPU_LZCNT;
- if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
- result |= CPU_FMA;
// for Intel, ecx.bits.misalignsse bit (bit 8) indicates
support for prefetchw
if (_cpuid_info.ext_cpuid1_ecx.bits.misalignsse != 0) {
result |= CPU_3DNOW_PREFETCH;
@@ -590,6 +605,7 @@
static ByteSize ext_cpuid5_offset() { return
byte_offset_of(CpuidInfo, ext_cpuid5_eax); }
static ByteSize ext_cpuid7_offset() { return
byte_offset_of(CpuidInfo, ext_cpuid7_eax); }
static ByteSize ext_cpuid8_offset() { return
byte_offset_of(CpuidInfo, ext_cpuid8_eax); }
+ static ByteSize ext_cpuid1E_offset() { return
byte_offset_of(CpuidInfo, ext_cpuid1E_eax); }
static ByteSize tpl_cpuidB0_offset() { return
byte_offset_of(CpuidInfo, tpl_cpuidB0_eax); }
static ByteSize tpl_cpuidB1_offset() { return
byte_offset_of(CpuidInfo, tpl_cpuidB1_eax); }
static ByteSize tpl_cpuidB2_offset() { return
byte_offset_of(CpuidInfo, tpl_cpuidB2_eax); }
@@ -673,8 +689,12 @@
if (is_intel() && supports_processor_topology()) {
result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
} else if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) {
- result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
- cores_per_cpu();
+ if (cpu_family() >= 0x17) {
+ result = _cpuid_info.ext_cpuid1E_ebx.bits.threads_per_core + 1;
+ } else {
+ result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
+ cores_per_cpu();
+ }
}
return (result == 0 ? 1 : result);
}
Please let me know your comments.
Thanks for your review.
Regards,
Rohit
>
>
> On 9/11/17 9:52 PM, Rohit Arul Raj wrote:
>>
>> Hello David,
>>
>>>>
>>>>
>>>> 1. ExtCpuid1EEx
>>>>
>>>> Should this be ExtCpuid1EEbx? (I see the naming here is somewhat
>>>> inconsistent - and potentially confusing: I would have preferred to see
>>>> things like ExtCpuid_1E_Ebx, to make it clear.)
>>>
>>>
>>> Yes, I can change it accordingly.
>>>
>>
>> I have attached the updated, re-tested patch as per your comments above.
>>
>> diff --git a/src/cpu/x86/vm/vm_version_x86.cpp
>> b/src/cpu/x86/vm/vm_version_x86.cpp
>> --- a/src/cpu/x86/vm/vm_version_x86.cpp
>> +++ b/src/cpu/x86/vm/vm_version_x86.cpp
>> @@ -70,7 +70,7 @@
>> bool use_evex = FLAG_IS_DEFAULT(UseAVX) || (UseAVX > 2);
>>
>> Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4;
>> - Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7,
>> done, wrapup;
>> + Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7,
>> ext_cpuid8, done, wrapup;
>> Label legacy_setup, save_restore_except, legacy_save_restore,
>> start_simd_check;
>>
>> StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub");
>> @@ -272,9 +272,23 @@
>> __ jccb(Assembler::belowEqual, ext_cpuid5);
>> __ cmpl(rax, 0x80000007); // Is cpuid(0x80000008) supported?
>> __ jccb(Assembler::belowEqual, ext_cpuid7);
>> + __ cmpl(rax, 0x80000008); // Is cpuid(0x8000001E) supported?
>> + __ jccb(Assembler::belowEqual, ext_cpuid8);
>> + //
>> + // Extended cpuid(0x8000001E)
>> + //
>> + __ movl(rax, 0x8000001E);
>> + __ cpuid();
>> + __ lea(rsi, Address(rbp,
>> in_bytes(VM_Version::ext_cpuid_1E_offset())));
>> + __ movl(Address(rsi, 0), rax);
>> + __ movl(Address(rsi, 4), rbx);
>> + __ movl(Address(rsi, 8), rcx);
>> + __ movl(Address(rsi,12), rdx);
>> +
>> //
>> // Extended cpuid(0x80000008)
>> //
>> + __ bind(ext_cpuid8);
>> __ movl(rax, 0x80000008);
>> __ cpuid();
>> __ lea(rsi, Address(rbp,
>> in_bytes(VM_Version::ext_cpuid8_offset())));
>> @@ -1109,11 +1123,27 @@
>> }
>>
>> #ifdef COMPILER2
>> - if (MaxVectorSize > 16) {
>> - // Limit vectors size to 16 bytes on current AMD cpus.
>> + if (cpu_family() < 0x17 && MaxVectorSize > 16) {
>> + // Limit vectors size to 16 bytes on AMD cpus < 17h.
>> FLAG_SET_DEFAULT(MaxVectorSize, 16);
>> }
>> #endif // COMPILER2
>> +
>> + // Some defaults for AMD family 17h
>> + if ( cpu_family() == 0x17 ) {
>> + // On family 17h processors use XMM and UnalignedLoadStores for
>> Array Copy
>> + if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
>> + FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
>> + }
>> + if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
>> + FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
>> + }
>> +#ifdef COMPILER2
>> + if (supports_sse4_2() && FLAG_IS_DEFAULT(UseFPUForSpilling)) {
>> + FLAG_SET_DEFAULT(UseFPUForSpilling, true);
>> + }
>> +#endif
>> + }
>> }
>>
>> if( is_intel() ) { // Intel cpus specific settings
>> diff --git a/src/cpu/x86/vm/vm_version_x86.hpp
>> b/src/cpu/x86/vm/vm_version_x86.hpp
>> --- a/src/cpu/x86/vm/vm_version_x86.hpp
>> +++ b/src/cpu/x86/vm/vm_version_x86.hpp
>> @@ -228,6 +228,15 @@
>> } bits;
>> };
>>
>> + union ExtCpuid_1E_Ebx {
>> + uint32_t value;
>> + struct {
>> + uint32_t : 8,
>> + threads_per_core : 8,
>> + : 16;
>> + } bits;
>> + };
>> +
>> union XemXcr0Eax {
>> uint32_t value;
>> struct {
>> @@ -398,6 +407,12 @@
>> ExtCpuid8Ecx ext_cpuid8_ecx;
>> uint32_t ext_cpuid8_edx; // reserved
>>
>> + // cpuid function 0x8000001E // AMD 17h
>> + uint32_t ext_cpuid_1E_eax;
>> + ExtCpuid_1E_Ebx ext_cpuid_1E_ebx; // threads per core (AMD17h)
>> + uint32_t ext_cpuid_1E_ecx;
>> + uint32_t ext_cpuid_1E_edx; // unused currently
>> +
>> // extended control register XCR0 (the XFEATURE_ENABLED_MASK
>> register)
>> XemXcr0Eax xem_xcr0_eax;
>> uint32_t xem_xcr0_edx; // reserved
>> @@ -505,6 +520,14 @@
>> result |= CPU_CLMUL;
>> if (_cpuid_info.sef_cpuid7_ebx.bits.rtm != 0)
>> result |= CPU_RTM;
>> + if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
>> + result |= CPU_ADX;
>> + if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
>> + result |= CPU_BMI2;
>> + if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
>> + result |= CPU_SHA;
>> + if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
>> + result |= CPU_FMA;
>>
>> // AMD features.
>> if (is_amd()) {
>> @@ -518,16 +541,8 @@
>> }
>> // Intel features.
>> if(is_intel()) {
>> - if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
>> - result |= CPU_ADX;
>> - if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
>> - result |= CPU_BMI2;
>> - if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
>> - result |= CPU_SHA;
>> if(_cpuid_info.ext_cpuid1_ecx.bits.lzcnt_intel != 0)
>> result |= CPU_LZCNT;
>> - if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
>> - result |= CPU_FMA;
>> // for Intel, ecx.bits.misalignsse bit (bit 8) indicates
>> support for prefetchw
>> if (_cpuid_info.ext_cpuid1_ecx.bits.misalignsse != 0) {
>> result |= CPU_3DNOW_PREFETCH;
>> @@ -590,6 +605,7 @@
>> static ByteSize ext_cpuid5_offset() { return
>> byte_offset_of(CpuidInfo, ext_cpuid5_eax); }
>> static ByteSize ext_cpuid7_offset() { return
>> byte_offset_of(CpuidInfo, ext_cpuid7_eax); }
>> static ByteSize ext_cpuid8_offset() { return
>> byte_offset_of(CpuidInfo, ext_cpuid8_eax); }
>> + static ByteSize ext_cpuid_1E_offset() { return
>> byte_offset_of(CpuidInfo, ext_cpuid_1E_eax); }
>> static ByteSize tpl_cpuidB0_offset() { return
>> byte_offset_of(CpuidInfo, tpl_cpuidB0_eax); }
>> static ByteSize tpl_cpuidB1_offset() { return
>> byte_offset_of(CpuidInfo, tpl_cpuidB1_eax); }
>> static ByteSize tpl_cpuidB2_offset() { return
>> byte_offset_of(CpuidInfo, tpl_cpuidB2_eax); }
>> @@ -673,8 +689,11 @@
>> if (is_intel() && supports_processor_topology()) {
>> result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
>> } else if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) {
>> - result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
>> - cores_per_cpu();
>> + if (cpu_family() >= 0x17)
>> + result = _cpuid_info.ext_cpuid_1E_ebx.bits.threads_per_core + 1;
>> + else
>> + result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
>> + cores_per_cpu();
>> }
>> return (result == 0 ? 1 : result);
>> }
>>
>>
>> Please let me know your comments
>>
>> Thanks for your time.
>>
>> Regards,
>> Rohit
>>
>>
>>>> Thanks,
>>>> David
>>>> -----
>>>>
>>>>
>>>>> Reference:
>>>>>
>>>>> https://support.amd.com/TechDocs/54945_PPR_Family_17h_Models_00h-0Fh.pdf
>>>>> [Pg 82]
>>>>>
>>>>> CPUID_Fn8000001E_EBX [Core Identifiers] (CoreId)
>>>>> 15:8 ThreadsPerCore: threads per core. Read-only. Reset: XXh.
>>>>> The number of threads per core is ThreadsPerCore+1.
>>>>>
>>>>> diff --git a/src/cpu/x86/vm/vm_version_x86.cpp
>>>>> b/src/cpu/x86/vm/vm_version_x86.cpp
>>>>> --- a/src/cpu/x86/vm/vm_version_x86.cpp
>>>>> +++ b/src/cpu/x86/vm/vm_version_x86.cpp
>>>>> @@ -70,7 +70,7 @@
>>>>> bool use_evex = FLAG_IS_DEFAULT(UseAVX) || (UseAVX > 2);
>>>>>
>>>>> Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4;
>>>>> - Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7,
>>>>> done, wrapup;
>>>>> + Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7,
>>>>> ext_cpuid8, done, wrapup;
>>>>> Label legacy_setup, save_restore_except, legacy_save_restore,
>>>>> start_simd_check;
>>>>>
>>>>> StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub");
>>>>> @@ -272,9 +272,23 @@
>>>>> __ jccb(Assembler::belowEqual, ext_cpuid5);
>>>>> __ cmpl(rax, 0x80000007); // Is cpuid(0x80000008) supported?
>>>>> __ jccb(Assembler::belowEqual, ext_cpuid7);
>>>>> + __ cmpl(rax, 0x80000008); // Is cpuid(0x8000001E) supported?
>>>>> + __ jccb(Assembler::belowEqual, ext_cpuid8);
>>>>> + //
>>>>> + // Extended cpuid(0x8000001E)
>>>>> + //
>>>>> + __ movl(rax, 0x8000001E);
>>>>> + __ cpuid();
>>>>> + __ lea(rsi, Address(rbp,
>>>>> in_bytes(VM_Version::ext_cpuid1E_offset())));
>>>>> + __ movl(Address(rsi, 0), rax);
>>>>> + __ movl(Address(rsi, 4), rbx);
>>>>> + __ movl(Address(rsi, 8), rcx);
>>>>> + __ movl(Address(rsi,12), rdx);
>>>>> +
>>>>> //
>>>>> // Extended cpuid(0x80000008)
>>>>> //
>>>>> + __ bind(ext_cpuid8);
>>>>> __ movl(rax, 0x80000008);
>>>>> __ cpuid();
>>>>> __ lea(rsi, Address(rbp,
>>>>> in_bytes(VM_Version::ext_cpuid8_offset())));
>>>>> @@ -1109,11 +1123,27 @@
>>>>> }
>>>>>
>>>>> #ifdef COMPILER2
>>>>> - if (MaxVectorSize > 16) {
>>>>> - // Limit vectors size to 16 bytes on current AMD cpus.
>>>>> + if (cpu_family() < 0x17 && MaxVectorSize > 16) {
>>>>> + // Limit vectors size to 16 bytes on AMD cpus < 17h.
>>>>> FLAG_SET_DEFAULT(MaxVectorSize, 16);
>>>>> }
>>>>> #endif // COMPILER2
>>>>> +
>>>>> + // Some defaults for AMD family 17h
>>>>> + if ( cpu_family() == 0x17 ) {
>>>>> + // On family 17h processors use XMM and UnalignedLoadStores for
>>>>> Array Copy
>>>>> + if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
>>>>> + FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
>>>>> + }
>>>>> + if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores))
>>>>> {
>>>>> + FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
>>>>> + }
>>>>> +#ifdef COMPILER2
>>>>> + if (supports_sse4_2() && FLAG_IS_DEFAULT(UseFPUForSpilling)) {
>>>>> + FLAG_SET_DEFAULT(UseFPUForSpilling, true);
>>>>> + }
>>>>> +#endif
>>>>> + }
>>>>> }
>>>>>
>>>>> if( is_intel() ) { // Intel cpus specific settings
>>>>> diff --git a/src/cpu/x86/vm/vm_version_x86.hpp
>>>>> b/src/cpu/x86/vm/vm_version_x86.hpp
>>>>> --- a/src/cpu/x86/vm/vm_version_x86.hpp
>>>>> +++ b/src/cpu/x86/vm/vm_version_x86.hpp
>>>>> @@ -228,6 +228,15 @@
>>>>> } bits;
>>>>> };
>>>>>
>>>>> + union ExtCpuid1EEx {
>>>>> + uint32_t value;
>>>>> + struct {
>>>>> + uint32_t : 8,
>>>>> + threads_per_core : 8,
>>>>> + : 16;
>>>>> + } bits;
>>>>> + };
>>>>> +
>>>>> union XemXcr0Eax {
>>>>> uint32_t value;
>>>>> struct {
>>>>> @@ -398,6 +407,12 @@
>>>>> ExtCpuid8Ecx ext_cpuid8_ecx;
>>>>> uint32_t ext_cpuid8_edx; // reserved
>>>>>
>>>>> + // cpuid function 0x8000001E // AMD 17h
>>>>> + uint32_t ext_cpuid1E_eax;
>>>>> + ExtCpuid1EEx ext_cpuid1E_ebx; // threads per core (AMD17h)
>>>>> + uint32_t ext_cpuid1E_ecx;
>>>>> + uint32_t ext_cpuid1E_edx; // unused currently
>>>>> +
>>>>> // extended control register XCR0 (the XFEATURE_ENABLED_MASK
>>>>> register)
>>>>> XemXcr0Eax xem_xcr0_eax;
>>>>> uint32_t xem_xcr0_edx; // reserved
>>>>> @@ -505,6 +520,14 @@
>>>>> result |= CPU_CLMUL;
>>>>> if (_cpuid_info.sef_cpuid7_ebx.bits.rtm != 0)
>>>>> result |= CPU_RTM;
>>>>> + if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
>>>>> + result |= CPU_ADX;
>>>>> + if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
>>>>> + result |= CPU_BMI2;
>>>>> + if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
>>>>> + result |= CPU_SHA;
>>>>> + if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
>>>>> + result |= CPU_FMA;
>>>>>
>>>>> // AMD features.
>>>>> if (is_amd()) {
>>>>> @@ -518,16 +541,8 @@
>>>>> }
>>>>> // Intel features.
>>>>> if(is_intel()) {
>>>>> - if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
>>>>> - result |= CPU_ADX;
>>>>> - if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
>>>>> - result |= CPU_BMI2;
>>>>> - if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
>>>>> - result |= CPU_SHA;
>>>>> if(_cpuid_info.ext_cpuid1_ecx.bits.lzcnt_intel != 0)
>>>>> result |= CPU_LZCNT;
>>>>> - if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
>>>>> - result |= CPU_FMA;
>>>>> // for Intel, ecx.bits.misalignsse bit (bit 8) indicates
>>>>> support for prefetchw
>>>>> if (_cpuid_info.ext_cpuid1_ecx.bits.misalignsse != 0) {
>>>>> result |= CPU_3DNOW_PREFETCH;
>>>>> @@ -590,6 +605,7 @@
>>>>> static ByteSize ext_cpuid5_offset() { return
>>>>> byte_offset_of(CpuidInfo, ext_cpuid5_eax); }
>>>>> static ByteSize ext_cpuid7_offset() { return
>>>>> byte_offset_of(CpuidInfo, ext_cpuid7_eax); }
>>>>> static ByteSize ext_cpuid8_offset() { return
>>>>> byte_offset_of(CpuidInfo, ext_cpuid8_eax); }
>>>>> + static ByteSize ext_cpuid1E_offset() { return
>>>>> byte_offset_of(CpuidInfo, ext_cpuid1E_eax); }
>>>>> static ByteSize tpl_cpuidB0_offset() { return
>>>>> byte_offset_of(CpuidInfo, tpl_cpuidB0_eax); }
>>>>> static ByteSize tpl_cpuidB1_offset() { return
>>>>> byte_offset_of(CpuidInfo, tpl_cpuidB1_eax); }
>>>>> static ByteSize tpl_cpuidB2_offset() { return
>>>>> byte_offset_of(CpuidInfo, tpl_cpuidB2_eax); }
>>>>> @@ -673,8 +689,11 @@
>>>>> if (is_intel() && supports_processor_topology()) {
>>>>> result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
>>>>> } else if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) {
>>>>> - result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
>>>>> - cores_per_cpu();
>>>>> + if (cpu_family() >= 0x17)
>>>>> + result = _cpuid_info.ext_cpuid1E_ebx.bits.threads_per_core +
>>>>> 1;
>>>>> + else
>>>>> + result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
>>>>> + cores_per_cpu();
>>>>> }
>>>>> return (result == 0 ? 1 : result);
>>>>> }
>>>>>
>>>>> I have attached the patch for review.
>>>>> Please let me know your comments.
>>>>>
>>>>> Thanks,
>>>>> Rohit
>>>>>
>>>>>> Thanks,
>>>>>> Vladimir
>>>>>>
>>>>>>
>>>>>>>
>>>>>>> src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>
>>>>>>> No comments on AMD specific changes.
>>>>>>>
>>>>>>> Thanks,
>>>>>>> David
>>>>>>> -----
>>>>>>>
>>>>>>> On 5/09/2017 3:43 PM, David Holmes wrote:
>>>>>>>>
>>>>>>>>
>>>>>>>>
>>>>>>>> On 5/09/2017 3:29 PM, Rohit Arul Raj wrote:
>>>>>>>>>
>>>>>>>>>
>>>>>>>>>
>>>>>>>>> Hello David,
>>>>>>>>>
>>>>>>>>> On Tue, Sep 5, 2017 at 10:31 AM, David Holmes
>>>>>>>>> <david.holmes at oracle.com>
>>>>>>>>> wrote:
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>> Hi Rohit,
>>>>>>>>>>
>>>>>>>>>> I was unable to apply your patch to latest jdk10/hs/hotspot repo.
>>>>>>>>>>
>>>>>>>>>
>>>>>>>>> I checked out the latest jdk10/hs/hotspot [parent:
>>>>>>>>> 13548:1a9c2e07a826]
>>>>>>>>> and was able to apply the patch [epyc-amd17h-defaults-3Sept.patch]
>>>>>>>>> without any issues.
>>>>>>>>> Can you share the error message that you are getting?
>>>>>>>>
>>>>>>>>
>>>>>>>>
>>>>>>>>
>>>>>>>> I was getting this:
>>>>>>>>
>>>>>>>> applying hotspot.patch
>>>>>>>> patching file src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>> Hunk #1 FAILED at 1108
>>>>>>>> 1 out of 1 hunks FAILED -- saving rejects to file
>>>>>>>> src/cpu/x86/vm/vm_version_x86.cpp.rej
>>>>>>>> patching file src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>> Hunk #2 FAILED at 522
>>>>>>>> 1 out of 2 hunks FAILED -- saving rejects to file
>>>>>>>> src/cpu/x86/vm/vm_version_x86.hpp.rej
>>>>>>>> abort: patch failed to apply
>>>>>>>>
>>>>>>>> but I started again and this time it applied fine, so I'm not sure
>>>>>>>> what was going on there.
>>>>>>>>
>>>>>>>> Cheers,
>>>>>>>> David
>>>>>>>>
>>>>>>>>> Regards,
>>>>>>>>> Rohit
>>>>>>>>>
>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>> On 4/09/2017 2:42 AM, Rohit Arul Raj wrote:
>>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>> Hello Vladimir,
>>>>>>>>>>>
>>>>>>>>>>> On Sat, Sep 2, 2017 at 11:25 PM, Vladimir Kozlov
>>>>>>>>>>> <vladimir.kozlov at oracle.com> wrote:
>>>>>>>>>>>>
>>>>>>>>>>>>
>>>>>>>>>>>>
>>>>>>>>>>>>
>>>>>>>>>>>> Hi Rohit,
>>>>>>>>>>>>
>>>>>>>>>>>> On 9/2/17 1:16 AM, Rohit Arul Raj wrote:
>>>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>>> Hello Vladimir,
>>>>>>>>>>>>>
>>>>>>>>>>>>>> Changes look good. Only question I have is about
>>>>>>>>>>>>>> MaxVectorSize. It is set to 16 only in presence of AVX:
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> http://hg.openjdk.java.net/jdk10/hs/hotspot/file/046eab27258f/src/cpu/x86/vm/vm_version_x86.cpp#l945
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> Does that code work for AMD 17h too?
>>>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>>> Thanks for pointing that out. Yes, the code works fine for AMD
>>>>>>>>>>>>> 17h, so I have removed the surplus check for MaxVectorSize from
>>>>>>>>>>>>> my patch. I have updated, re-tested and attached the patch.
>>>>>>>>>>>>
>>>>>>>>>>>>
>>>>>>>>>>>>
>>>>>>>>>>>>
>>>>>>>>>>>>
>>>>>>>>>>>>
>>>>>>>>>>>> Which check did you remove?
>>>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>> My older patch had the check shown below, which was required on
>>>>>>>>>>> JDK9, where the default MaxVectorSize was 64. This is handled
>>>>>>>>>>> better in OpenJDK 10, so the check is no longer required.
>>>>>>>>>>>
>>>>>>>>>>> + // Some defaults for AMD family 17h
>>>>>>>>>>> + if ( cpu_family() == 0x17 ) {
>>>>>>>>>>> ...
>>>>>>>>>>> ...
>>>>>>>>>>> + if (MaxVectorSize > 32) {
>>>>>>>>>>> + FLAG_SET_DEFAULT(MaxVectorSize, 32);
>>>>>>>>>>> + }
>>>>>>>>>>> ..
>>>>>>>>>>> ..
>>>>>>>>>>> + }
>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>>> I have one query regarding the setting of UseSHA flag:
>>>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>>> http://hg.openjdk.java.net/jdk10/hs/hotspot/file/046eab27258f/src/cpu/x86/vm/vm_version_x86.cpp#l821
>>>>>>>>>>>>>
>>>>>>>>>>>>> AMD 17h has support for SHA.
>>>>>>>>>>>>> AMD 15h doesn't have support for SHA. Still, the "UseSHA" flag
>>>>>>>>>>>>> gets enabled for it based on the availability of BMI2 and AVX2.
>>>>>>>>>>>>> Is there an underlying reason for this? I have handled this in
>>>>>>>>>>>>> the patch but just wanted to confirm.
>>>>>>>>>>>>
>>>>>>>>>>>>
>>>>>>>>>>>>
>>>>>>>>>>>>
>>>>>>>>>>>>
>>>>>>>>>>>>
>>>>>>>>>>>> It was done with the following changes, which use only AVX2 and
>>>>>>>>>>>> BMI2 instructions to calculate SHA-256:
>>>>>>>>>>>>
>>>>>>>>>>>> http://hg.openjdk.java.net/jdk10/hs/hotspot/rev/6a17c49de974
>>>>>>>>>>>>
>>>>>>>>>>>> I don't know if AMD 15h supports these instructions and can
>>>>>>>>>>>> execute that code. You need to test it.
>>>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>> Ok, got it. Since AMD15h has support for AVX2 and BMI2
>>>>>>>>>>> instructions,
>>>>>>>>>>> it should work.
>>>>>>>>>>> Confirmed by running following sanity tests:
>>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>> ./hotspot/test/compiler/intrinsics/sha/sanity/TestSHA1Intrinsics.java
>>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>> ./hotspot/test/compiler/intrinsics/sha/sanity/TestSHA512Intrinsics.java
>>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>> ./hotspot/test/compiler/intrinsics/sha/sanity/TestSHA256Intrinsics.java
>>>>>>>>>>>
>>>>>>>>>>> So I have removed those SHA checks from my patch too.
>>>>>>>>>>>
>>>>>>>>>>> Please find attached updated, re-tested patch.
>>>>>>>>>>>
>>>>>>>>>>> diff --git a/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>> b/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>> --- a/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>> +++ b/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>> @@ -1109,11 +1109,27 @@
>>>>>>>>>>> }
>>>>>>>>>>>
>>>>>>>>>>> #ifdef COMPILER2
>>>>>>>>>>> - if (MaxVectorSize > 16) {
>>>>>>>>>>> - // Limit vectors size to 16 bytes on current AMD cpus.
>>>>>>>>>>> + if (cpu_family() < 0x17 && MaxVectorSize > 16) {
>>>>>>>>>>> + // Limit vectors size to 16 bytes on AMD cpus < 17h.
>>>>>>>>>>> FLAG_SET_DEFAULT(MaxVectorSize, 16);
>>>>>>>>>>> }
>>>>>>>>>>> #endif // COMPILER2
>>>>>>>>>>> +
>>>>>>>>>>> + // Some defaults for AMD family 17h
>>>>>>>>>>> + if ( cpu_family() == 0x17 ) {
>>>>>>>>>>> + // On family 17h processors use XMM and
>>>>>>>>>>> UnalignedLoadStores
>>>>>>>>>>> for
>>>>>>>>>>> Array Copy
>>>>>>>>>>> + if (supports_sse2() &&
>>>>>>>>>>> FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
>>>>>>>>>>> + FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
>>>>>>>>>>> + }
>>>>>>>>>>> + if (supports_sse2() &&
>>>>>>>>>>> FLAG_IS_DEFAULT(UseUnalignedLoadStores))
>>>>>>>>>>> {
>>>>>>>>>>> + FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
>>>>>>>>>>> + }
>>>>>>>>>>> +#ifdef COMPILER2
>>>>>>>>>>> + if (supports_sse4_2() &&
>>>>>>>>>>> FLAG_IS_DEFAULT(UseFPUForSpilling))
>>>>>>>>>>> {
>>>>>>>>>>> + FLAG_SET_DEFAULT(UseFPUForSpilling, true);
>>>>>>>>>>> + }
>>>>>>>>>>> +#endif
>>>>>>>>>>> + }
>>>>>>>>>>> }
>>>>>>>>>>>
>>>>>>>>>>> if( is_intel() ) { // Intel cpus specific settings
>>>>>>>>>>> diff --git a/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>>> b/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>>> --- a/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>>> +++ b/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>>> @@ -505,6 +505,14 @@
>>>>>>>>>>> result |= CPU_CLMUL;
>>>>>>>>>>> if (_cpuid_info.sef_cpuid7_ebx.bits.rtm != 0)
>>>>>>>>>>> result |= CPU_RTM;
>>>>>>>>>>> + if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
>>>>>>>>>>> + result |= CPU_ADX;
>>>>>>>>>>> + if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
>>>>>>>>>>> + result |= CPU_BMI2;
>>>>>>>>>>> + if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
>>>>>>>>>>> + result |= CPU_SHA;
>>>>>>>>>>> + if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
>>>>>>>>>>> + result |= CPU_FMA;
>>>>>>>>>>>
>>>>>>>>>>> // AMD features.
>>>>>>>>>>> if (is_amd()) {
>>>>>>>>>>> @@ -515,19 +523,13 @@
>>>>>>>>>>> result |= CPU_LZCNT;
>>>>>>>>>>> if (_cpuid_info.ext_cpuid1_ecx.bits.sse4a != 0)
>>>>>>>>>>> result |= CPU_SSE4A;
>>>>>>>>>>> + if(_cpuid_info.std_cpuid1_edx.bits.ht != 0)
>>>>>>>>>>> + result |= CPU_HT;
>>>>>>>>>>> }
>>>>>>>>>>> // Intel features.
>>>>>>>>>>> if(is_intel()) {
>>>>>>>>>>> - if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
>>>>>>>>>>> - result |= CPU_ADX;
>>>>>>>>>>> - if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
>>>>>>>>>>> - result |= CPU_BMI2;
>>>>>>>>>>> - if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
>>>>>>>>>>> - result |= CPU_SHA;
>>>>>>>>>>> if(_cpuid_info.ext_cpuid1_ecx.bits.lzcnt_intel != 0)
>>>>>>>>>>> result |= CPU_LZCNT;
>>>>>>>>>>> - if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
>>>>>>>>>>> - result |= CPU_FMA;
>>>>>>>>>>> // for Intel, ecx.bits.misalignsse bit (bit 8)
>>>>>>>>>>> indicates
>>>>>>>>>>> support for prefetchw
>>>>>>>>>>> if (_cpuid_info.ext_cpuid1_ecx.bits.misalignsse != 0)
>>>>>>>>>>> {
>>>>>>>>>>> result |= CPU_3DNOW_PREFETCH;
>>>>>>>>>>>
>>>>>>>>>>> Please let me know your comments.
>>>>>>>>>>>
>>>>>>>>>>> Thanks for your time.
>>>>>>>>>>> Rohit
>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>>> Thanks for taking time to review the code.
>>>>>>>>>>>>>
>>>>>>>>>>>>> diff --git a/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>>>> b/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>>>> --- a/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>>>> +++ b/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>>>> @@ -1088,6 +1088,22 @@
>>>>>>>>>>>>> }
>>>>>>>>>>>>> FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
>>>>>>>>>>>>> }
>>>>>>>>>>>>> + if (supports_sha()) {
>>>>>>>>>>>>> + if (FLAG_IS_DEFAULT(UseSHA)) {
>>>>>>>>>>>>> + FLAG_SET_DEFAULT(UseSHA, true);
>>>>>>>>>>>>> + }
>>>>>>>>>>>>> + } else if (UseSHA || UseSHA1Intrinsics ||
>>>>>>>>>>>>> UseSHA256Intrinsics
>>>>>>>>>>>>> ||
>>>>>>>>>>>>> UseSHA512Intrinsics) {
>>>>>>>>>>>>> + if (!FLAG_IS_DEFAULT(UseSHA) ||
>>>>>>>>>>>>> + !FLAG_IS_DEFAULT(UseSHA1Intrinsics) ||
>>>>>>>>>>>>> + !FLAG_IS_DEFAULT(UseSHA256Intrinsics) ||
>>>>>>>>>>>>> + !FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
>>>>>>>>>>>>> + warning("SHA instructions are not available on this
>>>>>>>>>>>>> CPU");
>>>>>>>>>>>>> + }
>>>>>>>>>>>>> + FLAG_SET_DEFAULT(UseSHA, false);
>>>>>>>>>>>>> + FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
>>>>>>>>>>>>> + FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
>>>>>>>>>>>>> + FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
>>>>>>>>>>>>> + }
>>>>>>>>>>>>>
>>>>>>>>>>>>> // some defaults for AMD family 15h
>>>>>>>>>>>>> if ( cpu_family() == 0x15 ) {
>>>>>>>>>>>>> @@ -1109,11 +1125,40 @@
>>>>>>>>>>>>> }
>>>>>>>>>>>>>
>>>>>>>>>>>>> #ifdef COMPILER2
>>>>>>>>>>>>> - if (MaxVectorSize > 16) {
>>>>>>>>>>>>> - // Limit vectors size to 16 bytes on current AMD cpus.
>>>>>>>>>>>>> + if (cpu_family() < 0x17 && MaxVectorSize > 16) {
>>>>>>>>>>>>> + // Limit vectors size to 16 bytes on AMD cpus < 17h.
>>>>>>>>>>>>> FLAG_SET_DEFAULT(MaxVectorSize, 16);
>>>>>>>>>>>>> }
>>>>>>>>>>>>> #endif // COMPILER2
>>>>>>>>>>>>> +
>>>>>>>>>>>>> + // Some defaults for AMD family 17h
>>>>>>>>>>>>> + if ( cpu_family() == 0x17 ) {
>>>>>>>>>>>>> + // On family 17h processors use XMM and
>>>>>>>>>>>>> UnalignedLoadStores
>>>>>>>>>>>>> for
>>>>>>>>>>>>> Array Copy
>>>>>>>>>>>>> + if (supports_sse2() &&
>>>>>>>>>>>>> FLAG_IS_DEFAULT(UseXMMForArrayCopy))
>>>>>>>>>>>>> {
>>>>>>>>>>>>> + FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
>>>>>>>>>>>>> + }
>>>>>>>>>>>>> + if (supports_sse2() &&
>>>>>>>>>>>>> FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
>>>>>>>>>>>>> + FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
>>>>>>>>>>>>> + }
>>>>>>>>>>>>> + if (supports_bmi2() &&
>>>>>>>>>>>>> FLAG_IS_DEFAULT(UseBMI2Instructions))
>>>>>>>>>>>>> {
>>>>>>>>>>>>> + FLAG_SET_DEFAULT(UseBMI2Instructions, true);
>>>>>>>>>>>>> + }
>>>>>>>>>>>>> + if (UseSHA) {
>>>>>>>>>>>>> + if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
>>>>>>>>>>>>> + FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
>>>>>>>>>>>>> + } else if (UseSHA512Intrinsics) {
>>>>>>>>>>>>> + warning("Intrinsics for SHA-384 and SHA-512 crypto
>>>>>>>>>>>>> hash
>>>>>>>>>>>>> functions not available on this CPU.");
>>>>>>>>>>>>> + FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
>>>>>>>>>>>>> + }
>>>>>>>>>>>>> + }
>>>>>>>>>>>>> +#ifdef COMPILER2
>>>>>>>>>>>>> + if (supports_sse4_2()) {
>>>>>>>>>>>>> + if (FLAG_IS_DEFAULT(UseFPUForSpilling)) {
>>>>>>>>>>>>> + FLAG_SET_DEFAULT(UseFPUForSpilling, true);
>>>>>>>>>>>>> + }
>>>>>>>>>>>>> + }
>>>>>>>>>>>>> +#endif
>>>>>>>>>>>>> + }
>>>>>>>>>>>>> }
>>>>>>>>>>>>>
>>>>>>>>>>>>> if( is_intel() ) { // Intel cpus specific settings
>>>>>>>>>>>>> diff --git a/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>>>>> b/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>>>>> --- a/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>>>>> +++ b/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>>>>> @@ -505,6 +505,14 @@
>>>>>>>>>>>>> result |= CPU_CLMUL;
>>>>>>>>>>>>> if (_cpuid_info.sef_cpuid7_ebx.bits.rtm != 0)
>>>>>>>>>>>>> result |= CPU_RTM;
>>>>>>>>>>>>> + if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
>>>>>>>>>>>>> + result |= CPU_ADX;
>>>>>>>>>>>>> + if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
>>>>>>>>>>>>> + result |= CPU_BMI2;
>>>>>>>>>>>>> + if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
>>>>>>>>>>>>> + result |= CPU_SHA;
>>>>>>>>>>>>> + if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
>>>>>>>>>>>>> + result |= CPU_FMA;
>>>>>>>>>>>>>
>>>>>>>>>>>>> // AMD features.
>>>>>>>>>>>>> if (is_amd()) {
>>>>>>>>>>>>> @@ -515,19 +523,13 @@
>>>>>>>>>>>>> result |= CPU_LZCNT;
>>>>>>>>>>>>> if (_cpuid_info.ext_cpuid1_ecx.bits.sse4a != 0)
>>>>>>>>>>>>> result |= CPU_SSE4A;
>>>>>>>>>>>>> + if(_cpuid_info.std_cpuid1_edx.bits.ht != 0)
>>>>>>>>>>>>> + result |= CPU_HT;
>>>>>>>>>>>>> }
>>>>>>>>>>>>> // Intel features.
>>>>>>>>>>>>> if(is_intel()) {
>>>>>>>>>>>>> - if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
>>>>>>>>>>>>> - result |= CPU_ADX;
>>>>>>>>>>>>> - if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
>>>>>>>>>>>>> - result |= CPU_BMI2;
>>>>>>>>>>>>> - if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
>>>>>>>>>>>>> - result |= CPU_SHA;
>>>>>>>>>>>>> if(_cpuid_info.ext_cpuid1_ecx.bits.lzcnt_intel !=
>>>>>>>>>>>>> 0)
>>>>>>>>>>>>> result |= CPU_LZCNT;
>>>>>>>>>>>>> - if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
>>>>>>>>>>>>> - result |= CPU_FMA;
>>>>>>>>>>>>> // for Intel, ecx.bits.misalignsse bit (bit 8)
>>>>>>>>>>>>> indicates
>>>>>>>>>>>>> support for prefetchw
>>>>>>>>>>>>> if (_cpuid_info.ext_cpuid1_ecx.bits.misalignsse !=
>>>>>>>>>>>>> 0) {
>>>>>>>>>>>>> result |= CPU_3DNOW_PREFETCH;
>>>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>>> Regards,
>>>>>>>>>>>>> Rohit
>>>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>>>> On 9/1/17 8:04 AM, Rohit Arul Raj wrote:
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> On Fri, Sep 1, 2017 at 10:27 AM, Rohit Arul Raj
>>>>>>>>>>>>>>> <rohitarulraj at gmail.com>
>>>>>>>>>>>>>>> wrote:
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> On Fri, Sep 1, 2017 at 3:01 AM, David Holmes
>>>>>>>>>>>>>>>> <david.holmes at oracle.com>
>>>>>>>>>>>>>>>> wrote:
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>> Hi Rohit,
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>> I think the patch needs updating for jdk10 as I already see
>>>>>>>>>>>>>>>>> a
>>>>>>>>>>>>>>>>> lot of
>>>>>>>>>>>>>>>>> logic
>>>>>>>>>>>>>>>>> around UseSHA in vm_version_x86.cpp.
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>> Thanks,
>>>>>>>>>>>>>>>>> David
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> Thanks David, I will update the patch wrt JDK10 source base,
>>>>>>>>>>>>>>>> test
>>>>>>>>>>>>>>>> and
>>>>>>>>>>>>>>>> resubmit for review.
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> Regards,
>>>>>>>>>>>>>>>> Rohit
>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> Hi All,
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> I have updated the patch wrt openjdk10/hotspot (parent:
>>>>>>>>>>>>>>> 13519:71337910df60), did regression testing using jtreg
>>>>>>>>>>>>>>> ($make
>>>>>>>>>>>>>>> default) and didn't find any regressions.
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> Can anyone please volunteer to review this patch which sets
>>>>>>>>>>>>>>> flag/ISA
>>>>>>>>>>>>>>> defaults for newer AMD 17h (EPYC) processor?
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> ************************* Patch ****************************
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> diff --git a/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>>>>>> b/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>>>>>> --- a/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>>>>>> +++ b/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>>>>>> @@ -1088,6 +1088,22 @@
>>>>>>>>>>>>>>> }
>>>>>>>>>>>>>>> FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
>>>>>>>>>>>>>>> }
>>>>>>>>>>>>>>> + if (supports_sha()) {
>>>>>>>>>>>>>>> + if (FLAG_IS_DEFAULT(UseSHA)) {
>>>>>>>>>>>>>>> + FLAG_SET_DEFAULT(UseSHA, true);
>>>>>>>>>>>>>>> + }
>>>>>>>>>>>>>>> + } else if (UseSHA || UseSHA1Intrinsics ||
>>>>>>>>>>>>>>> UseSHA256Intrinsics
>>>>>>>>>>>>>>> ||
>>>>>>>>>>>>>>> UseSHA512Intrinsics) {
>>>>>>>>>>>>>>> + if (!FLAG_IS_DEFAULT(UseSHA) ||
>>>>>>>>>>>>>>> + !FLAG_IS_DEFAULT(UseSHA1Intrinsics) ||
>>>>>>>>>>>>>>> + !FLAG_IS_DEFAULT(UseSHA256Intrinsics) ||
>>>>>>>>>>>>>>> + !FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
>>>>>>>>>>>>>>> + warning("SHA instructions are not available on this
>>>>>>>>>>>>>>> CPU");
>>>>>>>>>>>>>>> + }
>>>>>>>>>>>>>>> + FLAG_SET_DEFAULT(UseSHA, false);
>>>>>>>>>>>>>>> + FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
>>>>>>>>>>>>>>> + FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
>>>>>>>>>>>>>>> + FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
>>>>>>>>>>>>>>> + }
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> // some defaults for AMD family 15h
>>>>>>>>>>>>>>> if ( cpu_family() == 0x15 ) {
>>>>>>>>>>>>>>> @@ -1109,11 +1125,43 @@
>>>>>>>>>>>>>>> }
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> #ifdef COMPILER2
>>>>>>>>>>>>>>> - if (MaxVectorSize > 16) {
>>>>>>>>>>>>>>> - // Limit vectors size to 16 bytes on current AMD cpus.
>>>>>>>>>>>>>>> + if (cpu_family() < 0x17 && MaxVectorSize > 16) {
>>>>>>>>>>>>>>> + // Limit vectors size to 16 bytes on AMD cpus < 17h.
>>>>>>>>>>>>>>> FLAG_SET_DEFAULT(MaxVectorSize, 16);
>>>>>>>>>>>>>>> }
>>>>>>>>>>>>>>> #endif // COMPILER2
>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>> + // Some defaults for AMD family 17h
>>>>>>>>>>>>>>> + if ( cpu_family() == 0x17 ) {
>>>>>>>>>>>>>>> + // On family 17h processors use XMM and
>>>>>>>>>>>>>>> UnalignedLoadStores
>>>>>>>>>>>>>>> for
>>>>>>>>>>>>>>> Array Copy
>>>>>>>>>>>>>>> + if (supports_sse2() &&
>>>>>>>>>>>>>>> FLAG_IS_DEFAULT(UseXMMForArrayCopy))
>>>>>>>>>>>>>>> {
>>>>>>>>>>>>>>> + UseXMMForArrayCopy = true;
>>>>>>>>>>>>>>> + }
>>>>>>>>>>>>>>> + if (supports_sse2() &&
>>>>>>>>>>>>>>> FLAG_IS_DEFAULT(UseUnalignedLoadStores))
>>>>>>>>>>>>>>> {
>>>>>>>>>>>>>>> + UseUnalignedLoadStores = true;
>>>>>>>>>>>>>>> + }
>>>>>>>>>>>>>>> + if (supports_bmi2() &&
>>>>>>>>>>>>>>> FLAG_IS_DEFAULT(UseBMI2Instructions)) {
>>>>>>>>>>>>>>> + UseBMI2Instructions = true;
>>>>>>>>>>>>>>> + }
>>>>>>>>>>>>>>> + if (MaxVectorSize > 32) {
>>>>>>>>>>>>>>> + FLAG_SET_DEFAULT(MaxVectorSize, 32);
>>>>>>>>>>>>>>> + }
>>>>>>>>>>>>>>> + if (UseSHA) {
>>>>>>>>>>>>>>> + if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
>>>>>>>>>>>>>>> + FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
>>>>>>>>>>>>>>> + } else if (UseSHA512Intrinsics) {
>>>>>>>>>>>>>>> + warning("Intrinsics for SHA-384 and SHA-512 crypto
>>>>>>>>>>>>>>> hash
>>>>>>>>>>>>>>> functions not available on this CPU.");
>>>>>>>>>>>>>>> + FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
>>>>>>>>>>>>>>> + }
>>>>>>>>>>>>>>> + }
>>>>>>>>>>>>>>> +#ifdef COMPILER2
>>>>>>>>>>>>>>> + if (supports_sse4_2()) {
>>>>>>>>>>>>>>> + if (FLAG_IS_DEFAULT(UseFPUForSpilling)) {
>>>>>>>>>>>>>>> + FLAG_SET_DEFAULT(UseFPUForSpilling, true);
>>>>>>>>>>>>>>> + }
>>>>>>>>>>>>>>> + }
>>>>>>>>>>>>>>> +#endif
>>>>>>>>>>>>>>> + }
>>>>>>>>>>>>>>> }
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> if( is_intel() ) { // Intel cpus specific settings
>>>>>>>>>>>>>>> diff --git a/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>>>>>>> b/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>>>>>>> --- a/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>>>>>>> +++ b/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>>>>>>> @@ -505,6 +505,14 @@
>>>>>>>>>>>>>>> result |= CPU_CLMUL;
>>>>>>>>>>>>>>> if (_cpuid_info.sef_cpuid7_ebx.bits.rtm != 0)
>>>>>>>>>>>>>>> result |= CPU_RTM;
>>>>>>>>>>>>>>> + if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
>>>>>>>>>>>>>>> + result |= CPU_ADX;
>>>>>>>>>>>>>>> + if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
>>>>>>>>>>>>>>> + result |= CPU_BMI2;
>>>>>>>>>>>>>>> + if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
>>>>>>>>>>>>>>> + result |= CPU_SHA;
>>>>>>>>>>>>>>> + if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
>>>>>>>>>>>>>>> + result |= CPU_FMA;
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> // AMD features.
>>>>>>>>>>>>>>> if (is_amd()) {
>>>>>>>>>>>>>>> @@ -515,19 +523,13 @@
>>>>>>>>>>>>>>> result |= CPU_LZCNT;
>>>>>>>>>>>>>>> if (_cpuid_info.ext_cpuid1_ecx.bits.sse4a != 0)
>>>>>>>>>>>>>>> result |= CPU_SSE4A;
>>>>>>>>>>>>>>> + if(_cpuid_info.std_cpuid1_edx.bits.ht != 0)
>>>>>>>>>>>>>>> + result |= CPU_HT;
>>>>>>>>>>>>>>> }
>>>>>>>>>>>>>>> // Intel features.
>>>>>>>>>>>>>>> if(is_intel()) {
>>>>>>>>>>>>>>> - if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
>>>>>>>>>>>>>>> - result |= CPU_ADX;
>>>>>>>>>>>>>>> - if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
>>>>>>>>>>>>>>> - result |= CPU_BMI2;
>>>>>>>>>>>>>>> - if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
>>>>>>>>>>>>>>> - result |= CPU_SHA;
>>>>>>>>>>>>>>> if(_cpuid_info.ext_cpuid1_ecx.bits.lzcnt_intel
>>>>>>>>>>>>>>> != 0)
>>>>>>>>>>>>>>> result |= CPU_LZCNT;
>>>>>>>>>>>>>>> - if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
>>>>>>>>>>>>>>> - result |= CPU_FMA;
>>>>>>>>>>>>>>> // for Intel, ecx.bits.misalignsse bit (bit 8)
>>>>>>>>>>>>>>> indicates
>>>>>>>>>>>>>>> support for prefetchw
>>>>>>>>>>>>>>> if (_cpuid_info.ext_cpuid1_ecx.bits.misalignsse
>>>>>>>>>>>>>>> !=
>>>>>>>>>>>>>>> 0) {
>>>>>>>>>>>>>>> result |= CPU_3DNOW_PREFETCH;
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> **************************************************************
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> Thanks,
>>>>>>>>>>>>>>> Rohit
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>> On 1/09/2017 1:11 AM, Rohit Arul Raj wrote:
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>> On Thu, Aug 31, 2017 at 5:59 PM, David Holmes
>>>>>>>>>>>>>>>>>> <david.holmes at oracle.com>
>>>>>>>>>>>>>>>>>> wrote:
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>> Hi Rohit,
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>> On 31/08/2017 7:03 PM, Rohit Arul Raj wrote:
>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>> I would like a volunteer to review this patch
>>>>>>>>>>>>>>>>>>>> (openJDK9)
>>>>>>>>>>>>>>>>>>>> which
>>>>>>>>>>>>>>>>>>>> sets
>>>>>>>>>>>>>>>>>>>> flag/ISA defaults for newer AMD 17h (EPYC) processor and
>>>>>>>>>>>>>>>>>>>> help
>>>>>>>>>>>>>>>>>>>> us
>>>>>>>>>>>>>>>>>>>> with
>>>>>>>>>>>>>>>>>>>> the commit process.
>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>> Webrev:
>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>> https://www.dropbox.com/sh/08bsxaxupg8kbam/AADurTXLGIZ6C-tiIAi_Glyka?dl=0
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>> Unfortunately patches cannot be accepted from systems
>>>>>>>>>>>>>>>>>>> outside
>>>>>>>>>>>>>>>>>>> the
>>>>>>>>>>>>>>>>>>> OpenJDK
>>>>>>>>>>>>>>>>>>> infrastructure and ...
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>> I have also attached the patch (hg diff -g) for
>>>>>>>>>>>>>>>>>>>> reference.
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>> ... unfortunately patches tend to get stripped by the
>>>>>>>>>>>>>>>>>>> mail
>>>>>>>>>>>>>>>>>>> servers.
>>>>>>>>>>>>>>>>>>> If
>>>>>>>>>>>>>>>>>>> the
>>>>>>>>>>>>>>>>>>> patch is small please include it inline. Otherwise you
>>>>>>>>>>>>>>>>>>> will
>>>>>>>>>>>>>>>>>>> need
>>>>>>>>>>>>>>>>>>> to
>>>>>>>>>>>>>>>>>>> find
>>>>>>>>>>>>>>>>>>> an
>>>>>>>>>>>>>>>>>>> OpenJDK Author who can host it for you on
>>>>>>>>>>>>>>>>>>> cr.openjdk.java.net.
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>> 3) I have done regression testing using jtreg ($make
>>>>>>>>>>>>>>>>>>>> default)
>>>>>>>>>>>>>>>>>>>> and
>>>>>>>>>>>>>>>>>>>> didn't find any regressions.
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>> Sounds good, but until I see the patch it is hard to
>>>>>>>>>>>>>>>>>>> comment
>>>>>>>>>>>>>>>>>>> on
>>>>>>>>>>>>>>>>>>> testing
>>>>>>>>>>>>>>>>>>> requirements.
>>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>> Thanks,
>>>>>>>>>>>>>>>>>>> David
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>> Thanks David,
>>>>>>>>>>>>>>>>>> Yes, it's a small patch.
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>> diff --git a/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>>>>>>>>> b/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>>>>>>>>> --- a/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>>>>>>>>> +++ b/src/cpu/x86/vm/vm_version_x86.cpp
>>>>>>>>>>>>>>>>>> @@ -1051,6 +1051,22 @@
>>>>>>>>>>>>>>>>>> }
>>>>>>>>>>>>>>>>>> FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
>>>>>>>>>>>>>>>>>> }
>>>>>>>>>>>>>>>>>> + if (supports_sha()) {
>>>>>>>>>>>>>>>>>> + if (FLAG_IS_DEFAULT(UseSHA)) {
>>>>>>>>>>>>>>>>>> + FLAG_SET_DEFAULT(UseSHA, true);
>>>>>>>>>>>>>>>>>> + }
>>>>>>>>>>>>>>>>>> + } else if (UseSHA || UseSHA1Intrinsics ||
>>>>>>>>>>>>>>>>>> UseSHA256Intrinsics
>>>>>>>>>>>>>>>>>> ||
>>>>>>>>>>>>>>>>>> UseSHA512Intrinsics) {
>>>>>>>>>>>>>>>>>> + if (!FLAG_IS_DEFAULT(UseSHA) ||
>>>>>>>>>>>>>>>>>> + !FLAG_IS_DEFAULT(UseSHA1Intrinsics) ||
>>>>>>>>>>>>>>>>>> + !FLAG_IS_DEFAULT(UseSHA256Intrinsics) ||
>>>>>>>>>>>>>>>>>> + !FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
>>>>>>>>>>>>>>>>>> + warning("SHA instructions are not available on
>>>>>>>>>>>>>>>>>> this
>>>>>>>>>>>>>>>>>> CPU");
>>>>>>>>>>>>>>>>>> + }
>>>>>>>>>>>>>>>>>> + FLAG_SET_DEFAULT(UseSHA, false);
>>>>>>>>>>>>>>>>>> + FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
>>>>>>>>>>>>>>>>>> + FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
>>>>>>>>>>>>>>>>>> + FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
>>>>>>>>>>>>>>>>>> + }
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>> // some defaults for AMD family 15h
>>>>>>>>>>>>>>>>>> if ( cpu_family() == 0x15 ) {
>>>>>>>>>>>>>>>>>> @@ -1072,11 +1088,43 @@
>>>>>>>>>>>>>>>>>> }
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>> #ifdef COMPILER2
>>>>>>>>>>>>>>>>>> - if (MaxVectorSize > 16) {
>>>>>>>>>>>>>>>>>> - // Limit vectors size to 16 bytes on current AMD
>>>>>>>>>>>>>>>>>> cpus.
>>>>>>>>>>>>>>>>>> + if (cpu_family() < 0x17 && MaxVectorSize > 16) {
>>>>>>>>>>>>>>>>>> + // Limit vectors size to 16 bytes on AMD cpus <
>>>>>>>>>>>>>>>>>> 17h.
>>>>>>>>>>>>>>>>>> FLAG_SET_DEFAULT(MaxVectorSize, 16);
>>>>>>>>>>>>>>>>>> }
>>>>>>>>>>>>>>>>>> #endif // COMPILER2
>>>>>>>>>>>>>>>>>> +
>>>>>>>>>>>>>>>>>> + // Some defaults for AMD family 17h
>>>>>>>>>>>>>>>>>> + if ( cpu_family() == 0x17 ) {
>>>>>>>>>>>>>>>>>> + // On family 17h processors use XMM and
>>>>>>>>>>>>>>>>>> UnalignedLoadStores
>>>>>>>>>>>>>>>>>> for
>>>>>>>>>>>>>>>>>> Array Copy
>>>>>>>>>>>>>>>>>> + if (supports_sse2() &&
>>>>>>>>>>>>>>>>>> FLAG_IS_DEFAULT(UseXMMForArrayCopy))
>>>>>>>>>>>>>>>>>> {
>>>>>>>>>>>>>>>>>> + UseXMMForArrayCopy = true;
>>>>>>>>>>>>>>>>>> + }
>>>>>>>>>>>>>>>>>> + if (supports_sse2() &&
>>>>>>>>>>>>>>>>>> FLAG_IS_DEFAULT(UseUnalignedLoadStores))
>>>>>>>>>>>>>>>>>> {
>>>>>>>>>>>>>>>>>> + UseUnalignedLoadStores = true;
>>>>>>>>>>>>>>>>>> + }
>>>>>>>>>>>>>>>>>> + if (supports_bmi2() &&
>>>>>>>>>>>>>>>>>> FLAG_IS_DEFAULT(UseBMI2Instructions))
>>>>>>>>>>>>>>>>>> {
>>>>>>>>>>>>>>>>>> + UseBMI2Instructions = true;
>>>>>>>>>>>>>>>>>> + }
>>>>>>>>>>>>>>>>>> + if (MaxVectorSize > 32) {
>>>>>>>>>>>>>>>>>> + FLAG_SET_DEFAULT(MaxVectorSize, 32);
>>>>>>>>>>>>>>>>>> + }
>>>>>>>>>>>>>>>>>> + if (UseSHA) {
>>>>>>>>>>>>>>>>>> + if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
>>>>>>>>>>>>>>>>>> + FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
>>>>>>>>>>>>>>>>>> + } else if (UseSHA512Intrinsics) {
>>>>>>>>>>>>>>>>>> + warning("Intrinsics for SHA-384 and SHA-512
>>>>>>>>>>>>>>>>>> crypto
>>>>>>>>>>>>>>>>>> hash
>>>>>>>>>>>>>>>>>> functions not available on this CPU.");
>>>>>>>>>>>>>>>>>> + FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
>>>>>>>>>>>>>>>>>> + }
>>>>>>>>>>>>>>>>>> + }
>>>>>>>>>>>>>>>>>> +#ifdef COMPILER2
>>>>>>>>>>>>>>>>>> + if (supports_sse4_2()) {
>>>>>>>>>>>>>>>>>> + if (FLAG_IS_DEFAULT(UseFPUForSpilling)) {
>>>>>>>>>>>>>>>>>> + FLAG_SET_DEFAULT(UseFPUForSpilling, true);
>>>>>>>>>>>>>>>>>> + }
>>>>>>>>>>>>>>>>>> + }
>>>>>>>>>>>>>>>>>> +#endif
>>>>>>>>>>>>>>>>>> + }
>>>>>>>>>>>>>>>>>> }
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>> if( is_intel() ) { // Intel cpus specific
>>>>>>>>>>>>>>>>>> settings
>>>>>>>>>>>>>>>>>> diff --git a/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>>>>>>>>>> b/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>>>>>>>>>> --- a/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>>>>>>>>>> +++ b/src/cpu/x86/vm/vm_version_x86.hpp
>>>>>>>>>>>>>>>>>> @@ -513,6 +513,16 @@
>>>>>>>>>>>>>>>>>> result |= CPU_LZCNT;
>>>>>>>>>>>>>>>>>> if (_cpuid_info.ext_cpuid1_ecx.bits.sse4a !=
>>>>>>>>>>>>>>>>>> 0)
>>>>>>>>>>>>>>>>>> result |= CPU_SSE4A;
>>>>>>>>>>>>>>>>>> + if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
>>>>>>>>>>>>>>>>>> + result |= CPU_BMI2;
>>>>>>>>>>>>>>>>>> + if(_cpuid_info.std_cpuid1_edx.bits.ht != 0)
>>>>>>>>>>>>>>>>>> + result |= CPU_HT;
>>>>>>>>>>>>>>>>>> + if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
>>>>>>>>>>>>>>>>>> + result |= CPU_ADX;
>>>>>>>>>>>>>>>>>> + if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
>>>>>>>>>>>>>>>>>> + result |= CPU_SHA;
>>>>>>>>>>>>>>>>>> + if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
>>>>>>>>>>>>>>>>>> + result |= CPU_FMA;
>>>>>>>>>>>>>>>>>> }
>>>>>>>>>>>>>>>>>> // Intel features.
>>>>>>>>>>>>>>>>>> if(is_intel()) {
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>> Regards,
>>>>>>>>>>>>>>>>>> Rohit
>>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>
>>>>>>>>>>>>
>>>>>>>>>>
>>>>>>
>>> >
More information about the hotspot-dev
mailing list