答复: [patch] support zhaoxin x86 cpu vendor ids CentaurHauls and Shanghai
Vic Wang(BJ-RD)
VicWang at zhaoxin.com
Fri Dec 29 06:27:24 UTC 2017
Hi,
Sorry to send this mail again. I send this mail, but I don’t receive any response . And I don’t receive it by myself since I am in the mailing list. So if you receive this, can you give a response for me?
Best Regards!
VicWang
-----邮件原件-----
发件人: Vic Wang(BJ-RD)
发送时间: 2017年12月28日 15:00
收件人: 'hotspot-dev at openjdk.java.net' <hotspot-dev at openjdk.java.net>
抄送: Cobe Chen(BJ-RD) <CobeChen at zhaoxin.com>
主题: [patch] support zhaoxin x86 cpu vendor ids CentaurHauls and Shanghai
Hi,
Here is a hotspot patch for supporting zhaoxin x86 cpu vendor ids (CentaurHauls id and Shanghai id). The original repository is http://hg.openjdk.java.net/jdk/hs.
Please help me to submit the changes . If there is anything incorrectly, please let me know. Thank you very much.
The following is the changeset by the diff command.
diff -Nur org/jdk/src/hotspot/cpu/x86/assembler_x86.cpp zx/jdk/src/hotspot/cpu/x86/assembler_x86.cpp
--- org/jdk/src/hotspot/cpu/x86/assembler_x86.cpp2017-12-08 03:33:00.000000000 +0800
+++ zx/jdk/src/hotspot/cpu/x86/assembler_x86.cpp2017-12-28 11:31:30.000000000 +0800
@@ -3167,6 +3167,89 @@
return;
}
+ if (UseAddressNop && VM_Version::is_zx()) {
+ //
+ // Using multi-bytes nops "0x0F 0x1F [address]" for ZX
+ // 1: 0x90
+ // 2: 0x66 0x90
+ // 3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding)
+ // 4: 0x0F 0x1F 0x40 0x00
+ // 5: 0x0F 0x1F 0x44 0x00 0x00
+ // 6: 0x66 0x0F 0x1F 0x44 0x00 0x00
+ // 7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
+ // 8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
+ // 9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
+ // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
+ // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
+
+ // The rest coding is ZX specific - don't use consecutive address
+ nops
+
+ // 12: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
+ // 13: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
+ // 14: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
+ // 15: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66
+ 0x66 0x66 0x90
+
+ while(i >= 15) {
+ // For ZX don't generate consecutive addess nops (mix with regular nops)
+ i -= 15;
+ emit_int8(0x66); // size prefix
+ emit_int8(0x66); // size prefix
+ emit_int8(0x66); // size prefix
+ addr_nop_8();
+ emit_int8(0x66); // size prefix
+ emit_int8(0x66); // size prefix
+ emit_int8(0x66); // size prefix
+ emit_int8((unsigned char)0x90);
+ // nop
+ }
+ switch (i) {
+ case 14:
+ emit_int8(0x66); // size prefix
+ case 13:
+ emit_int8(0x66); // size prefix
+ case 12:
+ addr_nop_8();
+ emit_int8(0x66); // size prefix
+ emit_int8(0x66); // size prefix
+ emit_int8(0x66); // size prefix
+ emit_int8((unsigned char)0x90);
+ // nop
+ break;
+ case 11:
+ emit_int8(0x66); // size prefix
+ case 10:
+ emit_int8(0x66); // size prefix
+ case 9:
+ emit_int8(0x66); // size prefix
+ case 8:
+ addr_nop_8();
+ break;
+ case 7:
+ addr_nop_7();
+ break;
+ case 6:
+ emit_int8(0x66); // size prefix
+ case 5:
+ addr_nop_5();
+ break;
+ case 4:
+ addr_nop_4();
+ break;
+ case 3:
+ // Don't use "0x0F 0x1F 0x00" - need patching safe padding
+ emit_int8(0x66); // size prefix
+ case 2:
+ emit_int8(0x66); // size prefix
+ case 1:
+ emit_int8((unsigned char)0x90);
+ // nop
+ break;
+ default:
+ assert(i == 0, " ");
+ }
+ return;
+ }
+
// Using nops with size prefixes "0x66 0x90".
// From AMD Optimization Guide:
// 1: 0x90
diff -Nur org/jdk/src/hotspot/cpu/x86/vm_version_x86.cpp zx/jdk/src/hotspot/cpu/x86/vm_version_x86.cpp
--- org/jdk/src/hotspot/cpu/x86/vm_version_x86.cpp2017-12-08 03:33:00.000000000 +0800
+++ zx/jdk/src/hotspot/cpu/x86/vm_version_x86.cpp2017-12-28 11:30:40.000000000 +0800
@@ -628,6 +628,11 @@
if (UseSSE < 1)
_features &= ~CPU_SSE;
+ //since AVX instructions is slower than SSE in some ZX cpus, force USEAVX=0.
+ if(is_zx() && ((cpu_family()==6)||(cpu_family()==7))){
+UseAVX = 0;
+ }
+
// first try initial setting and detect what we can support
int use_avx_limit = 0;
if (UseAVX > 0) {
@@ -1078,6 +1083,66 @@
// UseXmmRegToRegMoveAll == true --> movaps(xmm, xmm), movapd(xmm, xmm).
// UseXmmRegToRegMoveAll == false --> movss(xmm, xmm), movsd(xmm, xmm).
+
+ if( is_zx() ) { // ZX cpus specific settings
+ if( FLAG_IS_DEFAULT(UseStoreImmI16) ) {
+ UseStoreImmI16 = false; // don't use it on ZX cpus
+ }
+ if( (cpu_family() == 6)|| (cpu_family() == 7) ) {
+ if( FLAG_IS_DEFAULT(UseAddressNop) ) {
+ // Use it on all ZX cpus
+ UseAddressNop = true;
+ }
+ }
+ if( FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper) ) {
+ UseXmmLoadAndClearUpper = true; // use movsd on all ZX cpus
+ }
+ if( FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll) ) {
+ if( supports_sse3() ) {
+ UseXmmRegToRegMoveAll = true; // use movaps, movapd on new ZX cpus
+ } else {
+ UseXmmRegToRegMoveAll = false;
+ }
+ }
+ if( ((cpu_family() == 6)||(cpu_family() == 7)) && supports_sse3() )
+{ // new ZX cpus #ifdef COMPILER2
+ if( FLAG_IS_DEFAULT(MaxLoopPad) ) {
+ // For new ZX cpus do the next optimization:
+ // don't align the beginning of a loop if there are enough instructions
+ // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp)
+ // in current fetch line (OptoLoopAlignment) or the padding
+ // is big (> MaxLoopPad).
+ // Set MaxLoopPad to 11 for new ZX cpus to reduce number of
+ // generated NOP instructions. 11 is the largest size of one
+ // address NOP instruction '0F 1F' (see Assembler::nop(i)).
+ MaxLoopPad = 11;
+ }
+#endif // COMPILER2
+ if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
+ UseXMMForArrayCopy = true; // use SSE2 movq on new ZX cpus
+ }
+ if (supports_sse4_2()) { // new ZX cpus
+ if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
+ UseUnalignedLoadStores = true; // use movdqu on newest ZX cpus
+ }
+ }
+ if (supports_sse4_2()) {
+ if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
+ FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
+ }
+ } else {
+ if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
+ warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
+ }
+ FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
+ }
+ }
+
+ if(FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) {
+ FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
+ }
+ }
+
if( is_amd() ) { // AMD cpus specific settings
if( supports_sse2() && FLAG_IS_DEFAULT(UseAddressNop) ) {
// Use it on new AMD cpus starting from Opteron.
@@ -1370,6 +1435,14 @@
#ifdef COMPILER2
if (FLAG_IS_DEFAULT(UseFPUForSpilling) && supports_sse4_2()) {
FLAG_SET_DEFAULT(UseFPUForSpilling, true);
+ }
+#endif
+ }
+
+ if (is_zx() && ((cpu_family() == 6)||(cpu_family() == 7)) &&
+supports_sse3()) { #ifdef COMPILER2
+ if (FLAG_IS_DEFAULT(UseFPUForSpilling) && supports_sse4_2()) {
+ FLAG_SET_DEFAULT(UseFPUForSpilling, true);
}
#endif
}
diff -Nur org/jdk/src/hotspot/cpu/x86/vm_version_x86.hpp zx/jdk/src/hotspot/cpu/x86/vm_version_x86.hpp
--- org/jdk/src/hotspot/cpu/x86/vm_version_x86.hpp2017-12-08 03:33:00.000000000 +0800
+++ zx/jdk/src/hotspot/cpu/x86/vm_version_x86.hpp2017-12-28 14:15:18.000000000 +0800
@@ -305,6 +305,9 @@
enum Extended_Family {
// AMD
CPU_FAMILY_AMD_11H = 0x11,
+ // ZX
+ CPU_FAMILY_ZX_CORE_F6 = 6,
+ CPU_FAMILY_ZX_CORE_F7 = 7,
// Intel
CPU_FAMILY_INTEL_CORE = 6,
CPU_MODEL_NEHALEM = 0x1e,
@@ -549,6 +552,16 @@
}
}
+// ZX features.
+ if(is_zx()) {
+ if(_cpuid_info.ext_cpuid1_ecx.bits.lzcnt_intel != 0)
+ result |= CPU_LZCNT;
+ // for ZX, ecx.bits.misalignsse bit (bit 8) indicates support for prefetchw
+ if (_cpuid_info.ext_cpuid1_ecx.bits.misalignsse != 0) {
+ result |= CPU_3DNOW_PREFETCH;
+ }
+ }
+
return result;
}
@@ -657,6 +670,7 @@
static bool is_P6() { return cpu_family() >= 6; }
static bool is_amd() { assert_is_initialized(); return _cpuid_info.std_vendor_name_0 == 0x68747541; } // 'htuA'
static bool is_intel() { assert_is_initialized(); return _cpuid_info.std_vendor_name_0 == 0x756e6547; } // 'uneG'
+ static bool is_zx() { assert_is_initialized(); return (_cpuid_info.std_vendor_name_0 == 0x746e6543)||(_cpuid_info.std_vendor_name_0 == 0x68532020); } // 'tneC'||'hS '
static bool is_atom_family() { return ((cpu_family() == 0x06) && ((extended_cpu_model() == 0x36) || (extended_cpu_model() == 0x37) || (extended_cpu_model() == 0x4D))); } //Silvermont and Centerton
static bool is_knights_family() { return ((cpu_family() == 0x06) && ((extended_cpu_model() == 0x57) || (extended_cpu_model() == 0x85))); } // Xeon Phi 3200/5200/7200 and Future Xeon Phi
@@ -680,7 +694,16 @@
}
} else if (is_amd()) {
result = (_cpuid_info.ext_cpuid8_ecx.bits.cores_per_cpu + 1);
- }
+ } else if (is_zx()) {
+ bool supports_topology = supports_processor_topology();
+ if (supports_topology) {
+ result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus /
+ _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
+ }
+ if (!supports_topology || result == 0) {
+ result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1);
+ }
+}
return result;
}
@@ -688,7 +711,9 @@
uint result = 1;
if (is_intel() && supports_processor_topology()) {
result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
- } else if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) {
+ } else if (is_zx() && supports_processor_topology()) {
+ result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
+} else if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) {
if (cpu_family() >= 0x17) {
result = _cpuid_info.ext_cpuid1E_ebx.bits.threads_per_core + 1;
} else {
@@ -705,7 +730,9 @@
result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1);
} else if (is_amd()) {
result = _cpuid_info.ext_cpuid5_ecx.bits.L1_line_size;
- }
+ } else if (is_zx()) {
+ result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1);
+}
if (result < 32) // not defined ?
result = 32; // 32 bytes by default on x86 and other x64
return result;
Finally, running the jtreg test. I run 2 jtreg tests(jtreg:test/jdk:tier1 and jtreg:test/langtools:tier1) and both of them are pass.
The results are as follows.
==============================
Test summary
==============================
TEST TOTAL PASS FAIL ERROR
jtreg:test/jdk:tier1 1660 1660 0 0
jtreg:test/langtools:tier1 3828 3828 0 0
jtreg:test/nashorn:tier1 0 0 0 0
jtreg:test/jaxp:tier1 0 0 0 0
==============================
TEST SUCCESS
Best Regards!
VicWang | R&D
Telephone:+86-01082695388-892477
保密声明:
本邮件含有保密或专有信息,仅供指定收件人使用。严禁对本邮件或其内容做任何未经授权的查阅、使用、复制或转发。
CONFIDENTIAL NOTE:
This email contains confidential or legally privileged information and is for the sole use of its intended recipient. Any unauthorized review, use, copying or forwarding of this email or the content of this email is strictly prohibited.
More information about the hotspot-dev
mailing list