[patch] support zhaoxin x86 cpu vendor ids CentaurHauls and Shanghai

David Holmes david.holmes at oracle.com
Mon Jan 1 21:17:59 UTC 2018


Hi,

I've created a JBS issue for this enhancement request:

https://bugs.openjdk.java.net/browse/JDK-8194279

On 28/12/2017 4:59 PM, Vic Wang(BJ-RD) wrote:
> Hi,
> Here is a hotspot patch for supporting zhaoxin x86 cpu vendor ids (CentaurHauls id and Shanghai id). The original repository is http://hg.openjdk.java.net/jdk/hs.
> Please help me to submit the changes . If there is anything incorrectly, please let me know. Thank you very much.
> 
> The following is the changeset by the diff command.

Can you generate a patch using "hg diff" or "hg export" please. The raw 
diff can't be applied directly to an existing repo without manual editing.

Thanks,
David

> diff -Nur org/jdk/src/hotspot/cpu/x86/assembler_x86.cpp zx/jdk/src/hotspot/cpu/x86/assembler_x86.cpp
> --- org/jdk/src/hotspot/cpu/x86/assembler_x86.cpp2017-12-08 03:33:00.000000000 +0800
> +++ zx/jdk/src/hotspot/cpu/x86/assembler_x86.cpp2017-12-28 11:31:30.000000000 +0800
> @@ -3167,6 +3167,89 @@
>       return;
>     }
> 
> +  if (UseAddressNop && VM_Version::is_zx()) {
> +    //
> +    // Using multi-bytes nops "0x0F 0x1F [address]" for ZX
> +    //  1: 0x90
> +    //  2: 0x66 0x90
> +    //  3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding)
> +    //  4: 0x0F 0x1F 0x40 0x00
> +    //  5: 0x0F 0x1F 0x44 0x00 0x00
> +    //  6: 0x66 0x0F 0x1F 0x44 0x00 0x00
> +    //  7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
> +    //  8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
> +    //  9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
> +    // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
> +    // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
> +
> +    // The rest coding is ZX specific - don't use consecutive address nops
> +
> +    // 12: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
> +    // 13: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
> +    // 14: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
> +    // 15: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
> +
> +    while(i >= 15) {
> +      // For ZX don't generate consecutive addess nops (mix with regular nops)
> +      i -= 15;
> +      emit_int8(0x66);   // size prefix
> +      emit_int8(0x66);   // size prefix
> +      emit_int8(0x66);   // size prefix
> +      addr_nop_8();
> +      emit_int8(0x66);   // size prefix
> +      emit_int8(0x66);   // size prefix
> +      emit_int8(0x66);   // size prefix
> +      emit_int8((unsigned char)0x90);
> +                         // nop
> +    }
> +    switch (i) {
> +      case 14:
> +        emit_int8(0x66); // size prefix
> +      case 13:
> +        emit_int8(0x66); // size prefix
> +      case 12:
> +        addr_nop_8();
> +        emit_int8(0x66); // size prefix
> +        emit_int8(0x66); // size prefix
> +        emit_int8(0x66); // size prefix
> +        emit_int8((unsigned char)0x90);
> +                         // nop
> +        break;
> +      case 11:
> +        emit_int8(0x66); // size prefix
> +      case 10:
> +        emit_int8(0x66); // size prefix
> +      case 9:
> +        emit_int8(0x66); // size prefix
> +      case 8:
> +        addr_nop_8();
> +        break;
> +      case 7:
> +        addr_nop_7();
> +        break;
> +      case 6:
> +        emit_int8(0x66); // size prefix
> +      case 5:
> +        addr_nop_5();
> +        break;
> +      case 4:
> +        addr_nop_4();
> +        break;
> +      case 3:
> +        // Don't use "0x0F 0x1F 0x00" - need patching safe padding
> +        emit_int8(0x66); // size prefix
> +      case 2:
> +        emit_int8(0x66); // size prefix
> +      case 1:
> +        emit_int8((unsigned char)0x90);
> +                         // nop
> +        break;
> +      default:
> +        assert(i == 0, " ");
> +    }
> +    return;
> +  }
> +
>     // Using nops with size prefixes "0x66 0x90".
>     // From AMD Optimization Guide:
>     //  1: 0x90
> diff -Nur org/jdk/src/hotspot/cpu/x86/vm_version_x86.cpp zx/jdk/src/hotspot/cpu/x86/vm_version_x86.cpp
> --- org/jdk/src/hotspot/cpu/x86/vm_version_x86.cpp2017-12-08 03:33:00.000000000 +0800
> +++ zx/jdk/src/hotspot/cpu/x86/vm_version_x86.cpp2017-12-28 11:30:40.000000000 +0800
> @@ -628,6 +628,11 @@
>     if (UseSSE < 1)
>       _features &= ~CPU_SSE;
> 
> +  //since AVX instructions is slower than SSE in some ZX cpus, force USEAVX=0.
> +  if(is_zx() && ((cpu_family()==6)||(cpu_family()==7))){
> +UseAVX = 0;
> +  }
> +
>     // first try initial setting and detect what we can support
>     int use_avx_limit = 0;
>     if (UseAVX > 0) {
> @@ -1078,6 +1083,66 @@
>     // UseXmmRegToRegMoveAll == true  --> movaps(xmm, xmm), movapd(xmm, xmm).
>     // UseXmmRegToRegMoveAll == false --> movss(xmm, xmm),  movsd(xmm, xmm).
> 
> +
> +  if( is_zx() ) { // ZX cpus specific settings
> +    if( FLAG_IS_DEFAULT(UseStoreImmI16) ) {
> +      UseStoreImmI16 = false; // don't use it on ZX cpus
> +    }
> +    if( (cpu_family() == 6)|| (cpu_family() == 7) ) {
> +      if( FLAG_IS_DEFAULT(UseAddressNop) ) {
> +        // Use it on all ZX cpus
> +        UseAddressNop = true;
> +      }
> +    }
> +    if( FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper) ) {
> +      UseXmmLoadAndClearUpper = true; // use movsd on all ZX cpus
> +    }
> +    if( FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll) ) {
> +      if( supports_sse3() ) {
> +        UseXmmRegToRegMoveAll = true; // use movaps, movapd on new ZX cpus
> +      } else {
> +        UseXmmRegToRegMoveAll = false;
> +      }
> +    }
> +    if( ((cpu_family() == 6)||(cpu_family() == 7)) && supports_sse3() ) { // new ZX cpus
> +#ifdef COMPILER2
> +      if( FLAG_IS_DEFAULT(MaxLoopPad) ) {
> +        // For new ZX cpus do the next optimization:
> +        // don't align the beginning of a loop if there are enough instructions
> +        // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp)
> +        // in current fetch line (OptoLoopAlignment) or the padding
> +        // is big (> MaxLoopPad).
> +        // Set MaxLoopPad to 11 for new ZX cpus to reduce number of
> +        // generated NOP instructions. 11 is the largest size of one
> +        // address NOP instruction '0F 1F' (see Assembler::nop(i)).
> +        MaxLoopPad = 11;
> +      }
> +#endif // COMPILER2
> +      if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
> +        UseXMMForArrayCopy = true; // use SSE2 movq on new ZX cpus
> +      }
> +      if (supports_sse4_2()) { // new ZX cpus
> +        if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
> +          UseUnalignedLoadStores = true; // use movdqu on newest ZX cpus
> +        }
> +      }
> +      if (supports_sse4_2()) {
> +        if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
> +          FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
> +        }
> +      } else {
> +        if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
> +          warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
> +        }
> +        FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
> +      }
> +    }
> +
> +    if(FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) {
> +      FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
> +    }
> +  }
> +
>     if( is_amd() ) { // AMD cpus specific settings
>       if( supports_sse2() && FLAG_IS_DEFAULT(UseAddressNop) ) {
>         // Use it on new AMD cpus starting from Opteron.
> @@ -1370,6 +1435,14 @@
>   #ifdef COMPILER2
>       if (FLAG_IS_DEFAULT(UseFPUForSpilling) && supports_sse4_2()) {
>         FLAG_SET_DEFAULT(UseFPUForSpilling, true);
> +    }
> +#endif
> +  }
> +
> +    if (is_zx() && ((cpu_family() == 6)||(cpu_family() == 7)) && supports_sse3()) {
> +#ifdef COMPILER2
> +    if (FLAG_IS_DEFAULT(UseFPUForSpilling) && supports_sse4_2()) {
> +      FLAG_SET_DEFAULT(UseFPUForSpilling, true);
>       }
>   #endif
>     }
> diff -Nur org/jdk/src/hotspot/cpu/x86/vm_version_x86.hpp zx/jdk/src/hotspot/cpu/x86/vm_version_x86.hpp
> --- org/jdk/src/hotspot/cpu/x86/vm_version_x86.hpp2017-12-08 03:33:00.000000000 +0800
> +++ zx/jdk/src/hotspot/cpu/x86/vm_version_x86.hpp2017-12-28 14:15:18.000000000 +0800
> @@ -305,6 +305,9 @@
>     enum Extended_Family {
>       // AMD
>       CPU_FAMILY_AMD_11H       = 0x11,
> +    // ZX
> +    CPU_FAMILY_ZX_CORE_F6    = 6,
> +    CPU_FAMILY_ZX_CORE_F7    = 7,
>       // Intel
>       CPU_FAMILY_INTEL_CORE    = 6,
>       CPU_MODEL_NEHALEM        = 0x1e,
> @@ -549,6 +552,16 @@
>         }
>       }
> 
> +// ZX features.
> +    if(is_zx()) {
> +      if(_cpuid_info.ext_cpuid1_ecx.bits.lzcnt_intel != 0)
> +        result |= CPU_LZCNT;
> +      // for ZX, ecx.bits.misalignsse bit (bit 8) indicates support for prefetchw
> +      if (_cpuid_info.ext_cpuid1_ecx.bits.misalignsse != 0) {
> +        result |= CPU_3DNOW_PREFETCH;
> +      }
> +    }
> +
>       return result;
>     }
> 
> @@ -657,6 +670,7 @@
>     static bool is_P6()             { return cpu_family() >= 6; }
>     static bool is_amd()            { assert_is_initialized(); return _cpuid_info.std_vendor_name_0 == 0x68747541; } // 'htuA'
>     static bool is_intel()          { assert_is_initialized(); return _cpuid_info.std_vendor_name_0 == 0x756e6547; } // 'uneG'
> +  static bool is_zx()             { assert_is_initialized(); return (_cpuid_info.std_vendor_name_0 == 0x746e6543)||(_cpuid_info.std_vendor_name_0 == 0x68532020); } // 'tneC'||'hS  '
>     static bool is_atom_family()    { return ((cpu_family() == 0x06) && ((extended_cpu_model() == 0x36) || (extended_cpu_model() == 0x37) || (extended_cpu_model() == 0x4D))); } //Silvermont and Centerton
>     static bool is_knights_family() { return ((cpu_family() == 0x06) && ((extended_cpu_model() == 0x57) || (extended_cpu_model() == 0x85))); } // Xeon Phi 3200/5200/7200 and Future Xeon Phi
> 
> @@ -680,7 +694,16 @@
>         }
>       } else if (is_amd()) {
>         result = (_cpuid_info.ext_cpuid8_ecx.bits.cores_per_cpu + 1);
> -    }
> +    } else if (is_zx()) {
> +  bool supports_topology = supports_processor_topology();
> +      if (supports_topology) {
> +        result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus /
> +                 _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
> +      }
> +      if (!supports_topology || result == 0) {
> +        result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1);
> +      }
> +}
>       return result;
>     }
> 
> @@ -688,7 +711,9 @@
>       uint result = 1;
>       if (is_intel() && supports_processor_topology()) {
>         result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
> -    } else if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) {
> +    } else if (is_zx() && supports_processor_topology()) {
> +  result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
> +} else if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) {
>         if (cpu_family() >= 0x17) {
>           result = _cpuid_info.ext_cpuid1E_ebx.bits.threads_per_core + 1;
>         } else {
> @@ -705,7 +730,9 @@
>         result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1);
>       } else if (is_amd()) {
>         result = _cpuid_info.ext_cpuid5_ecx.bits.L1_line_size;
> -    }
> +    } else if (is_zx()) {
> +  result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1);
> +}
>       if (result < 32) // not defined ?
>         result = 32;   // 32 bytes by default on x86 and other x64
>       return result;
> 
> Finally, running the jtreg test. I run 2 jtreg tests(jtreg:test/jdk:tier1 and jtreg:test/langtools:tier1) and both of them are pass.
> The results are as follows.
> ==============================
> Test summary
> ==============================
>     TEST                                              TOTAL  PASS  FAIL ERROR
>     jtreg:test/jdk:tier1                               1660  1660     0     0
>     jtreg:test/langtools:tier1                          3828  3828     0     0
>     jtreg:test/nashorn:tier1                              0     0     0     0
>     jtreg:test/jaxp:tier1                                 0     0     0     0
> ==============================
> TEST SUCCESS
> 
> Best Regards!
> VicWang | R&D
> Telephone:+86-01082695388-892477
> 
> 
> 保密声明:
> 本邮件含有保密或专有信息,仅供指定收件人使用。严禁对本邮件或其内容做任何未经授权的查阅、使用、复制或转发。
> CONFIDENTIAL NOTE:
> This email contains confidential or legally privileged information and is for the sole use of its intended recipient. Any unauthorized review, use, copying or forwarding of this email or the content of this email is strictly prohibited.
> 


More information about the hotspot-dev mailing list