[patch] support zhaoxin x86 cpu vendor ids CentaurHauls and Shanghai

Vladimir Kozlov vladimir.kozlov at oracle.com
Thu Jan 4 19:47:46 UTC 2018


Looks good to me.

Thanks,
Vladimir

On 1/4/18 4:20 AM, David Holmes wrote:
> Hi,
> 
> On 4/01/2018 6:35 PM, Vic Wang(BJ-RD) wrote:
>> Hi David:
>> Thanks for the detailed review.
>> I've regenerated the patch by "hg diff" for modifying the coding style issues.
>> Additionally, I don't merge some of the branches into intel's because it may add some codes when 
>> the new cpu features are added.
> 
> Okay this all seems fine. I've updated the webrev in place and fixed the copyright years and a 
> couple of space/indent issues:
> 
> http://cr.openjdk.java.net/~dholmes/8194279/webrev
> 
> if you'd like to check it. Also please advise how you wish the "contributed-by:" line to appear. For 
> now I'm assuming:
> 
> contributed-by: Vic Wang <VicWang at zhaoxin.com>
> 
> We still need a second reviewer.
> 
> Thanks,
> David
> 
>>
>> diff -r 4d28288c9f9e src/hotspot/cpu/x86/assembler_x86.cpp
>> --- a/src/hotspot/cpu/x86/assembler_x86.cppThu Dec 07 15:52:46 2017 +0100
>> +++ b/src/hotspot/cpu/x86/assembler_x86.cppThu Jan 04 12:20:31 2018 +0800
>> @@ -3167,6 +3167,89 @@
>>       return;
>>     }
>>
>> +  if (UseAddressNop && VM_Version::is_zx()) {
>> +    //
>> +    // Using multi-bytes nops "0x0F 0x1F [address]" for ZX
>> +    //  1: 0x90
>> +    //  2: 0x66 0x90
>> +    //  3: 0x66 0x66 0x90 (don't use "0x0F 0x1F 0x00" - need patching safe padding)
>> +    //  4: 0x0F 0x1F 0x40 0x00
>> +    //  5: 0x0F 0x1F 0x44 0x00 0x00
>> +    //  6: 0x66 0x0F 0x1F 0x44 0x00 0x00
>> +    //  7: 0x0F 0x1F 0x80 0x00 0x00 0x00 0x00
>> +    //  8: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
>> +    //  9: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
>> +    // 10: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
>> +    // 11: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00
>> +
>> +    // The rest coding is ZX specific - don't use consecutive address nops
>> +
>> +    // 12: 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
>> +    // 13: 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
>> +    // 14: 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
>> +    // 15: 0x66 0x66 0x66 0x0F 0x1F 0x84 0x00 0x00 0x00 0x00 0x00 0x66 0x66 0x66 0x90
>> +
>> +    while(i >= 15) {
>> +      // For ZX don't generate consecutive addess nops (mix with regular nops)
>> +      i -= 15;
>> +      emit_int8(0x66);   // size prefix
>> +      emit_int8(0x66);   // size prefix
>> +      emit_int8(0x66);   // size prefix
>> +      addr_nop_8();
>> +      emit_int8(0x66);   // size prefix
>> +      emit_int8(0x66);   // size prefix
>> +      emit_int8(0x66);   // size prefix
>> +      emit_int8((unsigned char)0x90);
>> +                         // nop
>> +    }
>> +    switch (i) {
>> +      case 14:
>> +        emit_int8(0x66); // size prefix
>> +      case 13:
>> +        emit_int8(0x66); // size prefix
>> +      case 12:
>> +        addr_nop_8();
>> +        emit_int8(0x66); // size prefix
>> +        emit_int8(0x66); // size prefix
>> +        emit_int8(0x66); // size prefix
>> +        emit_int8((unsigned char)0x90);
>> +                         // nop
>> +        break;
>> +      case 11:
>> +        emit_int8(0x66); // size prefix
>> +      case 10:
>> +        emit_int8(0x66); // size prefix
>> +      case 9:
>> +        emit_int8(0x66); // size prefix
>> +      case 8:
>> +        addr_nop_8();
>> +        break;
>> +      case 7:
>> +        addr_nop_7();
>> +        break;
>> +      case 6:
>> +        emit_int8(0x66); // size prefix
>> +      case 5:
>> +        addr_nop_5();
>> +        break;
>> +      case 4:
>> +        addr_nop_4();
>> +        break;
>> +      case 3:
>> +        // Don't use "0x0F 0x1F 0x00" - need patching safe padding
>> +        emit_int8(0x66); // size prefix
>> +      case 2:
>> +        emit_int8(0x66); // size prefix
>> +      case 1:
>> +        emit_int8((unsigned char)0x90);
>> +                         // nop
>> +        break;
>> +      default:
>> +        assert(i == 0, " ");
>> +    }
>> +    return;
>> +  }
>> +
>>     // Using nops with size prefixes "0x66 0x90".
>>     // From AMD Optimization Guide:
>>     //  1: 0x90
>> diff -r 4d28288c9f9e src/hotspot/cpu/x86/vm_version_x86.cpp
>> --- a/src/hotspot/cpu/x86/vm_version_x86.cppThu Dec 07 15:52:46 2017 +0100
>> +++ b/src/hotspot/cpu/x86/vm_version_x86.cppThu Jan 04 12:20:31 2018 +0800
>> @@ -628,6 +628,11 @@
>>     if (UseSSE < 1)
>>       _features &= ~CPU_SSE;
>>
>> +  //since AVX instructions is slower than SSE in some ZX cpus, force USEAVX=0.
>> +  if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7))){
>> +UseAVX = 0;
>> +  }
>> +
>>     // first try initial setting and detect what we can support
>>     int use_avx_limit = 0;
>>     if (UseAVX > 0) {
>> @@ -1078,6 +1083,66 @@
>>     // UseXmmRegToRegMoveAll == true  --> movaps(xmm, xmm), movapd(xmm, xmm).
>>     // UseXmmRegToRegMoveAll == false --> movss(xmm, xmm),  movsd(xmm, xmm).
>>
>> +
>> +  if (is_zx()) { // ZX cpus specific settings
>> +    if (FLAG_IS_DEFAULT(UseStoreImmI16)) {
>> +      UseStoreImmI16 = false; // don't use it on ZX cpus
>> +    }
>> +    if ((cpu_family() == 6) || (cpu_family() == 7)) {
>> +      if (FLAG_IS_DEFAULT(UseAddressNop)) {
>> +        // Use it on all ZX cpus
>> +        UseAddressNop = true;
>> +      }
>> +    }
>> +    if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
>> +      UseXmmLoadAndClearUpper = true; // use movsd on all ZX cpus
>> +    }
>> +    if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
>> +      if (supports_sse3()) {
>> +        UseXmmRegToRegMoveAll = true; // use movaps, movapd on new ZX cpus
>> +      } else {
>> +        UseXmmRegToRegMoveAll = false;
>> +      }
>> +    }
>> +    if (((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse3()) { // new ZX cpus
>> +#ifdef COMPILER2
>> +      if (FLAG_IS_DEFAULT(MaxLoopPad)) {
>> +        // For new ZX cpus do the next optimization:
>> +        // don't align the beginning of a loop if there are enough instructions
>> +        // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp)
>> +        // in current fetch line (OptoLoopAlignment) or the padding
>> +        // is big (> MaxLoopPad).
>> +        // Set MaxLoopPad to 11 for new ZX cpus to reduce number of
>> +        // generated NOP instructions. 11 is the largest size of one
>> +        // address NOP instruction '0F 1F' (see Assembler::nop(i)).
>> +        MaxLoopPad = 11;
>> +      }
>> +#endif // COMPILER2
>> +      if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
>> +        UseXMMForArrayCopy = true; // use SSE2 movq on new ZX cpus
>> +      }
>> +      if (supports_sse4_2()) { // new ZX cpus
>> +        if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
>> +          UseUnalignedLoadStores = true; // use movdqu on newest ZX cpus
>> +        }
>> +      }
>> +      if (supports_sse4_2()) {
>> +        if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
>> +          FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
>> +        }
>> +      } else {
>> +        if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
>> +          warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be 
>> disabled.");
>> +        }
>> +        FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
>> +      }
>> +    }
>> +
>> +    if (FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) {
>> +      FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
>> +    }
>> +  }
>> +
>>     if( is_amd() ) { // AMD cpus specific settings
>>       if( supports_sse2() && FLAG_IS_DEFAULT(UseAddressNop) ) {
>>         // Use it on new AMD cpus starting from Opteron.
>> @@ -1374,6 +1439,14 @@
>>   #endif
>>     }
>>
>> +  if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse4_2()) {
>> +#ifdef COMPILER2
>> +    if (FLAG_IS_DEFAULT(UseFPUForSpilling)) {
>> +      FLAG_SET_DEFAULT(UseFPUForSpilling, true);
>> +    }
>> +#endif
>> +  }
>> +
>>   #ifdef _LP64
>>     // Prefetch settings
>>
>> diff -r 4d28288c9f9e src/hotspot/cpu/x86/vm_version_x86.hpp
>> --- a/src/hotspot/cpu/x86/vm_version_x86.hppThu Dec 07 15:52:46 2017 +0100
>> +++ b/src/hotspot/cpu/x86/vm_version_x86.hppThu Jan 04 12:20:31 2018 +0800
>> @@ -305,6 +305,9 @@
>>     enum Extended_Family {
>>       // AMD
>>       CPU_FAMILY_AMD_11H       = 0x11,
>> +    // ZX
>> +    CPU_FAMILY_ZX_CORE_F6    = 6,
>> +    CPU_FAMILY_ZX_CORE_F7    = 7,
>>       // Intel
>>       CPU_FAMILY_INTEL_CORE    = 6,
>>       CPU_MODEL_NEHALEM        = 0x1e,
>> @@ -549,6 +552,16 @@
>>         }
>>       }
>>
>> +    // ZX features.
>> +    if (is_zx()) {
>> +      if (_cpuid_info.ext_cpuid1_ecx.bits.lzcnt_intel != 0)
>> +        result |= CPU_LZCNT;
>> +      // for ZX, ecx.bits.misalignsse bit (bit 8) indicates support for prefetchw
>> +      if (_cpuid_info.ext_cpuid1_ecx.bits.misalignsse != 0) {
>> +        result |= CPU_3DNOW_PREFETCH;
>> +      }
>> +    }
>> +
>>       return result;
>>     }
>>
>> @@ -657,6 +670,7 @@
>>     static bool is_P6()             { return cpu_family() >= 6; }
>>     static bool is_amd()            { assert_is_initialized(); return 
>> _cpuid_info.std_vendor_name_0 == 0x68747541; } // 'htuA'
>>     static bool is_intel()          { assert_is_initialized(); return 
>> _cpuid_info.std_vendor_name_0 == 0x756e6547; } // 'uneG'
>> +  static bool is_zx()             { assert_is_initialized(); return 
>> (_cpuid_info.std_vendor_name_0 == 0x746e6543) || (_cpuid_info.std_vendor_name_0 == 0x68532020); } 
>> // 'tneC'||'hS  '
>>     static bool is_atom_family()    { return ((cpu_family() == 0x06) && ((extended_cpu_model() == 
>> 0x36) || (extended_cpu_model() == 0x37) || (extended_cpu_model() == 0x4D))); } //Silvermont and 
>> Centerton
>>     static bool is_knights_family() { return ((cpu_family() == 0x06) && ((extended_cpu_model() == 
>> 0x57) || (extended_cpu_model() == 0x85))); } // Xeon Phi 3200/5200/7200 and Future Xeon Phi
>>
>> @@ -680,6 +694,15 @@
>>         }
>>       } else if (is_amd()) {
>>         result = (_cpuid_info.ext_cpuid8_ecx.bits.cores_per_cpu + 1);
>> +    } else if (is_zx()) {
>> +      bool supports_topology = supports_processor_topology();
>> +      if (supports_topology) {
>> +        result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus /
>> +                 _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
>> +      }
>> +      if (!supports_topology || result == 0) {
>> +        result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1);
>> +      }
>>       }
>>       return result;
>>     }
>> @@ -688,6 +711,8 @@
>>       uint result = 1;
>>       if (is_intel() && supports_processor_topology()) {
>>         result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
>> +    } else if (is_zx() && supports_processor_topology()) {
>> +      result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
>>       } else if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) {
>>         if (cpu_family() >= 0x17) {
>>           result = _cpuid_info.ext_cpuid1E_ebx.bits.threads_per_core + 1;
>> @@ -705,6 +730,8 @@
>>         result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1);
>>       } else if (is_amd()) {
>>         result = _cpuid_info.ext_cpuid5_ecx.bits.L1_line_size;
>> +    } else if (is_zx()) {
>> +      result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1);
>>       }
>>       if (result < 32) // not defined ?
>>         result = 32;   // 32 bytes by default on x86 and other x64
>>
>>
>> Best Regards!
>> VicWang | R&D
>> Telephone:+86-01082695388-892477
>> -----邮件原件-----
>> 发件人: David Holmes [mailto:david.holmes at oracle.com]
>> 发送时间: 2018年1月3日 18:03
>> 收件人: Vic Wang(BJ-RD) <VicWang at zhaoxin.com>; hotspot-dev at openjdk.java.net
>> 抄送: Cobe Chen(BJ-RD) <CobeChen at zhaoxin.com>
>> 主题: Re: 答复: [patch] support zhaoxin x86 cpu vendor ids CentaurHauls and Shanghai
>>
>> On 3/01/2018 6:32 PM, Vic Wang(BJ-RD) wrote:
>>> Hi David:
>>> I generate a patch using "hg diff". Please check it again. Thanks very much.
>>
>> Thanks.
>>
>>> diff -r 4d28288c9f9e src/hotspot/cpu/x86/assembler_x86.cpp
>>> --- a/src/hotspot/cpu/x86/assembler_x86.cppThu Dec 07 15:52:46 2017
>>> +0100
>>
>> Something stripped out the spaces between the file name and timestamp.
>>
>> I've hosted a webrev for the patch at:
>>
>> http://cr.openjdk.java.net/~dholmes/8194279/webrev
>>
>>
>> A few minor comments. I can't comment on the technical accuracy of course :)
>>
>> All copyright notices need the second year updated to 2018.
>>
>> - src/hotspot/cpu/x86/assembler_x86.cpp
>>
>> Seems fine - you copied the existing pattern so I won't call out any style bugs. :)
>>
>> ---
>>
>> - src/hotspot/cpu/x86/vm_version_x86.cpp
>>
>> 633 UseAVX = 0;
>>
>> Indention is missing.
>>
>> General style nit: your if statements are very inconsistently formatted.
>> A statement like:
>>
>> if( is_zx() )
>>
>> should be written:
>>
>> if (is_zx())
>>
>> - space after the "if" and no space after ( or before )
>>
>>
>> In this block:
>>
>> +     if (is_zx() && ((cpu_family() == 6)||(cpu_family() == 7)) &&
>> supports_sse3()) {
>> + #ifdef COMPILER2
>> +     if (FLAG_IS_DEFAULT(UseFPUForSpilling) && supports_sse4_2()) {
>> +       FLAG_SET_DEFAULT(UseFPUForSpilling, true);
>> +     }
>> + #endif
>>
>> I see this copies the existing pattern but isn't it simpler just to write:
>>
>>       if (is_zx() && ((cpu_family() == 6)||(cpu_family() == 7)) &&
>> supports_sse4_2()) {
>> #ifdef COMPILER2
>>       if (FLAG_IS_DEFAULT(UseFPUForSpilling)) {
>>         FLAG_SET_DEFAULT(UseFPUForSpilling, true);
>>       }
>> #endif
>>
>> ?
>>
>> ---
>>
>> -  src/hotspot/cpu/x86/vm_version_x86.hpp
>>
>> 310 CPU_FAMILY_ZX_CORE_F7    = 7,
>>
>> Indentation is missing.
>>
>> 555 // ZX features.
>>
>> Indentation is missing.
>>
>>    556     if(is_zx()) {
>>    557       if(_cpuid_info.ext_cpuid1_ecx.bits.lzcnt_intel != 0)
>>    558         result |= CPU_LZCNT;
>>    559       // for ZX, ecx.bits.misalignsse bit (bit 8) indicates
>> support for prefetchw
>>    560       if (_cpuid_info.ext_cpuid1_ecx.bits.misalignsse != 0) {
>>    561         result |= CPU_3DNOW_PREFETCH;
>>    562       }
>>    563     }
>>
>> The "intel" block you copied is full of style issues :( But as it is exactly the same can't you 
>> just adjust:
>>
>> 546     if(is_intel()) {
>>
>> to
>>
>> 546     if (is_intel() || is_zx()) {
>>
>> ? Similarly for cores_per_cpu(), threads_per_core() and L1_line_size().
>>
>> Otherwise there are indentation issues in your changes to all those functions.
>>
>> Thanks,
>> David
>> -----
>>
>>
>> 保密声明:
>> 本邮件含有保密或专有信息,仅供指定收件人使用。严禁对本邮件或其内容做任何未经授权的查阅、使用、复制 
>> 或转发。
>> CONFIDENTIAL NOTE:
>> This email contains confidential or legally privileged information and is for the sole use of its 
>> intended recipient. Any unauthorized review, use, copying or forwarding of this email or the 
>> content of this email is strictly prohibited.
>>


More information about the hotspot-dev mailing list