RFR (M): 8003854: expand nodes after register allocation [Was: Re: RFR (M): lateExpand: expands nodes after register allocation]

Christian Thalinger christian.thalinger at oracle.com
Wed Nov 21 11:04:50 PST 2012


On Nov 21, 2012, at 6:51 AM, "Lindenmaier, Goetz" <goetz.lindenmaier at sap.com> wrote:

> Hi,
>  
> Michael and Volker talked to you about our late expand feature at JavaOne.
>  
> The lateExpand phase in the C2 compiler expands nodes after register
> allocation.
> Some nodes cannot be expanded during matching. E.g., register allocation
> might not be able to deal with the resulting pattern. To allow better
> scheduling in such cases, we introduce lateExpand which runs after
> register allocation. Whether and how nodes are expanded is specified
> in the ad-file. See block.cpp for detailed documentation. We use this
> for some nodes on ppc, and extensively on ia64.
>  
> The change below contains the code for this feature.
>  
> http://cr.openjdk.java.net/~goetz/webrevs/webrev-lateExpand/
>  
> Below you will find an example of how to use late expand in the sparc.ad file.
> Further down you see the code generated by adlc.
> Perhaps you can find better use cases for this feature.

I filed:

8003854: expand nodes after register allocation

-- Chris

>  
> Best regards,
>   Goetz.
>  
>                      
> --- a/src/cpu/sparc/vm/sparc.ad   2012-11-21 12:27:04.591486000 +0100      
> +++ b/src/cpu/sparc/vm/sparc.ad    2012-11-19 14:45:15.059452000 +0100                          
> @@ -1933,7 +1937,7 @@                                                                            
>  }                                                                                               
>  
> // Does the CPU require late expand (see block.cpp for description of late expand)?
> -const bool Matcher::require_late_expand = false;
> +const bool Matcher::require_late_expand = true;
>  
> // Should the Matcher clone shifts on addressing modes, expecting them to
> // be subsumed into complex addressing expressions or compute them into
> @@ -7497,6 +7501,7 @@
> // Register Division
> instruct divI_reg_reg(iRegI dst, iRegIsafe src1, iRegIsafe src2) %{
>    match(Set dst (DivI src1 src2));
> +  predicate(!UseNewCode);
>    ins_cost((2+71)*DEFAULT_COST);
>  
>    format %{ "SRA     $src2,0,$src2\n\t"
> @@ -7506,6 +7511,68 @@
>    ins_pipe(sdiv_reg_reg);
> %}
>  
> +//------------------------------------------------------------------------------------
> +
> +encode %{
> +
> +  enc_class lateExpandIdiv_reg_reg(iRegI dst, iRegIsafe src1, iRegIsafe src2) %{
> +    MachNode *m1 = new (C) divI_reg_reg_SRANode();
> +    MachNode *m2 = new (C) divI_reg_reg_SRANode();
> +    MachNode *m3 = new (C) divI_reg_reg_SDIVXNode();
> +
> +    m1->add_req(n_region, n_src1);
> +    m2->add_req(n_region, n_src2);
> +    m3->add_req(n_region, m1, m2);
> +
> +    m1->_opnds[0] = _opnds[1]->clone(C);
> +    m1->_opnds[1] = _opnds[1]->clone(C);
> +
> +    m2->_opnds[0] = _opnds[2]->clone(C);
> +    m2->_opnds[1] = _opnds[2]->clone(C);
> +
> +    m3->_opnds[0] = _opnds[0]->clone(C);
> +    m3->_opnds[1] = _opnds[1]->clone(C);
> +    m3->_opnds[2] = _opnds[2]->clone(C);
> +
> +    ra_->set1(m1->_idx, ra_->get_reg_first(n_src1));
> +    ra_->set1(m2->_idx, ra_->get_reg_first(n_src2));
> +    ra_->set1(m3->_idx, ra_->get_reg_first(this));
> +
> +    nodes->push(m1);
> +    nodes->push(m2);
> +    nodes->push(m3);
> +  %}
> +%}
> +
> +instruct divI_reg_reg_SRA(iRegIsafe dst) %{
> +  effect(USE_DEF dst);
> +  size(4);
> +  format %{ "SRA     $dst,0,$dst\n\t" %}
> +  ins_encode %{ __ sra($dst$$Register, 0, $dst$$Register); %}
> +  ins_pipe(ialu_reg_reg);
> +%}
> +
> +instruct divI_reg_reg_SDIVX(iRegI dst, iRegIsafe src1, iRegIsafe src2) %{
> +  effect(DEF dst, USE src1, USE src2);
> +  size(4);
> +  format %{ "SDIVX   $src1,$src2,$dst\n\t" %}
> +  ins_encode %{ __ sdivx($dst$$Register, 0, $dst$$Register); %}
> +  ins_pipe(sdiv_reg_reg);
> +%}
> +
> +instruct divI_reg_reg_Ex(iRegI dst, iRegIsafe src1, iRegIsafe src2) %{
> +  match(Set dst (DivI src1 src2));
> +  predicate(UseNewCode);
> +  ins_cost((2+71)*DEFAULT_COST);
> +
> +  format %{ "SRA     $src2,0,$src2\n\t"
> +            "SRA     $src1,0,$src1\n\t"
> +            "SDIVX   $src1,$src2,$dst" %}
> +  lateExpand( lateExpandIdiv_reg_reg(src1, src2, dst) );
> +%}
> +
> +//------------------------------------------------------------------------------------
> +
> // Immediate Division
> instruct divI_reg_imm13(iRegI dst, iRegIsafe src1, immI13 src2) %{
>    match(Set dst (DivI src1 src2));
>  
>  
>  
>  
> class divI_reg_reg_ExNode : public MachNode {
>   // ...
>   virtual bool           requires_late_expand() const { return true; }
>   virtual void           lateExpand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_);
>   // ...
> };
>  
> void  divI_reg_reg_ExNode::lateExpand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
>   // Start at oper_input_base() and count operands
>   unsigned idx0 = 1;
>   unsigned idx1 = 1;        // src1
>   unsigned idx2 = idx1 + opnd_array(1)->num_edges();                // src2
>   // Access to ins and operands for late expand.
>   unsigned idx_dst   = idx1;         // iRegI,               src1
>   unsigned idx_src1  = idx2;         // iRegIsafe,      src2
>   unsigned idx_src2  = idx0;         // iRegIsafe,      dst
>   Node    *n_region = lookup(0);
>   Node    *n_dst    = lookup(idx_dst);
>   Node    *n_src1   = lookup(idx_src1);
>   Node    *n_src2   = lookup(idx_src2);
>   iRegIOper *op_dst = (iRegIOper *)opnd_array(1);
>   iRegIsafeOper *op_src1 = (iRegIsafeOper *)opnd_array(2);
>   iRegIsafeOper *op_src2 = (iRegIsafeOper *)opnd_array(0);
>   Compile *C = Compile::current();
>   {
> #line 7518 "/net/usr.work/d045726/oJ/8/main-hotspot-outputStream-test/src/cpu/sparc/vm/sparc.ad"
>  
>     MachNode *m1 = new (C) divI_reg_reg_SRANode();
>     MachNode *m2 = new (C) divI_reg_reg_SRANode();
>     MachNode *m3 = new (C) divI_reg_reg_SDIVXNode();
>  
>     m1->add_req(n_region, n_src1);
>     m2->add_req(n_region, n_src2);
>     m3->add_req(n_region, m1, m2);
>  
>     m1->_opnds[0] = _opnds[1]->clone(C);
>     m1->_opnds[1] = _opnds[1]->clone(C);
>  
>     m2->_opnds[0] = _opnds[2]->clone(C);
>     m2->_opnds[1] = _opnds[2]->clone(C);
>  
>     m3->_opnds[0] = _opnds[0]->clone(C);
>     m3->_opnds[1] = _opnds[1]->clone(C);
>     m3->_opnds[2] = _opnds[2]->clone(C);
>  
>     ra_->set1(m1->_idx, ra_->get_reg_first(n_src1));
>     ra_->set1(m2->_idx, ra_->get_reg_first(n_src2));
>     ra_->set1(m3->_idx, ra_->get_reg_first(this));
>    
>     nodes->push(m1);
>     nodes->push(m2);
>     nodes->push(m3);
>  
> #line 11120 "../generated/adfiles/ad_sparc.cpp"
>   }
> }



More information about the hotspot-compiler-dev mailing list