RFR (M): 8003854: expand nodes after register allocation [Was: Re: RFR (M): lateExpand: expands nodes after register allocation]
Christian Thalinger
christian.thalinger at oracle.com
Wed Nov 21 11:04:50 PST 2012
On Nov 21, 2012, at 6:51 AM, "Lindenmaier, Goetz" <goetz.lindenmaier at sap.com> wrote:
> Hi,
>
> Michael and Volker talked to you about our late expand feature at JavaOne.
>
> The lateExpand phase in the C2 compiler expands nodes after register
> allocation.
> Some nodes cannot be expanded during matching. E.g., register allocation
> might not be able to deal with the resulting pattern. To allow better
> scheduling in such cases, we introduce lateExpand which runs after
> register allocation. Whether and how nodes are expanded is specified
> in the ad-file. See block.cpp for detailed documentation. We use this
> for some nodes on ppc, and extensively on ia64.
>
> The change below contains the code for this feature.
>
> http://cr.openjdk.java.net/~goetz/webrevs/webrev-lateExpand/
>
> Below you will find an example of how to use late expand in the sparc.ad file.
> Further down you see the code generated by adlc.
> Perhaps you can find better use cases for this feature.
I filed:
8003854: expand nodes after register allocation
-- Chris
>
> Best regards,
> Goetz.
>
>
> --- a/src/cpu/sparc/vm/sparc.ad 2012-11-21 12:27:04.591486000 +0100
> +++ b/src/cpu/sparc/vm/sparc.ad 2012-11-19 14:45:15.059452000 +0100
> @@ -1933,7 +1937,7 @@
> }
>
> // Does the CPU require late expand (see block.cpp for description of late expand)?
> -const bool Matcher::require_late_expand = false;
> +const bool Matcher::require_late_expand = true;
>
> // Should the Matcher clone shifts on addressing modes, expecting them to
> // be subsumed into complex addressing expressions or compute them into
> @@ -7497,6 +7501,7 @@
> // Register Division
> instruct divI_reg_reg(iRegI dst, iRegIsafe src1, iRegIsafe src2) %{
> match(Set dst (DivI src1 src2));
> + predicate(!UseNewCode);
> ins_cost((2+71)*DEFAULT_COST);
>
> format %{ "SRA $src2,0,$src2\n\t"
> @@ -7506,6 +7511,68 @@
> ins_pipe(sdiv_reg_reg);
> %}
>
> +//------------------------------------------------------------------------------------
> +
> +encode %{
> +
> + enc_class lateExpandIdiv_reg_reg(iRegI dst, iRegIsafe src1, iRegIsafe src2) %{
> + MachNode *m1 = new (C) divI_reg_reg_SRANode();
> + MachNode *m2 = new (C) divI_reg_reg_SRANode();
> + MachNode *m3 = new (C) divI_reg_reg_SDIVXNode();
> +
> + m1->add_req(n_region, n_src1);
> + m2->add_req(n_region, n_src2);
> + m3->add_req(n_region, m1, m2);
> +
> + m1->_opnds[0] = _opnds[1]->clone(C);
> + m1->_opnds[1] = _opnds[1]->clone(C);
> +
> + m2->_opnds[0] = _opnds[2]->clone(C);
> + m2->_opnds[1] = _opnds[2]->clone(C);
> +
> + m3->_opnds[0] = _opnds[0]->clone(C);
> + m3->_opnds[1] = _opnds[1]->clone(C);
> + m3->_opnds[2] = _opnds[2]->clone(C);
> +
> + ra_->set1(m1->_idx, ra_->get_reg_first(n_src1));
> + ra_->set1(m2->_idx, ra_->get_reg_first(n_src2));
> + ra_->set1(m3->_idx, ra_->get_reg_first(this));
> +
> + nodes->push(m1);
> + nodes->push(m2);
> + nodes->push(m3);
> + %}
> +%}
> +
> +instruct divI_reg_reg_SRA(iRegIsafe dst) %{
> + effect(USE_DEF dst);
> + size(4);
> + format %{ "SRA $dst,0,$dst\n\t" %}
> + ins_encode %{ __ sra($dst$$Register, 0, $dst$$Register); %}
> + ins_pipe(ialu_reg_reg);
> +%}
> +
> +instruct divI_reg_reg_SDIVX(iRegI dst, iRegIsafe src1, iRegIsafe src2) %{
> + effect(DEF dst, USE src1, USE src2);
> + size(4);
> + format %{ "SDIVX $src1,$src2,$dst\n\t" %}
> + ins_encode %{ __ sdivx($dst$$Register, 0, $dst$$Register); %}
> + ins_pipe(sdiv_reg_reg);
> +%}
> +
> +instruct divI_reg_reg_Ex(iRegI dst, iRegIsafe src1, iRegIsafe src2) %{
> + match(Set dst (DivI src1 src2));
> + predicate(UseNewCode);
> + ins_cost((2+71)*DEFAULT_COST);
> +
> + format %{ "SRA $src2,0,$src2\n\t"
> + "SRA $src1,0,$src1\n\t"
> + "SDIVX $src1,$src2,$dst" %}
> + lateExpand( lateExpandIdiv_reg_reg(src1, src2, dst) );
> +%}
> +
> +//------------------------------------------------------------------------------------
> +
> // Immediate Division
> instruct divI_reg_imm13(iRegI dst, iRegIsafe src1, immI13 src2) %{
> match(Set dst (DivI src1 src2));
>
>
>
>
> class divI_reg_reg_ExNode : public MachNode {
> // ...
> virtual bool requires_late_expand() const { return true; }
> virtual void lateExpand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_);
> // ...
> };
>
> void divI_reg_reg_ExNode::lateExpand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
> // Start at oper_input_base() and count operands
> unsigned idx0 = 1;
> unsigned idx1 = 1; // src1
> unsigned idx2 = idx1 + opnd_array(1)->num_edges(); // src2
> // Access to ins and operands for late expand.
> unsigned idx_dst = idx1; // iRegI, src1
> unsigned idx_src1 = idx2; // iRegIsafe, src2
> unsigned idx_src2 = idx0; // iRegIsafe, dst
> Node *n_region = lookup(0);
> Node *n_dst = lookup(idx_dst);
> Node *n_src1 = lookup(idx_src1);
> Node *n_src2 = lookup(idx_src2);
> iRegIOper *op_dst = (iRegIOper *)opnd_array(1);
> iRegIsafeOper *op_src1 = (iRegIsafeOper *)opnd_array(2);
> iRegIsafeOper *op_src2 = (iRegIsafeOper *)opnd_array(0);
> Compile *C = Compile::current();
> {
> #line 7518 "/net/usr.work/d045726/oJ/8/main-hotspot-outputStream-test/src/cpu/sparc/vm/sparc.ad"
>
> MachNode *m1 = new (C) divI_reg_reg_SRANode();
> MachNode *m2 = new (C) divI_reg_reg_SRANode();
> MachNode *m3 = new (C) divI_reg_reg_SDIVXNode();
>
> m1->add_req(n_region, n_src1);
> m2->add_req(n_region, n_src2);
> m3->add_req(n_region, m1, m2);
>
> m1->_opnds[0] = _opnds[1]->clone(C);
> m1->_opnds[1] = _opnds[1]->clone(C);
>
> m2->_opnds[0] = _opnds[2]->clone(C);
> m2->_opnds[1] = _opnds[2]->clone(C);
>
> m3->_opnds[0] = _opnds[0]->clone(C);
> m3->_opnds[1] = _opnds[1]->clone(C);
> m3->_opnds[2] = _opnds[2]->clone(C);
>
> ra_->set1(m1->_idx, ra_->get_reg_first(n_src1));
> ra_->set1(m2->_idx, ra_->get_reg_first(n_src2));
> ra_->set1(m3->_idx, ra_->get_reg_first(this));
>
> nodes->push(m1);
> nodes->push(m2);
> nodes->push(m3);
>
> #line 11120 "../generated/adfiles/ad_sparc.cpp"
> }
> }
More information about the hotspot-compiler-dev
mailing list