[aarch64-port-dev ] RFR: Optimised multiplyExact patch
Edward Nevill
edward.nevill at linaro.org
Mon Jul 7 15:34:13 UTC 2014
Hi,
The following patch optimises multiplyExact to generate the following code
Integer case:-
0x0000007f751404e0: smull x8, w19, w11
0x0000007f751404e4: cmp x8, w8, sxtw
0x0000007f751404e8: b.ne 0x0000007f75140530 ;*invokestatic multiplyExact
Long case:-
0x0000007f811404e0: mul x8, x19, x10
0x0000007f811404e4: smulh x9, x19, x10
0x0000007f811404e8: cmp x9, x8, asr #31
0x0000007f811404ec: b.ne 0x0000007f81140534 ;*invokestatic multiplyExact
The patch has additional rules to convert the bvs after the multiply exact into a bne and therefore no longer needs the ugly code to generate the V flag from the Z flag.
OK?
Ed.
--- CUT HERE ---
# HG changeset patch
# User Edward Nevill edward.nevill at linaro.org
# Date 1404746752 -3600
# Mon Jul 07 16:25:52 2014 +0100
# Node ID 76a6867e8c34fb6ac892db0a0d2ea76aaf0c3415
# Parent aafb8a6d2b38862426dda0d3eb8061d7a1291fe0
Add support for multiplyExact
diff -r aafb8a6d2b38 -r 76a6867e8c34 src/cpu/aarch64/vm/aarch64.ad
--- a/src/cpu/aarch64/vm/aarch64.ad Mon Jul 07 16:24:51 2014 +0100
+++ b/src/cpu/aarch64/vm/aarch64.ad Mon Jul 07 16:25:52 2014 +0100
@@ -10602,6 +10602,96 @@
ins_pipe(pipe_class_default);
%}
+instruct overflowMulI_reg(rFlagsReg cr, iRegI op1, iRegI op2)
+%{
+ match(Set cr (OverflowMulI op1 op2));
+ // Leaves V set in cr iff the int product op1 * op2 overflows; rscratch1 is overwritten.
+ format %{ "smull rscratch1, $op1, $op2\t# overflow check int\n\t"
+ "cmp rscratch1, rscratch1, sxtw\n\t"
+ "movw rscratch1, #0x80000000\n\t"
+ "cselw rscratch1, rscratch1, zr, NE\n\t"
+ "cmpw rscratch1, #1" %}
+ ins_cost(5 * INSN_COST); // five instructions are emitted below
+ ins_encode %{
+ __ smull(rscratch1, $op1$$Register, $op2$$Register); // Widening multiply into rscratch1
+ __ subs(zr, rscratch1, rscratch1, ext::sxtw); // Product vs. its sign-extended low word: NE => overflow
+ __ movw(rscratch1, 0x80000000); // Develop 0 (EQ),
+ __ cselw(rscratch1, rscratch1, zr, Assembler::NE); // or 0x80000000 (NE)
+ __ cmpw(rscratch1, 1); // 0x80000000 - 1 => VS; 0 - 1 => VC
+ %}
+
+ ins_pipe(pipe_class_default);
+%}
+
+instruct overflowMulI_reg_branch(cmpOp cmp, iRegI op1, iRegI op2, label labl, rFlagsReg cr)
+%{
+ match(If cmp (OverflowMulI op1 op2));
+ predicate(n->in(1)->as_Bool()->_test._test == BoolTest::overflow
+ || n->in(1)->as_Bool()->_test._test == BoolTest::no_overflow); // only overflow / no_overflow tests match
+ effect(USE labl, KILL cr); // flags are overwritten by the subs below
+ // Fused int multiply-overflow test and branch: branches on Z directly, so V is never synthesised.
+ format %{ "smull rscratch1, $op1, $op2\t# overflow check int\n\t"
+ "cmp rscratch1, rscratch1, sxtw\n\t"
+ "b$cmp $labl" %}
+ ins_cost(3 * INSN_COST); // Branch is rare so treat as INSN_COST
+ ins_encode %{
+ Label* L = $labl$$label; // Branch target
+ Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode; // presumably VS (overflow) or VC (no_overflow) given the predicate
+ __ smull(rscratch1, $op1$$Register, $op2$$Register); // Widening multiply into rscratch1
+ __ subs(zr, rscratch1, rscratch1, ext::sxtw); // NE => overflow
+ __ br(cond == Assembler::VS ? Assembler::NE : Assembler::EQ, *L); // VS is rewritten to NE, any other condition to EQ
+ %}
+
+ ins_pipe(pipe_class_default);
+%}
+
+instruct overflowMulL_reg(rFlagsReg cr, iRegL op1, iRegL op2)
+%{
+ match(Set cr (OverflowMulL op1 op2));
+ // Leaves V set in cr iff the long product op1 * op2 overflows; rscratch1 and rscratch2 are overwritten.
+ format %{ "mul rscratch1, $op1, $op2\t#overflow check long\n\t"
+ "smulh rscratch2, $op1, $op2\n\t"
+ "cmp rscratch2, rscratch1, ASR #63\n\t"
+ "movw rscratch1, #0x80000000\n\t"
+ "cselw rscratch1, rscratch1, zr, NE\n\t"
+ "cmpw rscratch1, #1" %}
+ ins_cost(6 * INSN_COST); // six instructions are emitted below
+ ins_encode %{
+ __ mul(rscratch1, $op1$$Register, $op2$$Register); // Result bits 0..63
+ __ smulh(rscratch2, $op1$$Register, $op2$$Register); // Result bits 64..127
+ __ cmp(rscratch2, rscratch1, Assembler::ASR, 63); // No overflow iff top half == bit 63 of low half replicated (ASR 63, not 31); NE => overflow
+ __ movw(rscratch1, 0x80000000); // Develop 0 (EQ),
+ __ cselw(rscratch1, rscratch1, zr, Assembler::NE); // or 0x80000000 (NE)
+ __ cmpw(rscratch1, 1); // 0x80000000 - 1 => VS; 0 - 1 => VC
+ %}
+
+ ins_pipe(pipe_class_default);
+%}
+
+instruct overflowMulL_reg_branch(cmpOp cmp, iRegL op1, iRegL op2, label labl, rFlagsReg cr)
+%{
+ match(If cmp (OverflowMulL op1 op2));
+ predicate(n->in(1)->as_Bool()->_test._test == BoolTest::overflow
+ || n->in(1)->as_Bool()->_test._test == BoolTest::no_overflow); // only overflow / no_overflow tests match
+ effect(USE labl, KILL cr); // flags are overwritten by the cmp below
+ // Fused long multiply-overflow test and branch: branches on Z directly, so V is never synthesised.
+ format %{ "mul rscratch1, $op1, $op2\t#overflow check long\n\t"
+ "smulh rscratch2, $op1, $op2\n\t"
+ "cmp rscratch2, rscratch1, ASR #63\n\t"
+ "b$cmp $labl" %}
+ ins_cost(4 * INSN_COST); // Branch is rare so treat as INSN_COST
+ ins_encode %{
+ Label* L = $labl$$label; // Branch target
+ Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode; // presumably VS (overflow) or VC (no_overflow) given the predicate
+ __ mul(rscratch1, $op1$$Register, $op2$$Register); // Result bits 0..63
+ __ smulh(rscratch2, $op1$$Register, $op2$$Register); // Result bits 64..127
+ __ cmp(rscratch2, rscratch1, Assembler::ASR, 63); // No overflow iff top half == bit 63 of low half replicated (ASR 63, not 31); NE => overflow
+ __ br(cond == Assembler::VS ? Assembler::NE : Assembler::EQ, *L); // VS is rewritten to NE, any other condition to EQ
+ %}
+
+ ins_pipe(pipe_class_default);
+%}
+
// ============================================================================
// Compare Instructions
--- CUT HERE ---
More information about the aarch64-port-dev
mailing list