[aarch64-port-dev ] RFR: Re: Error in server compiler when packing/unpacking data from arrays using shift and mask ops.
Edward Nevill
edward.nevill at linaro.org
Thu Dec 5 05:01:04 PST 2013
On Wed, 2013-12-04 at 16:07 -0500, Andy Johnson wrote:
> The jtreg hotspot/compiler test TestCharVect.java contains the following
> code snippet:
> long l0 = (long)a1[i*4+0];
> long l1 = (long)a1[i*4+1];
> long l2 = (long)a1[i*4+2];
> long l3 = (long)a1[i*4+3];
> p4[i] = (l0 & 0xFFFFl) |
> ((l1 & 0xFFFFl) << 16) |
> ((l2 & 0xFFFFl) << 32) |
> ((l3 & 0xFFFFl) << 48);
Much code elided.
>
> 0x00007fcac91a7d28: sbfx x15, x15, #16, #16 <<<<<<<<<<<
> 0x00007fcac91a7d2c: sbfiz x14, x14, #32, #32
> 0x00007fcac91a7d30: sbfiz x16, x17, #16, #32
> 0x00007fcac91a7d34: sxtw x11, w11
> 0x00007fcac91a7d38: orr x11, x11, x16
> 0x00007fcac91a7d3c: orr x11, x11, x14
> 0x00007fcac91a7d40: sbfiz x14, x5, #32, #32
> 0x00007fcac91a7d44: sbfiz x16, x1, #16, #32
> 0x00007fcac91a7d48: sxtw x17, w3
> 0x00007fcac91a7d4c: orr x16, x17, x16
> 0x00007fcac91a7d50: orr x14, x16, x14
> 0x00007fcac91a7d54: orr x14, x14, x15
> 0x00007fcac91a7d58: add xscratch1, x13, #0x10
> 0x00007fcac91a7d5c: str x14, [xscratch1,w0,sxtw #3]
> 0x00007fcac91a7d60: sbfx x14, x18, #16, #16 <<<<<<<<<<<<<
I believe the lines marked "<<<<<<<<<" are the source of the problem. What they are trying to do is shift left by 48; what they in fact do is a bitfield extract of bits 16..31.
This is due to the baroque encoding of the sbfiz and sbfx instructions.
I believe the following patch will fix the problem.
Ok to push?
Ed.
--- CUT HERE ---
exporting patch:
# HG changeset patch
# User Edward Nevill edward.nevill at linaro.org
# Date 1386246975 0
# Thu Dec 05 12:36:15 2013 +0000
# Node ID 9a4f9705f626b50214d9b11917fd0aaef88685f3
# Parent 141fc5d4229ae66293617edb25050506932471ec
Fix lshift_ext in C2 for shifts >= 32
diff -r 141fc5d4229a -r 9a4f9705f626 src/cpu/aarch64/vm/aarch64.ad
--- a/src/cpu/aarch64/vm/aarch64.ad Mon Dec 02 17:19:42 2013 +0000
+++ b/src/cpu/aarch64/vm/aarch64.ad Thu Dec 05 12:36:15 2013 +0000
@@ -6930,8 +6930,13 @@
format %{ "sbfm $dst, $src, 64-$scale, 31\t" %}
ins_encode %{
- __ sbfm(as_Register($dst$$reg),
- as_Register($src$$reg), (64u - $scale$$constant) & 63, 31);
+ if ($scale$$constant >= 32)
+ // If scale >= 32 must encode this as LSL, sbfm encodes as SBFX, not SBFIZ
+ __ ubfm(as_Register($dst$$reg),
+ as_Register($src$$reg), (64u - $scale$$constant) & 63, 63 - $scale$$constant);
+ else
+ __ sbfm(as_Register($dst$$reg),
+ as_Register($src$$reg), (64u - $scale$$constant) & 63, 31);
%}
ins_pipe(pipe_class_default);
--- CUT HERE ---
The following gcc test program shows how gcc encodes shifts, and how sbfm is variously encoded as sbfiz or sbfx.
I had to write this to try to get my head around the sbfm encoding.
--- CUT HERE ---
/* Widen the int argument to long, then shift it left by 8 bits. */
long shift8(int a)
{
    long widened = a;

    return widened << 8;
}
/* Widen the int argument to long, then shift it left by 16 bits. */
long shift16(int a)
{
    long widened = a;

    return widened << 16;
}
/* Widen the int argument to long, then shift it left by 24 bits. */
long shift24(int a)
{
    long widened = a;

    return widened << 24;
}
/* Widen the int argument to long, then shift it left by 32 bits. */
long shift32(int a)
{
    long widened = a;

    return widened << 32;
}
/* Widen the int argument to long, then shift it left by 40 bits. */
long shift40(int a)
{
    long widened = a;

    return widened << 40;
}
/* Widen the int argument to long, then shift it left by 48 bits. */
long shift48(int a)
{
    long widened = a;

    return widened << 48;
}
/* Widen the int argument to long, then shift it left by 56 bits. */
long shift56(int a)
{
    long widened = a;

    return widened << 56;
}
/*
 * Hand-written counterpart of shift8(): issues a raw
 * "sbfm result, source, 56, 31" (first immediate = 64 - 8) and returns the
 * destination register.
 *
 * Per the objdump listing later in this message, this assembles to
 * "sbfiz x0, x0, #8, #32", the same instruction gcc emits for shift8().
 */
long asm_shift8(int a)
{
long b;
/* b is a write-only register output ("=r"); a is a register input ("r"). */
asm("sbfm %[result], %[source], 56, 31" : [result]"=r" (b) : [source]"r" (a));
return b;
}
/*
 * Hand-written counterpart of shift16(): issues a raw
 * "sbfm result, source, 48, 31" (first immediate = 64 - 16) and returns the
 * destination register.
 *
 * Per the objdump listing later in this message, this assembles to
 * "sbfiz x0, x0, #16, #32", the same instruction gcc emits for shift16().
 */
long asm_shift16(int a)
{
long b;
/* b is a write-only register output ("=r"); a is a register input ("r"). */
asm("sbfm %[result], %[source], 48, 31" : [result]"=r" (b) : [source]"r" (a));
return b;
}
/*
 * Hand-written counterpart of shift24(): issues a raw
 * "sbfm result, source, 40, 31" (first immediate = 64 - 24) and returns the
 * destination register.
 *
 * Per the objdump listing later in this message, this assembles to
 * "sbfiz x0, x0, #24, #32", the same instruction gcc emits for shift24().
 */
long asm_shift24(int a)
{
long b;
/* b is a write-only register output ("=r"); a is a register input ("r"). */
asm("sbfm %[result], %[source], 40, 31" : [result]"=r" (b) : [source]"r" (a));
return b;
}
/*
 * Hand-written counterpart of shift32(): issues a raw
 * "sbfm result, source, 32, 31" (first immediate = 64 - 32) and returns the
 * destination register.
 *
 * Per the objdump listing later in this message, this assembles to
 * "sbfiz x0, x0, #32, #32", whereas gcc emits "lsl x0, x0, #32" for
 * shift32().
 */
long asm_shift32(int a)
{
long b;
/* b is a write-only register output ("=r"); a is a register input ("r"). */
asm("sbfm %[result], %[source], 32, 31" : [result]"=r" (b) : [source]"r" (a));
return b;
}
/*
 * Hand-written counterpart of shift40(): issues a raw
 * "sbfm result, source, 24, 31" (first immediate = 64 - 40) and returns the
 * destination register.
 *
 * Per the objdump listing later in this message, this assembles to
 * "sbfx x0, x0, #24, #8" -- a bitfield extract, not a left shift -- whereas
 * gcc emits "lsl x0, x0, #40" for shift40().
 */
long asm_shift40(int a)
{
long b;
/* b is a write-only register output ("=r"); a is a register input ("r"). */
asm("sbfm %[result], %[source], 24, 31" : [result]"=r" (b) : [source]"r" (a));
return b;
}
/*
 * Hand-written counterpart of shift48(): issues a raw
 * "sbfm result, source, 16, 31" (first immediate = 64 - 48) and returns the
 * destination register.
 *
 * Per the objdump listing later in this message, this assembles to
 * "sbfx x0, x0, #16, #16" -- a bitfield extract, not a left shift -- whereas
 * gcc emits "lsl x0, x0, #48" for shift48().  This is the same sbfx form as
 * the lines marked "<<<<" in the quoted C2 output.
 */
long asm_shift48(int a)
{
long b;
/* b is a write-only register output ("=r"); a is a register input ("r"). */
asm("sbfm %[result], %[source], 16, 31" : [result]"=r" (b) : [source]"r" (a));
return b;
}
/*
 * Hand-written counterpart of shift56(): issues a raw
 * "sbfm result, source, 8, 31" (first immediate = 64 - 56) and returns the
 * destination register.
 *
 * Per the objdump listing later in this message, this assembles to
 * "sbfx x0, x0, #8, #24" -- a bitfield extract, not a left shift -- whereas
 * gcc emits "lsl x0, x0, #56" for shift56().
 */
long asm_shift56(int a)
{
long b;
/* b is a write-only register output ("=r"); a is a register input ("r"). */
asm("sbfm %[result], %[source], 8, 31" : [result]"=r" (b) : [source]"r" (a));
return b;
}
--- CUT HERE ---
And here is the output from objdump when compiled
gcc -O3 -c shift.c
--- CUT HERE ---
shift.o: file format elf64-littleaarch64
Disassembly of section .text:
0000000000000000 <shift8>:
0: 93787c00 sbfiz x0, x0, #8, #32
4: d65f03c0 ret
0000000000000008 <shift16>:
8: 93707c00 sbfiz x0, x0, #16, #32
c: d65f03c0 ret
0000000000000010 <shift24>:
10: 93687c00 sbfiz x0, x0, #24, #32
14: d65f03c0 ret
0000000000000018 <shift32>:
18: d3607c00 lsl x0, x0, #32
1c: d65f03c0 ret
0000000000000020 <shift40>:
20: d3585c00 lsl x0, x0, #40
24: d65f03c0 ret
0000000000000028 <shift48>:
28: d3503c00 lsl x0, x0, #48
2c: d65f03c0 ret
0000000000000030 <shift56>:
30: d3481c00 lsl x0, x0, #56
34: d65f03c0 ret
0000000000000038 <asm_shift8>:
38: 93787c00 sbfiz x0, x0, #8, #32
3c: d65f03c0 ret
0000000000000040 <asm_shift16>:
40: 93707c00 sbfiz x0, x0, #16, #32
44: d65f03c0 ret
0000000000000048 <asm_shift24>:
48: 93687c00 sbfiz x0, x0, #24, #32
4c: d65f03c0 ret
0000000000000050 <asm_shift32>:
50: 93607c00 sbfiz x0, x0, #32, #32
54: d65f03c0 ret
0000000000000058 <asm_shift40>:
58: 93587c00 sbfx x0, x0, #24, #8
5c: d65f03c0 ret
0000000000000060 <asm_shift48>:
60: 93507c00 sbfx x0, x0, #16, #16
64: d65f03c0 ret
0000000000000068 <asm_shift56>:
68: 93487c00 sbfx x0, x0, #8, #24
6c: d65f03c0 ret
--- CUT HERE ---
More information about the aarch64-port-dev
mailing list