[aarch64-port-dev ] RFR: Re: Error in server compiler when packing/unpacking data from arrays using shift and mask ops.

Thu Dec 5 05:01:04 PST 2013

On Wed, 2013-12-04 at 16:07 -0500, Andy Johnson wrote:
> The jtreg hotspot/compiler test TestCharVect.java contains the following
> code snippet:
>       long l0 = (long)a1[i*4+0];
>       long l1 = (long)a1[i*4+1];
>       long l2 = (long)a1[i*4+2];
>       long l3 = (long)a1[i*4+3];
>       p4[i] = (l0 & 0xFFFFl) |
>              ((l1 & 0xFFFFl) << 16) |
>              ((l2 & 0xFFFFl) << 32) |
>              ((l3 & 0xFFFFl) << 48);

Much code elided.

> 
>   0x00007fcac91a7d28: sbfx    x15, x15, #16, #16    <<<<<<<<<<<
>   0x00007fcac91a7d2c: sbfiz    x14, x14, #32, #32
>   0x00007fcac91a7d30: sbfiz    x16, x17, #16, #32
>   0x00007fcac91a7d34: sxtw    x11, w11
>   0x00007fcac91a7d38: orr    x11, x11, x16
>   0x00007fcac91a7d3c: orr    x11, x11, x14
>   0x00007fcac91a7d40: sbfiz    x14, x5, #32, #32
>   0x00007fcac91a7d44: sbfiz    x16, x1, #16, #32
>   0x00007fcac91a7d48: sxtw    x17, w3
>   0x00007fcac91a7d4c: orr    x16, x17, x16
>   0x00007fcac91a7d50: orr    x14, x16, x14
>   0x00007fcac91a7d54: orr    x14, x14, x15
>   0x00007fcac91a7d58: add    xscratch1, x13, #0x10
>   0x00007fcac91a7d5c: str    x14, [xscratch1,w0,sxtw #3]
>   0x00007fcac91a7d60: sbfx    x14, x18, #16, #16   <<<<<<<<<<<<<

I believe the lines marked "<<<<<<<<<" are the source of the problem. What they are trying to do is shift left by 48, what they in fact do is a bitfield extract of bit 16..31.

This is due to the baroque encoding of the sbfiz and sbfx instructions.

I believe the following patch will fix the problem.

Ok to push?
Ed.

--- CUT HERE ---
exporting patch:
# HG changeset patch
# User Edward Nevill edward.nevill at linaro.org
# Date 1386246975 0
#      Thu Dec 05 12:36:15 2013 +0000
# Node ID 9a4f9705f626b50214d9b11917fd0aaef88685f3
# Parent  141fc5d4229ae66293617edb25050506932471ec
Fix lshift_ext in C2 for shifts >= 32

diff -r 141fc5d4229a -r 9a4f9705f626 src/cpu/aarch64/vm/aarch64.ad
--- a/src/cpu/aarch64/vm/aarch64.ad	Mon Dec 02 17:19:42 2013 +0000
+++ b/src/cpu/aarch64/vm/aarch64.ad	Thu Dec 05 12:36:15 2013 +0000
@@ -6930,8 +6930,13 @@
   format %{ "sbfm $dst, $src, 64-$scale, 31\t" %}
 
   ins_encode %{
-    __ sbfm(as_Register($dst$$reg),
-            as_Register($src$$reg), (64u - $scale$$constant) & 63, 31);
+    if ($scale$$constant >= 32)
+      // If scale >= 32 must encode this as LSL, sbfm encodes as SBFX, not SBFIZ
+      __ ubfm(as_Register($dst$$reg),
+		    as_Register($src$$reg), (64u - $scale$$constant) & 63, 63 - $scale$$constant);
+    else
+      __ sbfm(as_Register($dst$$reg),
+		    as_Register($src$$reg), (64u - $scale$$constant) & 63, 31);
   %}
 
   ins_pipe(pipe_class_default);


--- CUT HERE ---


The following gcc test program shows how gcc encodes shifts and shows how sbfm encodes variusly to sbfiz/sbfx.

I had to write this to try to get my head around the sbfm encoding.

--- CUT HERE ---
long shift8(int a)
{
	return (long)a << 8;
}
long shift16(int a)
{
	return (long)a << 16;
}
long shift24(int a)
{
	return (long)a << 24;
}
long shift32(int a)
{
	return (long)a << 32;
}
long shift40(int a)
{
	return (long)a << 40;
}
long shift48(int a)
{
	return (long)a << 48;
}
long shift56(int a)
{
	return (long)a << 56;
}
long asm_shift8(int a)
{
	long b;
	asm("sbfm %[result], %[source], 56, 31" : [result]"=r" (b) : [source]"r" (a));
	return b;
}
long asm_shift16(int a)
{
	long b;
	asm("sbfm %[result], %[source], 48, 31" : [result]"=r" (b) : [source]"r" (a));
	return b;
}
long asm_shift24(int a)
{
	long b;
	asm("sbfm %[result], %[source], 40, 31" : [result]"=r" (b) : [source]"r" (a));
	return b;
}
long asm_shift32(int a)
{
	long b;
	asm("sbfm %[result], %[source], 32, 31" : [result]"=r" (b) : [source]"r" (a));
	return b;
}
long asm_shift40(int a)
{
	long b;
	asm("sbfm %[result], %[source], 24, 31" : [result]"=r" (b) : [source]"r" (a));
	return b;
}
long asm_shift48(int a)
{
	long b;
	asm("sbfm %[result], %[source], 16, 31" : [result]"=r" (b) : [source]"r" (a));
	return b;
}
long asm_shift56(int a)
{
	long b;
	asm("sbfm %[result], %[source], 8, 31" : [result]"=r" (b) : [source]"r" (a));
	return b;
}
--- CUT HERE ---

And here is the output from objdump when compiled

gcc -O3 -c shift.c

--- CUT HERE ---
shift.o:     file format elf64-littleaarch64


Disassembly of section .text:

0000000000000000 <shift8>:
   0:	93787c00 	sbfiz	x0, x0, #8, #32
   4:	d65f03c0 	ret

0000000000000008 <shift16>:
   8:	93707c00 	sbfiz	x0, x0, #16, #32
   c:	d65f03c0 	ret

0000000000000010 <shift24>:
  10:	93687c00 	sbfiz	x0, x0, #24, #32
  14:	d65f03c0 	ret

0000000000000018 <shift32>:
  18:	d3607c00 	lsl	x0, x0, #32
  1c:	d65f03c0 	ret

0000000000000020 <shift40>:
  20:	d3585c00 	lsl	x0, x0, #40
  24:	d65f03c0 	ret

0000000000000028 <shift48>:
  28:	d3503c00 	lsl	x0, x0, #48
  2c:	d65f03c0 	ret

0000000000000030 <shift56>:
  30:	d3481c00 	lsl	x0, x0, #56
  34:	d65f03c0 	ret

0000000000000038 <asm_shift8>:
  38:	93787c00 	sbfiz	x0, x0, #8, #32
  3c:	d65f03c0 	ret

0000000000000040 <asm_shift16>:
  40:	93707c00 	sbfiz	x0, x0, #16, #32
  44:	d65f03c0 	ret

0000000000000048 <asm_shift24>:
  48:	93687c00 	sbfiz	x0, x0, #24, #32
  4c:	d65f03c0 	ret

0000000000000050 <asm_shift32>:
  50:	93607c00 	sbfiz	x0, x0, #32, #32
  54:	d65f03c0 	ret

0000000000000058 <asm_shift40>:
  58:	93587c00 	sbfx	x0, x0, #24, #8
  5c:	d65f03c0 	ret

0000000000000060 <asm_shift48>:
  60:	93507c00 	sbfx	x0, x0, #16, #16
  64:	d65f03c0 	ret

0000000000000068 <asm_shift56>:
  68:	93487c00 	sbfx	x0, x0, #8, #24
  6c:	d65f03c0 	ret
--- CUT HERE ---