[aarch64-port-dev ] RFR: Add support for native crc32 instructions
Edward Nevill
ed at camswl.com
Mon Jun 16 20:46:43 UTC 2014
Hi,
The following patch adds support for using the native CRC32 instructions in the AArch64 instruction set to calculate CRC32 checksums.
The patch automatically detects whether the CPU has the CRC32 instruction extension and uses it if present. Detection is done by testing the HWCAP_CRC32 bit in the value returned by getauxval(AT_HWCAP).
There is also a command-line option, -XX:+/-UseCRC32, which overrides the auto-detection. If you enable it on hardware that does not report CRC32 support, a warning is printed, but the option is still honoured.
This has been tested using the CRCTest from the jtreg test suite.
It has also been benchmarked by calculating the CRC of a 16M array, and shows a 9.9x improvement over the integer table-driven version.
I have also done a basic smoke test by running langtools, and by building and running a builtin-sim version.
Regards,
Ed.
--- CUT HERE ---
# HG changeset patch
# User Edward Nevill edward.nevill at linaro.org
# Date 1402950043 -3600
# Mon Jun 16 21:20:43 2014 +0100
# Node ID ca4f6b4fdf4cb9bfee38eade22b6fff1407c5825
# Parent 55084fca52d279e90686b5cc53bf87aa853a3c75
Add support for builtin crc32 instructions
diff -r 55084fca52d2 -r ca4f6b4fdf4c src/cpu/aarch64/vm/assembler_aarch64.hpp
--- a/src/cpu/aarch64/vm/assembler_aarch64.hpp Thu Jun 12 11:28:16 2014 +0100
+++ b/src/cpu/aarch64/vm/assembler_aarch64.hpp Mon Jun 16 21:20:43 2014 +0100
@@ -2059,6 +2059,20 @@
rf(Vm, 16), f(0b000111, 15, 10), rf(Vn, 5), rf(Vd, 0);
}
+ // CRC32 instructions
+#define INSN(NAME, sf, sz) \
+ void NAME(Register Rd, Register Rn, Register Rm) { \
+ starti; \
+ f(sf, 31), f(0b0011010110, 30, 21), f(0b0100, 15, 12), f(sz, 11, 10); \
+ rf(Rm, 16), rf(Rn, 5), rf(Rd, 0); \
+ }
+
+ INSN(crc32b, 0, 0b00);
+ INSN(crc32h, 0, 0b01);
+ INSN(crc32w, 0, 0b10);
+ INSN(crc32x, 1, 0b11);
+
+#undef INSN
/* Simulator extensions to the ISA
diff -r 55084fca52d2 -r ca4f6b4fdf4c src/cpu/aarch64/vm/globals_aarch64.hpp
--- a/src/cpu/aarch64/vm/globals_aarch64.hpp Thu Jun 12 11:28:16 2014 +0100
+++ b/src/cpu/aarch64/vm/globals_aarch64.hpp Mon Jun 16 21:20:43 2014 +0100
@@ -102,6 +102,7 @@
// Don't attempt to use Neon on builtin sim until builtin sim supports it
#define UseNeon false
+#define UseCRC32 false
#else
#define UseBuiltinSim false
@@ -119,7 +120,9 @@
notproduct(bool, UseAcqRelForVolatileFields, false, \
"Use acquire and release insns for volatile fields") \
product(bool, UseNeon, false, \
- "Use Neon for CRC32 computation")
+ "Use Neon for CRC32 computation") \
+ product(bool, UseCRC32, false, \
+ "Use CRC32 instructions for CRC32 computation")
#endif
diff -r 55084fca52d2 -r ca4f6b4fdf4c src/cpu/aarch64/vm/macroAssembler_aarch64.cpp
--- a/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp Thu Jun 12 11:28:16 2014 +0100
+++ b/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp Mon Jun 16 21:20:43 2014 +0100
@@ -2156,6 +2156,57 @@
unsigned long offset;
ornw(crc, zr, crc);
+
+ if (UseCRC32) {
+ Label CRC_by64_loop, CRC_by4_loop, CRC_by1_loop;
+
+ subs(len, len, 64);
+ br(Assembler::GE, CRC_by64_loop);
+ adds(len, len, 64-4);
+ br(Assembler::GE, CRC_by4_loop);
+ adds(len, len, 4);
+ br(Assembler::GT, CRC_by1_loop);
+ b(L_exit);
+
+ BIND(CRC_by4_loop);
+ ldrw(tmp, Address(post(buf, 4)));
+ subs(len, len, 4);
+ crc32w(crc, crc, tmp);
+ br(Assembler::GE, CRC_by4_loop);
+ adds(len, len, 4);
+ br(Assembler::LE, L_exit);
+ BIND(CRC_by1_loop);
+ ldrb(tmp, Address(post(buf, 1)));
+ subs(len, len, 1);
+ crc32b(crc, crc, tmp);
+ br(Assembler::GT, CRC_by1_loop);
+ b(L_exit);
+
+ align(CodeEntryAlignment);
+ BIND(CRC_by64_loop);
+ subs(len, len, 64);
+ ldp(tmp, tmp3, Address(post(buf, 16)));
+ crc32x(crc, crc, tmp);
+ crc32x(crc, crc, tmp3);
+ ldp(tmp, tmp3, Address(post(buf, 16)));
+ crc32x(crc, crc, tmp);
+ crc32x(crc, crc, tmp3);
+ ldp(tmp, tmp3, Address(post(buf, 16)));
+ crc32x(crc, crc, tmp);
+ crc32x(crc, crc, tmp3);
+ ldp(tmp, tmp3, Address(post(buf, 16)));
+ crc32x(crc, crc, tmp);
+ crc32x(crc, crc, tmp3);
+ br(Assembler::GE, CRC_by64_loop);
+ adds(len, len, 64-4);
+ br(Assembler::GE, CRC_by4_loop);
+ adds(len, len, 4);
+ br(Assembler::GT, CRC_by1_loop);
+ BIND(L_exit);
+ ornw(crc, zr, crc);
+ return;
+ }
+
adrp(table0, ExternalAddress(StubRoutines::crc_table_addr()), offset);
if (offset) add(table0, table0, offset);
add(table1, table0, 1*256*sizeof(juint));
diff -r 55084fca52d2 -r ca4f6b4fdf4c src/cpu/aarch64/vm/vm_version_aarch64.cpp
--- a/src/cpu/aarch64/vm/vm_version_aarch64.cpp Thu Jun 12 11:28:16 2014 +0100
+++ b/src/cpu/aarch64/vm/vm_version_aarch64.cpp Mon Jun 16 21:20:43 2014 +0100
@@ -35,6 +35,16 @@
# include "os_linux.inline.hpp"
#endif
+#ifndef BUILTIN_SIM
+#include <sys/auxv.h>
+#include <asm/hwcap.h>
+
+#ifndef HWCAP_CRC32
+#define HWCAP_CRC32 (1<<7)
+#endif
+
+#endif
+
int VM_Version::_cpu;
int VM_Version::_model;
int VM_Version::_stepping;
@@ -92,6 +102,16 @@
FLAG_SET_DEFAULT(PrefetchFieldsAhead, 256);
FLAG_SET_DEFAULT(PrefetchCopyIntervalInBytes, 256);
+#ifndef BUILTIN_SIM
+ unsigned long auxv = getauxval(AT_HWCAP);
+ if (FLAG_IS_DEFAULT(UseCRC32)) {
+ UseCRC32 = (auxv & HWCAP_CRC32) != 0;
+ }
+ if (UseCRC32 && (auxv & HWCAP_CRC32) == 0) {
+ warning("UseCRC32 specified, but not supported on this CPU");
+ }
+#endif
+
if (FLAG_IS_DEFAULT(UseCRC32Intrinsics)) {
UseCRC32Intrinsics = true;
}
--- CUT HERE ---
More information about the aarch64-port-dev
mailing list