Patch: improve huge page support
Andrew Haley
aph at redhat.com
Fri May 13 10:17:16 PDT 2011
On 05/13/2011 04:03 PM, Andrew Haley wrote:
> On 05/12/2011 05:47 PM, Dr Andrew John Hughes wrote:
>> Looks ok for HEAD to me, assuming it still builds.
>>
>> Post patches for 1.9 and 1.10 and I'll approve them. There's likely to be some
>> churn between HotSpot versions so I wouldn't be surprised if the patch had
>> to be rejigged. Probably worth checking it even still works on old versions,
>> as I don't know if this has dependencies on other changes.
>
> I've checked with HS19 and 20. This is the patch for icedtea6-1.10, 1.9
> to follow. This is the same as trunk, 1.9 had to be reworked.
1.9 is problematic in that we need different patches for HotSpot. This is
a slightly tweaked version from hs18, the hs19 patch is the same as 1.10.
Andrew.
2011-05-13 Andrew Haley <aph at redhat.com>
* patches/hotspot/*/7034464-hugepage.patch,
* patches/openjdk/7037939-hugepage.patch,
* patches/openjdk/7043564-hugepage.patch: Import from upstream.
* Makefile.am: add patches.
diff -r 2f549e3a4720 Makefile.am
--- a/Makefile.am Fri Apr 15 15:51:17 2011 +0200
+++ b/Makefile.am Fri May 13 18:15:09 2011 +0100
@@ -342,7 +342,10 @@
patches/openjdk/6691503-malicious-applet-always-on-top.patch \
patches/jtreg-LastErrorString.patch \
patches/shark-llvm-2.9.patch \
- patches/mark_sun_toolkit_privileged_code.patch
+ patches/mark_sun_toolkit_privileged_code.patch \
+ patches/hotspot/$(HSBUILD)/7034464-hugepage.patch \
+ patches/openjdk/7037939-hugepage.patch \
+ patches/openjdk/7043564-hugepage.patch
if WITH_ALT_HSBUILD
ICEDTEA_PATCHES += \
diff -r 2f549e3a4720 patches/hotspot/hs19/7034464-hugepage.patch
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/patches/hotspot/hs19/7034464-hugepage.patch Fri May 13 18:15:09 2011 +0100
@@ -0,0 +1,458 @@
+# HG changeset patch
+# User iveresov
+# Date 1303344724 25200
+# Node ID 139667d9836ae218ab06a74d5813a5a700a076c9
+# Parent 49a67202bc671d23d719c9e14752d80295a9e8f6
+7034464: Support transparent large pages on Linux
+Summary: Support transparent huge pages on Linux available since 2.6.38
+Reviewed-by: iveresov, ysr
+Contributed-by: aph at redhat.com
+diff -u -r openjdk.patched/hotspot/src/os/linux/vm/globals_linux.hpp openjdk/hotspot/src/os/linux/vm/globals_linux.hpp
+--- openjdk.patched/hotspot/src/os/linux/vm/globals_linux.hpp 2011-03-16 02:30:16.000000000 +0000
++++ openjdk/hotspot/src/os/linux/vm/globals_linux.hpp 2011-05-04 11:57:15.083470027 +0100
+@@ -29,13 +29,19 @@
+ // Defines Linux specific flags. They are not available on other platforms.
+ //
+ #define RUNTIME_OS_FLAGS(develop, develop_pd, product, product_pd, diagnostic, notproduct) \
+- product(bool, UseOprofile, false, \
+- "enable support for Oprofile profiler") \
+- \
+- product(bool, UseLinuxPosixThreadCPUClocks, true, \
+- "enable fast Linux Posix clocks where available")
+-// NB: The default value of UseLinuxPosixThreadCPUClocks may be
+-// overridden in Arguments::parse_each_vm_init_arg.
++ product(bool, UseOprofile, false, \
++ "enable support for Oprofile profiler") \
++ \
++ product(bool, UseLinuxPosixThreadCPUClocks, true, \
++ "enable fast Linux Posix clocks where available") \
++/* NB: The default value of UseLinuxPosixThreadCPUClocks may be \
++ overridden in Arguments::parse_each_vm_init_arg. */ \
++ \
++ product(bool, UseHugeTLBFS, false, \
++ "Use MAP_HUGETLB for large pages") \
++ \
++ product(bool, UseSHM, false, \
++ "Use SYSV shared memory for large pages")
+
+ //
+ // Defines Linux-specific default values. The flags are available on all
+diff -u -r openjdk.patched/hotspot/src/os/linux/vm/os_linux.cpp openjdk/hotspot/src/os/linux/vm/os_linux.cpp
+--- openjdk.patched/hotspot/src/os/linux/vm/os_linux.cpp 2011-05-03 14:32:54.285722656 +0100
++++ openjdk/hotspot/src/os/linux/vm/os_linux.cpp 2011-05-09 14:31:51.358502774 +0100
+@@ -2495,16 +2495,40 @@
+ return res != (uintptr_t) MAP_FAILED;
+ }
+
++// Define MAP_HUGETLB here so we can build HotSpot on old systems.
++#ifndef MAP_HUGETLB
++#define MAP_HUGETLB 0x40000
++#endif
++
++// Define MADV_HUGEPAGE here so we can build HotSpot on old systems.
++#ifndef MADV_HUGEPAGE
++#define MADV_HUGEPAGE 14
++#endif
++
+ bool os::commit_memory(char* addr, size_t size, size_t alignment_hint,
+ bool exec) {
++ if (UseHugeTLBFS && alignment_hint > (size_t)vm_page_size()) {
++ int prot = exec ? PROT_READ|PROT_WRITE|PROT_EXEC : PROT_READ|PROT_WRITE;
++ uintptr_t res =
++ (uintptr_t) ::mmap(addr, size, prot,
++ MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS|MAP_HUGETLB,
++ -1, 0);
++ return res != (uintptr_t) MAP_FAILED;
++ }
++
+ return commit_memory(addr, size, exec);
+ }
+
+-void os::realign_memory(char *addr, size_t bytes, size_t alignment_hint) { }
++void os::realign_memory(char *addr, size_t bytes, size_t alignment_hint) {
++ if (UseHugeTLBFS && alignment_hint > (size_t)vm_page_size()) {
++ // We don't check the return value: madvise(MADV_HUGEPAGE) may not
++ // be supported or the memory may already be backed by huge pages.
++ ::madvise(addr, bytes, MADV_HUGEPAGE);
++ }
++}
+
+ void os::free_memory(char *addr, size_t bytes) {
+- ::mmap(addr, bytes, PROT_READ | PROT_WRITE,
+- MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0);
++ ::madvise(addr, bytes, MADV_DONTNEED);
+ }
+
+ void os::numa_make_global(char *addr, size_t bytes) {
+@@ -2870,7 +2894,16 @@
+ static size_t _large_page_size = 0;
+
+ bool os::large_page_init() {
+- if (!UseLargePages) return false;
++ if (!UseLargePages) {
++ UseHugeTLBFS = false;
++ UseSHM = false;
++ return false;
++ }
++
++ if (FLAG_IS_DEFAULT(UseHugeTLBFS) && FLAG_IS_DEFAULT(UseSHM)) {
++ // Our user has not expressed a preference, so we'll try both.
++ UseHugeTLBFS = UseSHM = true;
++ }
+
+ if (LargePageSizeInBytes) {
+ _large_page_size = LargePageSizeInBytes;
+@@ -2915,6 +2948,9 @@
+ }
+ }
+
++ // print a warning if any large page related flag is specified on command line
++ bool warn_on_failure = !FLAG_IS_DEFAULT(UseHugeTLBFS);
++
+ const size_t default_page_size = (size_t)Linux::page_size();
+ if (_large_page_size > default_page_size) {
+ _page_sizes[0] = _large_page_size;
+@@ -2922,6 +2958,14 @@
+ _page_sizes[2] = 0;
+ }
+
++ UseHugeTLBFS = UseHugeTLBFS &&
++ Linux::hugetlbfs_sanity_check(warn_on_failure, _large_page_size);
++
++ if (UseHugeTLBFS)
++ UseSHM = false;
++
++ UseLargePages = UseHugeTLBFS || UseSHM;
++
+ // Large page support is available on 2.6 or newer kernel, some vendors
+ // (e.g. Redhat) have backported it to their 2.4 based distributions.
+ // We optimistically assume the support is available. If later it turns out
+@@ -2936,7 +2980,7 @@
+ char* os::reserve_memory_special(size_t bytes, char* req_addr, bool exec) {
+ // "exec" is passed in but not used. Creating the shared image for
+ // the code cache doesn't have an SHM_X executable permission to check.
+- assert(UseLargePages, "only for large pages");
++ assert(UseLargePages && UseSHM, "only for SHM large pages");
+
+ key_t key = IPC_PRIVATE;
+ char *addr;
+@@ -3003,19 +3047,19 @@
+ return _large_page_size;
+ }
+
+-// Linux does not support anonymous mmap with large page memory. The only way
+-// to reserve large page memory without file backing is through SysV shared
+-// memory API. The entire memory region is committed and pinned upfront.
+-// Hopefully this will change in the future...
++// HugeTLBFS allows application to commit large page memory on demand;
++// with SysV SHM the entire memory region must be allocated as shared
++// memory.
+ bool os::can_commit_large_page_memory() {
+- return false;
++ return UseHugeTLBFS;
+ }
+
+ bool os::can_execute_large_page_memory() {
+- return false;
++ return UseHugeTLBFS;
+ }
+
+ // Reserve memory at an arbitrary address, only if that area is
++// Reserve memory at an arbitrary address, only if that area is
+ // available (and not reserved for something else).
+
+ char* os::attempt_reserve_memory_at(size_t bytes, char* requested_addr) {
+@@ -5009,6 +5053,43 @@
+ // JSR166
+ // -------------------------------------------------------
+
++bool os::Linux::hugetlbfs_sanity_check(bool warn, size_t page_size) {
++ bool result = false;
++ void *p = mmap (NULL, page_size, PROT_READ|PROT_WRITE,
++ MAP_ANONYMOUS|MAP_PRIVATE|MAP_HUGETLB,
++ -1, 0);
++
++ if (p != (void *) -1) {
++ // We don't know if this really is a huge page or not.
++ FILE *fp = fopen("/proc/self/maps", "r");
++ if (fp) {
++ while (!feof(fp)) {
++ char chars[257];
++ long x = 0;
++ if (fgets(chars, sizeof(chars), fp)) {
++ if (sscanf(chars, "%lx-%*lx", &x) == 1
++ && x == (long)p) {
++ if (strstr (chars, "hugepage")) {
++ result = true;
++ break;
++ }
++ }
++ }
++ }
++ fclose(fp);
++ }
++ munmap (p, page_size);
++ if (result)
++ return true;
++ }
++
++ if (warn) {
++ warning("HugeTLBFS is not supported by the operating system.");
++ }
++
++ return result;
++}
++
+ /*
+ * The solaris and linux implementations of park/unpark are fairly
+ * conservative for now, but can be improved. They currently use a
+diff -u -r openjdk.patched/hotspot/src/os/linux/vm/os_linux.cpp.orig openjdk/hotspot/src/os/linux/vm/os_linux.cpp.orig
+--- openjdk.patched/hotspot/src/os/linux/vm/os_linux.cpp.orig 2011-05-03 14:32:53.972649764 +0100
++++ openjdk/hotspot/src/os/linux/vm/os_linux.cpp.orig 2011-05-04 11:57:15.085470494 +0100
+@@ -117,6 +117,10 @@
+ # include <inttypes.h>
+ # include <sys/ioctl.h>
+
++#if __x86_64__
++#include <asm/vsyscall.h>
++#endif
++
+ #define MAX_PATH (2 * K)
+
+ // for timer info max values which include all bits
+@@ -2491,16 +2495,40 @@
+ return res != (uintptr_t) MAP_FAILED;
+ }
+
++// Define MAP_HUGETLB here so we can build HotSpot on old systems.
++#ifndef MAP_HUGETLB
++#define MAP_HUGETLB 0x40000
++#endif
++
++// Define MADV_HUGEPAGE here so we can build HotSpot on old systems.
++#ifndef MADV_HUGEPAGE
++#define MADV_HUGEPAGE 14
++#endif
++
+ bool os::commit_memory(char* addr, size_t size, size_t alignment_hint,
+ bool exec) {
++ if (UseHugeTLBFS && alignment_hint > (size_t)vm_page_size()) {
++ int prot = exec ? PROT_READ|PROT_WRITE|PROT_EXEC : PROT_READ|PROT_WRITE;
++ uintptr_t res =
++ (uintptr_t) ::mmap(addr, size, prot,
++ MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS|MAP_HUGETLB,
++ -1, 0);
++ return res != (uintptr_t) MAP_FAILED;
++ }
++
+ return commit_memory(addr, size, exec);
+ }
+
+-void os::realign_memory(char *addr, size_t bytes, size_t alignment_hint) { }
++void os::realign_memory(char *addr, size_t bytes, size_t alignment_hint) {
++ if (UseHugeTLBFS && alignment_hint > (size_t)vm_page_size()) {
++ // We don't check the return value: madvise(MADV_HUGEPAGE) may not
++ // be supported or the memory may already be backed by huge pages.
++ ::madvise(addr, bytes, MADV_HUGEPAGE);
++ }
++}
+
+ void os::free_memory(char *addr, size_t bytes) {
+- ::mmap(addr, bytes, PROT_READ | PROT_WRITE,
+- MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0);
++ ::madvise(addr, bytes, MADV_DONTNEED);
+ }
+
+ void os::numa_make_global(char *addr, size_t bytes) {
+@@ -2544,6 +2572,21 @@
+ return end;
+ }
+
++static int sched_getcpu_syscall(void) {
++ unsigned int cpu;
++ int retval = -1;
++
++#if __x86_64__
++ typedef long (*vgetcpu_t)(unsigned int *cpu, unsigned int *node, unsigned long *tcache);
++ vgetcpu_t vgetcpu = (vgetcpu_t)VSYSCALL_ADDR(__NR_vgetcpu);
++ retval = vgetcpu(&cpu, NULL, NULL);
++#elif __i386__
++ retval = syscall(SYS_getcpu, &cpu, NULL, NULL);
++#endif
++
++ return (retval == -1) ? retval : cpu;
++}
++
+ extern "C" void numa_warn(int number, char *where, ...) { }
+ extern "C" void numa_error(char *where) { }
+
+@@ -2565,6 +2608,10 @@
+ set_sched_getcpu(CAST_TO_FN_PTR(sched_getcpu_func_t,
+ dlsym(RTLD_DEFAULT, "sched_getcpu")));
+
++ // If it's not, try a direct syscall.
++ if (sched_getcpu() == -1)
++ set_sched_getcpu(CAST_TO_FN_PTR(sched_getcpu_func_t, (void*)&sched_getcpu_syscall));
++
+ if (sched_getcpu() != -1) { // Does it work?
+ void *handle = dlopen("libnuma.so.1", RTLD_LAZY);
+ if (handle != NULL) {
+@@ -2847,7 +2894,16 @@
+ static size_t _large_page_size = 0;
+
+ bool os::large_page_init() {
+- if (!UseLargePages) return false;
++ if (!UseLargePages) {
++ UseHugeTLBFS = false;
++ UseSHM = false;
++ return false;
++ }
++
++ if (FLAG_IS_DEFAULT(UseHugeTLBFS) && FLAG_IS_DEFAULT(UseSHM)) {
++ // Our user has not expressed a preference, so we'll try both.
++ UseHugeTLBFS = UseSHM = true;
++ }
+
+ if (LargePageSizeInBytes) {
+ _large_page_size = LargePageSizeInBytes;
+@@ -2892,6 +2948,9 @@
+ }
+ }
+
++ // print a warning if any large page related flag is specified on command line
++ bool warn_on_failure = !FLAG_IS_DEFAULT(UseHugeTLBFS);
++
+ const size_t default_page_size = (size_t)Linux::page_size();
+ if (_large_page_size > default_page_size) {
+ _page_sizes[0] = _large_page_size;
+@@ -2899,6 +2958,14 @@
+ _page_sizes[2] = 0;
+ }
+
++ UseHugeTLBFS = UseHugeTLBFS &&
++ Linux::hugetlbfs_sanity_check(warn_on_failure, _large_page_size);
++
++ if (UseHugeTLBFS)
++ UseSHM = false;
++
++ UseLargePages = UseHugeTLBFS || UseSHM;
++
+ // Large page support is available on 2.6 or newer kernel, some vendors
+ // (e.g. Redhat) have backported it to their 2.4 based distributions.
+ // We optimistically assume the support is available. If later it turns out
+@@ -2913,7 +2980,7 @@
+ char* os::reserve_memory_special(size_t bytes, char* req_addr, bool exec) {
+ // "exec" is passed in but not used. Creating the shared image for
+ // the code cache doesn't have an SHM_X executable permission to check.
+- assert(UseLargePages, "only for large pages");
++ assert(UseLargePages && UseSHM, "only for SHM large pages");
+
+ key_t key = IPC_PRIVATE;
+ char *addr;
+@@ -2980,19 +3047,19 @@
+ return _large_page_size;
+ }
+
+-// Linux does not support anonymous mmap with large page memory. The only way
+-// to reserve large page memory without file backing is through SysV shared
+-// memory API. The entire memory region is committed and pinned upfront.
+-// Hopefully this will change in the future...
++// HugeTLBFS allows application to commit large page memory on demand;
++// with SysV SHM the entire memory region must be allocated as shared
++// memory.
+ bool os::can_commit_large_page_memory() {
+- return false;
++ return UseHugeTLBFS;
+ }
+
+ bool os::can_execute_large_page_memory() {
+- return false;
++ return UseHugeTLBFS;
+ }
+
+ // Reserve memory at an arbitrary address, only if that area is
++// Reserve memory at an arbitrary address, only if that area is
+ // available (and not reserved for something else).
+
+ char* os::attempt_reserve_memory_at(size_t bytes, char* requested_addr) {
+@@ -4081,6 +4148,23 @@
+ UseNUMA = false;
+ }
+ }
++ // With SHM large pages we cannot uncommit a page, so there's not way
++ // we can make the adaptive lgrp chunk resizing work. If the user specified
++ // both UseNUMA and UseLargePages (or UseSHM) on the command line - warn and
++ // disable adaptive resizing.
++ if (UseNUMA && UseLargePages && UseSHM) {
++ if (!FLAG_IS_DEFAULT(UseNUMA)) {
++ if (FLAG_IS_DEFAULT(UseLargePages) && FLAG_IS_DEFAULT(UseSHM)) {
++ UseLargePages = false;
++ } else {
++ warning("UseNUMA is not fully compatible with SHM large pages, disabling adaptive resizing");
++ UseAdaptiveSizePolicy = false;
++ UseAdaptiveNUMAChunkSizing = false;
++ }
++ } else {
++ UseNUMA = false;
++ }
++ }
+ if (!UseNUMA && ForceNUMA) {
+ UseNUMA = true;
+ }
+@@ -4986,6 +5070,43 @@
+ // JSR166
+ // -------------------------------------------------------
+
++bool os::Linux::hugetlbfs_sanity_check(bool warn, size_t page_size) {
++ bool result = false;
++ void *p = mmap (NULL, page_size, PROT_READ|PROT_WRITE,
++ MAP_ANONYMOUS|MAP_PRIVATE|MAP_HUGETLB,
++ -1, 0);
++
++ if (p != (void *) -1) {
++ // We don't know if this really is a huge page or not.
++ FILE *fp = fopen("/proc/self/maps", "r");
++ if (fp) {
++ while (!feof(fp)) {
++ char chars[257];
++ long x = 0;
++ if (fgets(chars, sizeof(chars), fp)) {
++ if (sscanf(chars, "%lx-%*lx", &x) == 1
++ && x == (long)p) {
++ if (strstr (chars, "hugepage")) {
++ result = true;
++ break;
++ }
++ }
++ }
++ }
++ fclose(fp);
++ }
++ munmap (p, page_size);
++ if (result)
++ return true;
++ }
++
++ if (warn) {
++ warning("HugeTLBFS is not supported by the operating system.");
++ }
++
++ return result;
++}
++
+ /*
+ * The solaris and linux implementations of park/unpark are fairly
+ * conservative for now, but can be improved. They currently use a
+diff -u -r openjdk.patched/hotspot/src/os/linux/vm/os_linux.hpp openjdk/hotspot/src/os/linux/vm/os_linux.hpp
+--- openjdk.patched/hotspot/src/os/linux/vm/os_linux.hpp 2011-03-16 02:30:16.000000000 +0000
++++ openjdk/hotspot/src/os/linux/vm/os_linux.hpp 2011-05-04 11:57:15.088471194 +0100
+@@ -86,6 +86,9 @@
+
+ static void rebuild_cpu_to_node_map();
+ static GrowableArray<int>* cpu_to_node() { return _cpu_to_node; }
++
++ static bool hugetlbfs_sanity_check(bool warn, size_t page_size);
++
+ public:
+ static void init_thread_fpu_state();
+ static int get_fpu_control_word();
diff -r 2f549e3a4720 patches/hotspot/original/7034464-hugepage.patch
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/patches/hotspot/original/7034464-hugepage.patch Fri May 13 18:15:09 2011 +0100
@@ -0,0 +1,444 @@
+# HG changeset patch
+# User iveresov
+# Date 1303344724 25200
+# Node ID 139667d9836ae218ab06a74d5813a5a700a076c9
+# Parent 49a67202bc671d23d719c9e14752d80295a9e8f6
+7034464: Support transparent large pages on Linux
+Summary: Support transparent huge pages on Linux available since 2.6.38
+Reviewed-by: iveresov, ysr
+Contributed-by: aph at redhat.com
+diff -u -r openjdk.patched/hotspot/src/os/linux/vm/globals_linux.hpp openjdk/hotspot/src/os/linux/vm/globals_linux.hpp
+--- openjdk/hotspot/src/os/linux/vm/globals_linux.hpp~ 2010-06-21 22:12:15.000000000 +0100
++++ openjdk/hotspot/src/os/linux/vm/globals_linux.hpp 2011-05-13 15:11:07.344978229 +0100
+@@ -31,6 +31,12 @@
+ \
+ product(bool, UseLinuxPosixThreadCPUClocks, false, \
+ "enable fast Linux Posix clocks where available") \
++ \
++ product(bool, UseHugeTLBFS, false, \
++ "Use MAP_HUGETLB for large pages") \
++ \
++ product(bool, UseSHM, false, \
++ "Use SYSV shared memory for large pages")
+
+
+ //
+diff -u -r openjdk.patched/hotspot/src/os/linux/vm/os_linux.cpp openjdk/hotspot/src/os/linux/vm/os_linux.cpp
+--- openjdk.patched/hotspot/src/os/linux/vm/os_linux.cpp 2011-05-03 14:32:54.285722656 +0100
++++ openjdk/hotspot/src/os/linux/vm/os_linux.cpp 2011-05-09 14:31:51.358502774 +0100
+@@ -2495,16 +2495,40 @@
+ return res != (uintptr_t) MAP_FAILED;
+ }
+
++// Define MAP_HUGETLB here so we can build HotSpot on old systems.
++#ifndef MAP_HUGETLB
++#define MAP_HUGETLB 0x40000
++#endif
++
++// Define MADV_HUGEPAGE here so we can build HotSpot on old systems.
++#ifndef MADV_HUGEPAGE
++#define MADV_HUGEPAGE 14
++#endif
++
+ bool os::commit_memory(char* addr, size_t size, size_t alignment_hint,
+ bool exec) {
++ if (UseHugeTLBFS && alignment_hint > (size_t)vm_page_size()) {
++ int prot = exec ? PROT_READ|PROT_WRITE|PROT_EXEC : PROT_READ|PROT_WRITE;
++ uintptr_t res =
++ (uintptr_t) ::mmap(addr, size, prot,
++ MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS|MAP_HUGETLB,
++ -1, 0);
++ return res != (uintptr_t) MAP_FAILED;
++ }
++
+ return commit_memory(addr, size, exec);
+ }
+
+-void os::realign_memory(char *addr, size_t bytes, size_t alignment_hint) { }
++void os::realign_memory(char *addr, size_t bytes, size_t alignment_hint) {
++ if (UseHugeTLBFS && alignment_hint > (size_t)vm_page_size()) {
++ // We don't check the return value: madvise(MADV_HUGEPAGE) may not
++ // be supported or the memory may already be backed by huge pages.
++ ::madvise(addr, bytes, MADV_HUGEPAGE);
++ }
++}
+
+ void os::free_memory(char *addr, size_t bytes) {
+- ::mmap(addr, bytes, PROT_READ | PROT_WRITE,
+- MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0);
++ ::madvise(addr, bytes, MADV_DONTNEED);
+ }
+
+ void os::numa_make_global(char *addr, size_t bytes) {
+@@ -2870,7 +2894,16 @@
+ static size_t _large_page_size = 0;
+
+ bool os::large_page_init() {
+- if (!UseLargePages) return false;
++ if (!UseLargePages) {
++ UseHugeTLBFS = false;
++ UseSHM = false;
++ return false;
++ }
++
++ if (FLAG_IS_DEFAULT(UseHugeTLBFS) && FLAG_IS_DEFAULT(UseSHM)) {
++ // Our user has not expressed a preference, so we'll try both.
++ UseHugeTLBFS = UseSHM = true;
++ }
+
+ if (LargePageSizeInBytes) {
+ _large_page_size = LargePageSizeInBytes;
+@@ -2915,6 +2948,9 @@
+ }
+ }
+
++ // print a warning if any large page related flag is specified on command line
++ bool warn_on_failure = !FLAG_IS_DEFAULT(UseHugeTLBFS);
++
+ const size_t default_page_size = (size_t)Linux::page_size();
+ if (_large_page_size > default_page_size) {
+ _page_sizes[0] = _large_page_size;
+@@ -2922,6 +2958,14 @@
+ _page_sizes[2] = 0;
+ }
+
++ UseHugeTLBFS = UseHugeTLBFS &&
++ Linux::hugetlbfs_sanity_check(warn_on_failure, _large_page_size);
++
++ if (UseHugeTLBFS)
++ UseSHM = false;
++
++ UseLargePages = UseHugeTLBFS || UseSHM;
++
+ // Large page support is available on 2.6 or newer kernel, some vendors
+ // (e.g. Redhat) have backported it to their 2.4 based distributions.
+ // We optimistically assume the support is available. If later it turns out
+@@ -2936,7 +2980,7 @@
+ char* os::reserve_memory_special(size_t bytes, char* req_addr, bool exec) {
+ // "exec" is passed in but not used. Creating the shared image for
+ // the code cache doesn't have an SHM_X executable permission to check.
+- assert(UseLargePages, "only for large pages");
++ assert(UseLargePages && UseSHM, "only for SHM large pages");
+
+ key_t key = IPC_PRIVATE;
+ char *addr;
+@@ -3003,19 +3047,19 @@
+ return _large_page_size;
+ }
+
+-// Linux does not support anonymous mmap with large page memory. The only way
+-// to reserve large page memory without file backing is through SysV shared
+-// memory API. The entire memory region is committed and pinned upfront.
+-// Hopefully this will change in the future...
++// HugeTLBFS allows application to commit large page memory on demand;
++// with SysV SHM the entire memory region must be allocated as shared
++// memory.
+ bool os::can_commit_large_page_memory() {
+- return false;
++ return UseHugeTLBFS;
+ }
+
+ bool os::can_execute_large_page_memory() {
+- return false;
++ return UseHugeTLBFS;
+ }
+
+ // Reserve memory at an arbitrary address, only if that area is
++// Reserve memory at an arbitrary address, only if that area is
+ // available (and not reserved for something else).
+
+ char* os::attempt_reserve_memory_at(size_t bytes, char* requested_addr) {
+@@ -5009,6 +5053,43 @@
+ // JSR166
+ // -------------------------------------------------------
+
++bool os::Linux::hugetlbfs_sanity_check(bool warn, size_t page_size) {
++ bool result = false;
++ void *p = mmap (NULL, page_size, PROT_READ|PROT_WRITE,
++ MAP_ANONYMOUS|MAP_PRIVATE|MAP_HUGETLB,
++ -1, 0);
++
++ if (p != (void *) -1) {
++ // We don't know if this really is a huge page or not.
++ FILE *fp = fopen("/proc/self/maps", "r");
++ if (fp) {
++ while (!feof(fp)) {
++ char chars[257];
++ long x = 0;
++ if (fgets(chars, sizeof(chars), fp)) {
++ if (sscanf(chars, "%lx-%*lx", &x) == 1
++ && x == (long)p) {
++ if (strstr (chars, "hugepage")) {
++ result = true;
++ break;
++ }
++ }
++ }
++ }
++ fclose(fp);
++ }
++ munmap (p, page_size);
++ if (result)
++ return true;
++ }
++
++ if (warn) {
++ warning("HugeTLBFS is not supported by the operating system.");
++ }
++
++ return result;
++}
++
+ /*
+ * The solaris and linux implementations of park/unpark are fairly
+ * conservative for now, but can be improved. They currently use a
+diff -u -r openjdk.patched/hotspot/src/os/linux/vm/os_linux.cpp.orig openjdk/hotspot/src/os/linux/vm/os_linux.cpp.orig
+--- openjdk.patched/hotspot/src/os/linux/vm/os_linux.cpp.orig 2011-05-03 14:32:53.972649764 +0100
++++ openjdk/hotspot/src/os/linux/vm/os_linux.cpp.orig 2011-05-04 11:57:15.085470494 +0100
+@@ -117,6 +117,10 @@
+ # include <inttypes.h>
+ # include <sys/ioctl.h>
+
++#if __x86_64__
++#include <asm/vsyscall.h>
++#endif
++
+ #define MAX_PATH (2 * K)
+
+ // for timer info max values which include all bits
+@@ -2491,16 +2495,40 @@
+ return res != (uintptr_t) MAP_FAILED;
+ }
+
++// Define MAP_HUGETLB here so we can build HotSpot on old systems.
++#ifndef MAP_HUGETLB
++#define MAP_HUGETLB 0x40000
++#endif
++
++// Define MADV_HUGEPAGE here so we can build HotSpot on old systems.
++#ifndef MADV_HUGEPAGE
++#define MADV_HUGEPAGE 14
++#endif
++
+ bool os::commit_memory(char* addr, size_t size, size_t alignment_hint,
+ bool exec) {
++ if (UseHugeTLBFS && alignment_hint > (size_t)vm_page_size()) {
++ int prot = exec ? PROT_READ|PROT_WRITE|PROT_EXEC : PROT_READ|PROT_WRITE;
++ uintptr_t res =
++ (uintptr_t) ::mmap(addr, size, prot,
++ MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS|MAP_HUGETLB,
++ -1, 0);
++ return res != (uintptr_t) MAP_FAILED;
++ }
++
+ return commit_memory(addr, size, exec);
+ }
+
+-void os::realign_memory(char *addr, size_t bytes, size_t alignment_hint) { }
++void os::realign_memory(char *addr, size_t bytes, size_t alignment_hint) {
++ if (UseHugeTLBFS && alignment_hint > (size_t)vm_page_size()) {
++ // We don't check the return value: madvise(MADV_HUGEPAGE) may not
++ // be supported or the memory may already be backed by huge pages.
++ ::madvise(addr, bytes, MADV_HUGEPAGE);
++ }
++}
+
+ void os::free_memory(char *addr, size_t bytes) {
+- ::mmap(addr, bytes, PROT_READ | PROT_WRITE,
+- MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0);
++ ::madvise(addr, bytes, MADV_DONTNEED);
+ }
+
+ void os::numa_make_global(char *addr, size_t bytes) {
+@@ -2544,6 +2572,21 @@
+ return end;
+ }
+
++static int sched_getcpu_syscall(void) {
++ unsigned int cpu;
++ int retval = -1;
++
++#if __x86_64__
++ typedef long (*vgetcpu_t)(unsigned int *cpu, unsigned int *node, unsigned long *tcache);
++ vgetcpu_t vgetcpu = (vgetcpu_t)VSYSCALL_ADDR(__NR_vgetcpu);
++ retval = vgetcpu(&cpu, NULL, NULL);
++#elif __i386__
++ retval = syscall(SYS_getcpu, &cpu, NULL, NULL);
++#endif
++
++ return (retval == -1) ? retval : cpu;
++}
++
+ extern "C" void numa_warn(int number, char *where, ...) { }
+ extern "C" void numa_error(char *where) { }
+
+@@ -2565,6 +2608,10 @@
+ set_sched_getcpu(CAST_TO_FN_PTR(sched_getcpu_func_t,
+ dlsym(RTLD_DEFAULT, "sched_getcpu")));
+
++ // If it's not, try a direct syscall.
++ if (sched_getcpu() == -1)
++ set_sched_getcpu(CAST_TO_FN_PTR(sched_getcpu_func_t, (void*)&sched_getcpu_syscall));
++
+ if (sched_getcpu() != -1) { // Does it work?
+ void *handle = dlopen("libnuma.so.1", RTLD_LAZY);
+ if (handle != NULL) {
+@@ -2847,7 +2894,16 @@
+ static size_t _large_page_size = 0;
+
+ bool os::large_page_init() {
+- if (!UseLargePages) return false;
++ if (!UseLargePages) {
++ UseHugeTLBFS = false;
++ UseSHM = false;
++ return false;
++ }
++
++ if (FLAG_IS_DEFAULT(UseHugeTLBFS) && FLAG_IS_DEFAULT(UseSHM)) {
++ // Our user has not expressed a preference, so we'll try both.
++ UseHugeTLBFS = UseSHM = true;
++ }
+
+ if (LargePageSizeInBytes) {
+ _large_page_size = LargePageSizeInBytes;
+@@ -2892,6 +2948,9 @@
+ }
+ }
+
++ // print a warning if any large page related flag is specified on command line
++ bool warn_on_failure = !FLAG_IS_DEFAULT(UseHugeTLBFS);
++
+ const size_t default_page_size = (size_t)Linux::page_size();
+ if (_large_page_size > default_page_size) {
+ _page_sizes[0] = _large_page_size;
+@@ -2899,6 +2958,14 @@
+ _page_sizes[2] = 0;
+ }
+
++ UseHugeTLBFS = UseHugeTLBFS &&
++ Linux::hugetlbfs_sanity_check(warn_on_failure, _large_page_size);
++
++ if (UseHugeTLBFS)
++ UseSHM = false;
++
++ UseLargePages = UseHugeTLBFS || UseSHM;
++
+ // Large page support is available on 2.6 or newer kernel, some vendors
+ // (e.g. Redhat) have backported it to their 2.4 based distributions.
+ // We optimistically assume the support is available. If later it turns out
+@@ -2913,7 +2980,7 @@
+ char* os::reserve_memory_special(size_t bytes, char* req_addr, bool exec) {
+ // "exec" is passed in but not used. Creating the shared image for
+ // the code cache doesn't have an SHM_X executable permission to check.
+- assert(UseLargePages, "only for large pages");
++ assert(UseLargePages && UseSHM, "only for SHM large pages");
+
+ key_t key = IPC_PRIVATE;
+ char *addr;
+@@ -2980,19 +3047,19 @@
+ return _large_page_size;
+ }
+
+-// Linux does not support anonymous mmap with large page memory. The only way
+-// to reserve large page memory without file backing is through SysV shared
+-// memory API. The entire memory region is committed and pinned upfront.
+-// Hopefully this will change in the future...
++// HugeTLBFS allows application to commit large page memory on demand;
++// with SysV SHM the entire memory region must be allocated as shared
++// memory.
+ bool os::can_commit_large_page_memory() {
+- return false;
++ return UseHugeTLBFS;
+ }
+
+ bool os::can_execute_large_page_memory() {
+- return false;
++ return UseHugeTLBFS;
+ }
+
+ // Reserve memory at an arbitrary address, only if that area is
++// Reserve memory at an arbitrary address, only if that area is
+ // available (and not reserved for something else).
+
+ char* os::attempt_reserve_memory_at(size_t bytes, char* requested_addr) {
+@@ -4081,6 +4148,23 @@
+ UseNUMA = false;
+ }
+ }
++ // With SHM large pages we cannot uncommit a page, so there's not way
++ // we can make the adaptive lgrp chunk resizing work. If the user specified
++ // both UseNUMA and UseLargePages (or UseSHM) on the command line - warn and
++ // disable adaptive resizing.
++ if (UseNUMA && UseLargePages && UseSHM) {
++ if (!FLAG_IS_DEFAULT(UseNUMA)) {
++ if (FLAG_IS_DEFAULT(UseLargePages) && FLAG_IS_DEFAULT(UseSHM)) {
++ UseLargePages = false;
++ } else {
++ warning("UseNUMA is not fully compatible with SHM large pages, disabling adaptive resizing");
++ UseAdaptiveSizePolicy = false;
++ UseAdaptiveNUMAChunkSizing = false;
++ }
++ } else {
++ UseNUMA = false;
++ }
++ }
+ if (!UseNUMA && ForceNUMA) {
+ UseNUMA = true;
+ }
+@@ -4986,6 +5070,43 @@
+ // JSR166
+ // -------------------------------------------------------
+
++bool os::Linux::hugetlbfs_sanity_check(bool warn, size_t page_size) {
++ bool result = false;
++ void *p = mmap (NULL, page_size, PROT_READ|PROT_WRITE,
++ MAP_ANONYMOUS|MAP_PRIVATE|MAP_HUGETLB,
++ -1, 0);
++
++ if (p != (void *) -1) {
++ // We don't know if this really is a huge page or not.
++ FILE *fp = fopen("/proc/self/maps", "r");
++ if (fp) {
++ while (!feof(fp)) {
++ char chars[257];
++ long x = 0;
++ if (fgets(chars, sizeof(chars), fp)) {
++ if (sscanf(chars, "%lx-%*lx", &x) == 1
++ && x == (long)p) {
++ if (strstr (chars, "hugepage")) {
++ result = true;
++ break;
++ }
++ }
++ }
++ }
++ fclose(fp);
++ }
++ munmap (p, page_size);
++ if (result)
++ return true;
++ }
++
++ if (warn) {
++ warning("HugeTLBFS is not supported by the operating system.");
++ }
++
++ return result;
++}
++
+ /*
+ * The solaris and linux implementations of park/unpark are fairly
+ * conservative for now, but can be improved. They currently use a
+diff -u -r openjdk.patched/hotspot/src/os/linux/vm/os_linux.hpp openjdk/hotspot/src/os/linux/vm/os_linux.hpp
+--- openjdk.patched/hotspot/src/os/linux/vm/os_linux.hpp 2011-03-16 02:30:16.000000000 +0000
++++ openjdk/hotspot/src/os/linux/vm/os_linux.hpp 2011-05-04 11:57:15.088471194 +0100
+@@ -86,6 +86,9 @@
+
+ static void rebuild_cpu_to_node_map();
+ static GrowableArray<int>* cpu_to_node() { return _cpu_to_node; }
++
++ static bool hugetlbfs_sanity_check(bool warn, size_t page_size);
++
+ public:
+ static void init_thread_fpu_state();
+ static int get_fpu_control_word();
diff -r 2f549e3a4720 patches/openjdk/7037939-hugepage.patch
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/patches/openjdk/7037939-hugepage.patch Fri May 13 18:15:09 2011 +0100
@@ -0,0 +1,71 @@
+
+# HG changeset patch
+# User iveresov
+# Date 1303843594 25200
+# Node ID c303b3532d4ada074facc42292219952c2a4204e
+# Parent d6cdc6c77582408f6450bdda3ffc831d44298714
+7037939: NUMA: Disable adaptive resizing if SHM large pages are used
+Summary: Make the NUMA allocator behave properly with SHM and ISM large pages.
+Reviewed-by: ysr
+
+--- openjdk.patched/hotspot/src/os/linux/vm/os_linux.cpp Sat Apr 23 04:20:09 2011 -0700
++++ openjdk/hotspot/src/os/linux/vm/os_linux.cpp Tue Apr 26 11:46:34 2011 -0700
+@@ -4170,6 +4170,23 @@ jint os::init_2(void)
+ UseNUMA = false;
+ }
+ }
++ // With SHM large pages we cannot uncommit a page, so there's not way
++ // we can make the adaptive lgrp chunk resizing work. If the user specified
++ // both UseNUMA and UseLargePages (or UseSHM) on the command line - warn and
++ // disable adaptive resizing.
++ if (UseNUMA && UseLargePages && UseSHM) {
++ if (!FLAG_IS_DEFAULT(UseNUMA)) {
++ if (FLAG_IS_DEFAULT(UseLargePages) && FLAG_IS_DEFAULT(UseSHM)) {
++ UseLargePages = false;
++ } else {
++ warning("UseNUMA is not fully compatible with SHM large pages, disabling adaptive resizing");
++ UseAdaptiveSizePolicy = false;
++ UseAdaptiveNUMAChunkSizing = false;
++ }
++ } else {
++ UseNUMA = false;
++ }
++ }
+ if (!UseNUMA && ForceNUMA) {
+ UseNUMA = true;
+ }
+--- openjdk.patched/hotspot/src/os/solaris/vm/os_solaris.cpp Sat Apr 23 04:20:09 2011 -0700
++++ openjdk/hotspot/src/os/solaris/vm/os_solaris.cpp Tue Apr 26 11:46:34 2011 -0700
+@@ -2826,7 +2826,9 @@ void os::realign_memory(char *addr, size
+ void os::realign_memory(char *addr, size_t bytes, size_t alignment_hint) {
+ assert((intptr_t)addr % alignment_hint == 0, "Address should be aligned.");
+ assert((intptr_t)(addr + bytes) % alignment_hint == 0, "End should be aligned.");
+- Solaris::set_mpss_range(addr, bytes, alignment_hint);
++ if (UseLargePages && UseMPSS) {
++ Solaris::set_mpss_range(addr, bytes, alignment_hint);
++ }
+ }
+
+ // Tell the OS to make the range local to the first-touching LWP
+@@ -5041,6 +5043,20 @@ jint os::init_2(void) {
+ FREE_C_HEAP_ARRAY(int, lgrp_ids);
+ if (lgrp_num < 2) {
+ // There's only one locality group, disable NUMA.
++ UseNUMA = false;
++ }
++ }
++ // ISM is not compatible with the NUMA allocator - it always allocates
++ // pages round-robin across the lgroups.
++ if (UseNUMA && UseLargePages && UseISM) {
++ if (!FLAG_IS_DEFAULT(UseNUMA)) {
++ if (FLAG_IS_DEFAULT(UseLargePages) && FLAG_IS_DEFAULT(UseISM)) {
++ UseLargePages = false;
++ } else {
++ warning("UseNUMA is not compatible with ISM large pages, disabling NUMA allocator");
++ UseNUMA = false;
++ }
++ } else {
+ UseNUMA = false;
+ }
+ }
+
diff -r 2f549e3a4720 patches/openjdk/7043564-hugepage.patch
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/patches/openjdk/7043564-hugepage.patch Fri May 13 18:15:09 2011 +0100
@@ -0,0 +1,21 @@
+
+# HG changeset patch
+# User iveresov
+# Date 1305055570 25200
+# Node ID 97b64f73103bddbcd9d987fd13854957d2a80600
+# Parent fc2b798ab316df025526f208aeeef19609ad51b3
+7043564: compile warning and copyright fixes
+Summary: Fixed the warning, also fixed copyrights in a bunch of files.
+Reviewed-by: johnc, kvn
+
+--- openjdk.prev/hotspot/src/os/linux/vm/os_linux.cpp Tue May 10 00:33:21 2011 -0700
++++ openjdk/hotspot/src/os/linux/vm/os_linux.cpp Tue May 10 12:26:10 2011 -0700
+@@ -2850,7 +2850,7 @@ bool os::Linux::hugetlbfs_sanity_check(b
+ char chars[257];
+ long x = 0;
+ if (fgets(chars, sizeof(chars), fp)) {
+- if (sscanf(chars, "%lx-%*lx", &x) == 1
++ if (sscanf(chars, "%lx-%*x", &x) == 1
+ && x == (long)p) {
+ if (strstr (chars, "hugepage")) {
+ result = true;
More information about the distro-pkg-dev
mailing list