PING: Linux: Support transparent hugepages
Andrew Haley
aph at redhat.com
Wed Apr 6 08:21:55 PDT 2011
On 04/04/2011 05:43 PM, Igor Veresov wrote:
> I didn't mean the work I mentioned in the last paragraph that you quoted
> exists, I rather meant that it should be done in order to implement
> proper transparent pages support in linux (because we want the older
> linux kernels to work too).
>
> I think it should be analogous to the Solaris implementation, which can
> be found here:
> http://hg.openjdk.java.net/jdk7/hotspot/hotspot/file/2dbcb4a4d8da/src/os/solaris/vm/os_solaris.cpp
Here is the full patch, with backwards compatibility. It builds on
old and new systems, preferring MAP_HUGETLB but falling back to the
old SysV shmget() if that doesn't work.
If this looks right to you, I'll prepare a CR for pushing. I'll need
a bug ID, please.
Thanks,
Andrew.
diff -r 2dbcb4a4d8da src/os/linux/vm/globals_linux.hpp
--- a/src/os/linux/vm/globals_linux.hpp Fri Apr 01 20:44:31 2011 -0700
+++ b/src/os/linux/vm/globals_linux.hpp Wed Apr 06 15:46:23 2011 +0100
@@ -33,9 +33,15 @@
"enable support for Oprofile profiler") \
\
product(bool, UseLinuxPosixThreadCPUClocks, true, \
- "enable fast Linux Posix clocks where available")
-// NB: The default value of UseLinuxPosixThreadCPUClocks may be
-// overridden in Arguments::parse_each_vm_init_arg.
+ "enable fast Linux Posix clocks where available") \
+/* NB: The default value of UseLinuxPosixThreadCPUClocks may be \
+ overridden in Arguments::parse_each_vm_init_arg. */ \
+ \
+ product(bool, UseHugeTLBFS, false, \
+ "Use MAP_HUGETLB for large pages") \
+ \
+ product(bool, UseSHM, false, \
+ "Use SYSV shared memory for large pages")
//
// Defines Linux-specific default values. The flags are available on all
diff -r 2dbcb4a4d8da src/os/linux/vm/os_linux.cpp
--- a/src/os/linux/vm/os_linux.cpp Fri Apr 01 20:44:31 2011 -0700
+++ b/src/os/linux/vm/os_linux.cpp Wed Apr 06 15:46:23 2011 +0100
@@ -2465,8 +2465,22 @@
return res != (uintptr_t) MAP_FAILED;
}
+// Define MAP_HUGETLB here so we can build HotSpot on old systems.
+#ifndef MAP_HUGETLB
+#define MAP_HUGETLB 0x40000
+#endif
+
bool os::commit_memory(char* addr, size_t size, size_t alignment_hint,
bool exec) {
+ if (UseHugeTLBFS && alignment_hint > (size_t)vm_page_size()) {
+ int prot = exec ? PROT_READ|PROT_WRITE|PROT_EXEC : PROT_READ|PROT_WRITE;
+ uintptr_t res =
+ (uintptr_t) ::mmap(addr, size, prot,
+ MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS|MAP_HUGETLB,
+ -1, 0);
+ return res != (uintptr_t) MAP_FAILED;
+ }
+
return commit_memory(addr, size, exec);
}
@@ -2818,6 +2832,42 @@
return linux_mprotect(addr, size, PROT_READ|PROT_WRITE);
}
+// Probe for working MAP_HUGETLB: map one page_size page and check that its
+// /proc/self/maps line says "hugepage".  Warns on failure if 'warn' is set.
+bool os::Linux::hugetlbfs_sanity_check(bool warn, size_t page_size) {
+  bool result = false;
+  void *p = mmap (NULL, page_size, PROT_READ|PROT_WRITE,
+                  MAP_ANONYMOUS|MAP_PRIVATE|MAP_HUGETLB,
+                  -1, 0);
+
+  if (p != MAP_FAILED) {
+    // We don't know if this really is a huge page or not.
+    FILE *fp = fopen("/proc/self/maps", "r");
+    if (fp) {
+      while (!feof(fp)) {
+        char chars[257];
+        unsigned long x = 0;
+        if (fgets(chars, sizeof chars, fp)) {
+          if (sscanf(chars, "%lx-%*lx", &x) == 1
+              && x == (unsigned long)p) {
+            if (strstr (chars, "hugepage")) {
+              result = true;
+              break;
+            }
+          }
+        }
+      }
+      fclose(fp);
+    }
+    munmap (p, page_size);
+  }
+
+  if (!result && warn) {
+    warning("HugeTLBFS is not supported by the operating system.");
+  }
+  return result;
+}
+
/*
* Set the coredump_filter bits to include largepages in core dump (bit 6)
*
@@ -2860,7 +2910,16 @@
static size_t _large_page_size = 0;
bool os::large_page_init() {
- if (!UseLargePages) return false;
+ if (!UseLargePages) {
+ UseHugeTLBFS = false;
+ UseSHM = false;
+ return false;
+ }
+
+ if (FLAG_IS_DEFAULT(UseHugeTLBFS) && FLAG_IS_DEFAULT(UseSHM)) {
+ // Our user has not expressed a preference, so we'll try both.
+ UseHugeTLBFS = UseSHM = true;
+ }
if (LargePageSizeInBytes) {
_large_page_size = LargePageSizeInBytes;
@@ -2905,6 +2964,9 @@
}
}
+ // print a warning if any large page related flag is specified on command line
+ bool warn_on_failure = !FLAG_IS_DEFAULT(UseHugeTLBFS);
+
const size_t default_page_size = (size_t)Linux::page_size();
if (_large_page_size > default_page_size) {
_page_sizes[0] = _large_page_size;
@@ -2912,6 +2974,14 @@
_page_sizes[2] = 0;
}
+ UseHugeTLBFS = UseHugeTLBFS &&
+ Linux::hugetlbfs_sanity_check(warn_on_failure, _large_page_size);
+
+ if (UseHugeTLBFS)
+ UseSHM = false;
+
+ UseLargePages = UseHugeTLBFS || UseSHM;
+
set_coredump_filter();
// Large page support is available on 2.6 or newer kernel, some vendors
@@ -2928,7 +2998,7 @@
char* os::reserve_memory_special(size_t bytes, char* req_addr, bool exec) {
// "exec" is passed in but not used. Creating the shared image for
// the code cache doesn't have an SHM_X executable permission to check.
- assert(UseLargePages, "only for large pages");
+ assert(UseLargePages && UseSHM, "only for SHM large pages");
key_t key = IPC_PRIVATE;
char *addr;
@@ -2995,16 +3065,15 @@
return _large_page_size;
}
-// Linux does not support anonymous mmap with large page memory. The only way
-// to reserve large page memory without file backing is through SysV shared
-// memory API. The entire memory region is committed and pinned upfront.
-// Hopefully this will change in the future...
+// HugeTLBFS allows application to commit large page memory on demand;
+// with SysV SHM the entire memory region must be allocated as shared
+// memory.
bool os::can_commit_large_page_memory() {
- return false;
+ return UseHugeTLBFS;
}
bool os::can_execute_large_page_memory() {
- return false;
+ return UseHugeTLBFS;
}
// Reserve memory at an arbitrary address, only if that area is
diff -r 2dbcb4a4d8da src/os/linux/vm/os_linux.hpp
--- a/src/os/linux/vm/os_linux.hpp Fri Apr 01 20:44:31 2011 -0700
+++ b/src/os/linux/vm/os_linux.hpp Wed Apr 06 15:46:23 2011 +0100
@@ -86,6 +86,9 @@
static void rebuild_cpu_to_node_map();
static GrowableArray<int>* cpu_to_node() { return _cpu_to_node; }
+
+ static bool hugetlbfs_sanity_check(bool warn, size_t page_size);
+
public:
static void init_thread_fpu_state();
static int get_fpu_control_word();
More information about the hotspot-dev
mailing list