PING: Linux: Support transparent hugepages

Andrew Haley aph at redhat.com
Wed Apr 6 08:21:55 PDT 2011


On 04/04/2011 05:43 PM, Igor Veresov wrote:

> I didn't mean that the work I mentioned in the last paragraph you quoted
> already exists; rather, I meant that it should be done in order to
> implement proper transparent pages support in Linux (because we want the
> older Linux kernels to work too).
> 
> I think it should be analogous to the Solaris implementation, which can 
> be found here: 
> http://hg.openjdk.java.net/jdk7/hotspot/hotspot/file/2dbcb4a4d8da/src/os/solaris/vm/os_solaris.cpp 

Here is the full patch, with backwards compatibility.  It builds on
old and new systems, preferring MAP_HUGETLB but falling back to the
old SysV shmget() if that doesn't work.

If this looks right to you, I'll prepare a CR for pushing.  I'll need
a bug ID, please.

Thanks,
Andrew.


diff -r 2dbcb4a4d8da src/os/linux/vm/globals_linux.hpp
--- a/src/os/linux/vm/globals_linux.hpp	Fri Apr 01 20:44:31 2011 -0700
+++ b/src/os/linux/vm/globals_linux.hpp	Wed Apr 06 15:46:23 2011 +0100
@@ -33,9 +33,15 @@
         "enable support for Oprofile profiler")                     \
                                                                     \
   product(bool, UseLinuxPosixThreadCPUClocks, true,                 \
-          "enable fast Linux Posix clocks where available")
-// NB: The default value of UseLinuxPosixThreadCPUClocks may be
-// overridden in Arguments::parse_each_vm_init_arg.
+          "enable fast Linux Posix clocks where available")             \
+/*  NB: The default value of UseLinuxPosixThreadCPUClocks may be        \
+    overridden in Arguments::parse_each_vm_init_arg.  */                \
+                                                                        \
+  product(bool, UseHugeTLBFS, false,                                    \
+          "Use MAP_HUGETLB for large pages")                            \
+                                                                        \
+  product(bool, UseSHM, false,                                          \
+          "Use SYSV shared memory for large pages")

 //
 // Defines Linux-specific default values. The flags are available on all
diff -r 2dbcb4a4d8da src/os/linux/vm/os_linux.cpp
--- a/src/os/linux/vm/os_linux.cpp	Fri Apr 01 20:44:31 2011 -0700
+++ b/src/os/linux/vm/os_linux.cpp	Wed Apr 06 15:46:23 2011 +0100
@@ -2465,8 +2465,22 @@
   return res != (uintptr_t) MAP_FAILED;
 }

+// Define MAP_HUGETLB here so we can build HotSpot on old systems.
+#ifndef MAP_HUGETLB
+#define MAP_HUGETLB 0x40000
+#endif
+
 bool os::commit_memory(char* addr, size_t size, size_t alignment_hint,
                        bool exec) {
+  if (UseHugeTLBFS && alignment_hint > (size_t)vm_page_size()) {
+    int prot = exec ? PROT_READ|PROT_WRITE|PROT_EXEC : PROT_READ|PROT_WRITE;
+    uintptr_t res =
+      (uintptr_t) ::mmap(addr, size, prot,
+			 MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS|MAP_HUGETLB,
+			 -1, 0);
+    return res != (uintptr_t) MAP_FAILED;
+  }
+
   return commit_memory(addr, size, exec);
 }

@@ -2818,6 +2832,42 @@
   return linux_mprotect(addr, size, PROT_READ|PROT_WRITE);
 }

+bool os::Linux::hugetlbfs_sanity_check(bool warn, size_t page_size) {
+  bool result = false;
+  void *p = mmap (NULL, page_size, PROT_READ|PROT_WRITE,
+                  MAP_ANONYMOUS|MAP_PRIVATE|MAP_HUGETLB,
+		  -1, 0);
+
+  if (p != (void *) -1) {
+    // We don't know if this really is a huge page or not.
+    FILE *fp = fopen("/proc/self/maps", "r");
+    if (fp) {
+      while (!feof(fp)) {
+        char chars[257];
+        long x = 0;
+        if (fgets(chars, sizeof chars, fp)) {
+          if (sscanf(chars, "%lx-%*lx", &x) == 1
+              && x == (long)p) {
+            if (strstr (chars, "hugepage")) {
+              result = true;
+              break;
+            }
+          }
+        }
+      }
+      fclose(fp);
+    }
+    munmap (p, page_size);
+    if (result)
+      return true;
+  }
+
+  if (warn) {
+    warning("HugeTLBFS is not supported by the operating system.");
+  }
+}
+
+
 /*
 * Set the coredump_filter bits to include largepages in core dump (bit 6)
 *
@@ -2860,7 +2910,16 @@
 static size_t _large_page_size = 0;

 bool os::large_page_init() {
-  if (!UseLargePages) return false;
+  if (!UseLargePages) {
+    UseHugeTLBFS = false;
+    UseSHM = false;
+    return false;
+  }
+
+  if (FLAG_IS_DEFAULT(UseHugeTLBFS) && FLAG_IS_DEFAULT(UseSHM)) {
+    // Our user has not expressed a preference, so we'll try both.
+    UseHugeTLBFS = UseSHM = true;
+  }

   if (LargePageSizeInBytes) {
     _large_page_size = LargePageSizeInBytes;
@@ -2905,6 +2964,9 @@
     }
   }

+  // Print a warning only if UseHugeTLBFS was explicitly set on the command line.
+  bool warn_on_failure = !FLAG_IS_DEFAULT(UseHugeTLBFS);
+
   const size_t default_page_size = (size_t)Linux::page_size();
   if (_large_page_size > default_page_size) {
     _page_sizes[0] = _large_page_size;
@@ -2912,6 +2974,14 @@
     _page_sizes[2] = 0;
   }

+  UseHugeTLBFS = UseHugeTLBFS &&
+                 Linux::hugetlbfs_sanity_check(warn_on_failure, _large_page_size);
+
+  if (UseHugeTLBFS)
+    UseSHM = false;
+
+  UseLargePages = UseHugeTLBFS || UseSHM;
+
   set_coredump_filter();

   // Large page support is available on 2.6 or newer kernel, some vendors
@@ -2928,7 +2998,7 @@
 char* os::reserve_memory_special(size_t bytes, char* req_addr, bool exec) {
   // "exec" is passed in but not used.  Creating the shared image for
   // the code cache doesn't have an SHM_X executable permission to check.
-  assert(UseLargePages, "only for large pages");
+  assert(UseLargePages && UseSHM, "only for SHM large pages");

   key_t key = IPC_PRIVATE;
   char *addr;
@@ -2995,16 +3065,15 @@
   return _large_page_size;
 }

-// Linux does not support anonymous mmap with large page memory. The only way
-// to reserve large page memory without file backing is through SysV shared
-// memory API. The entire memory region is committed and pinned upfront.
-// Hopefully this will change in the future...
+// HugeTLBFS allows an application to commit large page memory on demand;
+// with SysV SHM the entire memory region must be allocated as shared
+// memory.
 bool os::can_commit_large_page_memory() {
-  return false;
+  return UseHugeTLBFS;
 }

 bool os::can_execute_large_page_memory() {
-  return false;
+  return UseHugeTLBFS;
 }

 // Reserve memory at an arbitrary address, only if that area is
diff -r 2dbcb4a4d8da src/os/linux/vm/os_linux.hpp
--- a/src/os/linux/vm/os_linux.hpp	Fri Apr 01 20:44:31 2011 -0700
+++ b/src/os/linux/vm/os_linux.hpp	Wed Apr 06 15:46:23 2011 +0100
@@ -86,6 +86,9 @@

   static void rebuild_cpu_to_node_map();
   static GrowableArray<int>* cpu_to_node()    { return _cpu_to_node; }
+
+  static bool hugetlbfs_sanity_check(bool warn, size_t page_size);
+
  public:
   static void init_thread_fpu_state();
   static int  get_fpu_control_word();


More information about the hotspot-dev mailing list