Support huge pages on old Linux kernels

Andrew Haley aph at redhat.com
Fri Jun 3 09:33:11 PDT 2011


This is a patch that allows huge pages to be used more easily on older
kernels that don't have MAP_HUGETLB.  The idea is that you mount a
filesystem of type hugetlbfs like so:

# mount -t hugetlbfs -o gid=aph,mode=0700 none /mnt/huge

And create some huge pages:

# echo 1024 > /proc/sys/vm/nr_hugepages

Java then searches for a hugetlbfs mount point and, if one exists,
uses it to create mappings.

I'm not sure how generally useful this is.  For systems with
long-term support such as Red Hat Enterprise Linux this makes sense,
but are there any other distros where this might be useful?  I don't
really want to push this upstream with a bunch of #defines or probes to
test the OS's capability: that code is complicated enough already.

Andrew.


diff -u -r openjdk.patched/hotspot/src/os/linux/vm/os_linux.cpp openjdk/hotspot/src/os/linux/vm/os_linux.cpp
--- openjdk.patched/hotspot/src/os/linux/vm/os_linux.cpp	2011-05-26 17:01:38.998093000 +0100
+++ openjdk/hotspot/src/os/linux/vm/os_linux.cpp	2011-06-03 17:15:33.074759103 +0100
@@ -164,6 +164,8 @@
 /* Used to protect dlsym() calls */
 static pthread_mutex_t dl_mutex;

+static os::HugePagesFile huge_pages_file;
+
 ////////////////////////////////////////////////////////////////////////////////
 // utility functions

@@ -2495,24 +2497,14 @@
   return res != (uintptr_t) MAP_FAILED;
 }

-// Define MAP_HUGETLB here so we can build HotSpot on old systems.
-#ifndef MAP_HUGETLB
-#define MAP_HUGETLB 0x40000
-#endif
-
-// Define MADV_HUGEPAGE here so we can build HotSpot on old systems.
-#ifndef MADV_HUGEPAGE
-#define MADV_HUGEPAGE 14
-#endif
-
 bool os::commit_memory(char* addr, size_t size, size_t alignment_hint,
                        bool exec) {
   if (UseHugeTLBFS && alignment_hint > (size_t)vm_page_size()) {
     int prot = exec ? PROT_READ|PROT_WRITE|PROT_EXEC : PROT_READ|PROT_WRITE;
     uintptr_t res =
       (uintptr_t) ::mmap(addr, size, prot,
-                         MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS|MAP_HUGETLB,
-                         -1, 0);
+                         MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS,
+                         huge_pages_file.fd, 0);
     return res != (uintptr_t) MAP_FAILED;
   }

@@ -2520,11 +2512,7 @@
 }

 void os::realign_memory(char *addr, size_t bytes, size_t alignment_hint) {
-  if (UseHugeTLBFS && alignment_hint > (size_t)vm_page_size()) {
-    // We don't check the return value: madvise(MADV_HUGEPAGE) may not
-    // be supported or the memory may already be backed by huge pages.
-    ::madvise(addr, bytes, MADV_HUGEPAGE);
-  }
+  // Intentionally empty: no madvise(MADV_HUGEPAGE) hint is issued; there's nothing we can do here.
 }

 void os::free_memory(char *addr, size_t bytes) {
@@ -5070,36 +5058,90 @@
 // JSR166
 // -------------------------------------------------------

+
+// Extract the next whitespace-delimited field from the string at *start.
+// Skips leading whitespace, overwrites the delimiter that follows the
+// field with a NUL, and advances *start past it (or leaves it at the
+// end of the string).  Returns a pointer to the field, which may be "".
+static char *scan_field(char **start)
+{
+  char *s = *start;
+  while (*s && isspace(*s))   // skip leading whitespace; NOTE(review): *s is a plain char, consider casting to unsigned char
+    s++;
+
+  char *field = s;            // first non-space character, or the terminating NUL
+  while (*s && ! isspace(*s)) // advance to the end of the field
+    s++;
+
+  if (*s)                     // stopped on a delimiter rather than at the end of the string
+    *s++ = 0;                 // terminate the field in place and step past the delimiter
+
+  *start = s;                 // the next call resumes scanning from here
+  return field;
+}
+
 bool os::Linux::hugetlbfs_sanity_check(bool warn, size_t page_size) {
-  bool result = false;
-  void *p = mmap (NULL, page_size, PROT_READ|PROT_WRITE,
-                  MAP_ANONYMOUS|MAP_PRIVATE|MAP_HUGETLB,
-                  -1, 0);
-
-  if (p != (void *) -1) {
-    // We don't know if this really is a huge page or not.
-    FILE *fp = fopen("/proc/self/maps", "r");
-    if (fp) {
-      while (!feof(fp)) {
-        char chars[257];
-        long x = 0;
-        if (fgets(chars, sizeof(chars), fp)) {
-          if (sscanf(chars, "%lx-%*x", &x) == 1
-              && x == (long)p) {
-            if (strstr (chars, "hugepage")) {
-              result = true;
-              break;
-            }
-          }
-        }
+  FILE *f = fopen ("/proc/mounts", "r");
+
+  if (! f)
+    return false;
+
+  size_t line_len = 256;
+  char *line = (char*)malloc (line_len);
+
+  int result = false;
+
+  if (line) {
+
+    while (getline (&line, &line_len, f)) {
+      char *mount_point;
+      char *fs_type;
+      char *s = line;
+
+      scan_field (&s);
+      mount_point = scan_field (&s);
+      fs_type = scan_field (&s);
+
+      if (strcmp (fs_type, "hugetlbfs") == 0) {
+	const char *filename = "/java-XXXXXX";
+	size_t mount_point_len = strlen (mount_point);
+	size_t filename_len = strlen (filename);
+	char *tmpl = (char *)malloc (mount_point_len + filename_len +1);
+
+	if (! tmpl)
+	  break;
+
+	memcpy (tmpl, mount_point, mount_point_len);
+	memcpy (tmpl + mount_point_len, filename, filename_len);
+	tmpl[mount_point_len + filename_len] = 0;
+
+	huge_pages_file.fd = mkstemp (tmpl);
+	if (huge_pages_file.fd != -1) {
+	  int prot = PROT_READ|PROT_WRITE;
+	  void *res =
+	    ::mmap(NULL, page_size, prot,
+		   MAP_PRIVATE|MAP_ANONYMOUS,
+		   huge_pages_file.fd, 0);
+
+	  if (res) {
+	    huge_pages_file.name = tmpl;
+	    ::munmap (res, page_size);
+	    result = true;
+	    break;
+	  }
+
+	  ::unlink (tmpl);
+	}
+	free (tmpl);
       }
-      fclose(fp);
     }
-    munmap (p, page_size);
-    if (result)
-      return true;
+
+    free (line);
   }

+  if (result)
+    return true;
+
   if (warn) {
     warning("HugeTLBFS is not supported by the operating system.");
   }
Only in openjdk/hotspot/src/os/linux/vm: os_linux.cpp~
diff -u -r openjdk.patched/hotspot/src/os/linux/vm/os_linux.hpp openjdk/hotspot/src/os/linux/vm/os_linux.hpp
--- openjdk.patched/hotspot/src/os/linux/vm/os_linux.hpp	2011-05-26 17:01:38.908088000 +0100
+++ openjdk/hotspot/src/os/linux/vm/os_linux.hpp	2011-06-03 16:05:53.570180329 +0100
@@ -335,4 +335,23 @@
     }
 } ;

+class HugePagesFile {   // Holds the descriptor and path of a file used for huge page mappings.
+public:
+  int fd;               // file descriptor; -1 until one is assigned
+  char *name;           // path of the file; NULL until one is assigned
+
+  HugePagesFile() {     // start with no file recorded
+    fd = -1;
+    name = NULL;
+  }
+
+  ~HugePagesFile() {    // unlink the file, if a name was recorded
+    int status;
+    if (name) {
+      status = ::unlink (name);
+      assert_status(status == 0, status, "unlink hugepagefile");
+    }                   // NOTE(review): fd is not closed and the name buffer is not released here; confirm this is intended
+  }
+} ;
+
 #endif // OS_LINUX_VM_OS_LINUX_HPP



More information about the distro-pkg-dev mailing list