Support huge pages on old Linux kernels

Dr Andrew John Hughes ahughes at redhat.com
Wed Jun 8 06:13:34 PDT 2011


On 17:33 Fri 03 Jun, Andrew Haley wrote:
> This is a patch that allows huge pages to be used more easily on older
> kernels that don't have MAP_HUGETLB.  The idea is that you create a
> filesystem of type hugetlbfs like so:
> 
> # mount -t hugetlbfs -o gid=aph,mode=0700 none /mnt/huge
> 
> And create some huge pages:
> 
> # echo 1024 > /proc/sys/vm/nr_hugepages
> 
> Java then searches for a huge page mount point and if one exists
> uses it to create mappings.
> 
> I'm not sure how generally useful this is.  For systems with
> long-term support such as Red Hat Enterprise Linux this makes sense,
> but are there any other distros where this might be useful?  I don't
> really want to push this upstream with a bunch of #defines or probes
> to test the OS's capability: that code is complicated enough already.
> 

Does this still work as before on systems with MAP_HUGETLB?  The patch
seems to be removing the existing MAP_HUGETLB and madvise(MADV_HUGEPAGE)
code paths rather than just adding a fallback.

> Andrew.
> 
> 
> diff -u -r openjdk.patched/hotspot/src/os/linux/vm/os_linux.cpp openjdk/hotspot/src/os/linux/vm/os_linux.cpp
> --- openjdk.patched/hotspot/src/os/linux/vm/os_linux.cpp	2011-05-26 17:01:38.998093000 +0100
> +++ openjdk/hotspot/src/os/linux/vm/os_linux.cpp	2011-06-03 17:15:33.074759103 +0100
> @@ -164,6 +164,8 @@
>  /* Used to protect dlsym() calls */
>  static pthread_mutex_t dl_mutex;
> 
> +static os::HugePagesFile huge_pages_file;
> +
>  ////////////////////////////////////////////////////////////////////////////////
>  // utility functions
> 
> @@ -2495,24 +2497,14 @@
>    return res != (uintptr_t) MAP_FAILED;
>  }
> 
> -// Define MAP_HUGETLB here so we can build HotSpot on old systems.
> -#ifndef MAP_HUGETLB
> -#define MAP_HUGETLB 0x40000
> -#endif
> -
> -// Define MADV_HUGEPAGE here so we can build HotSpot on old systems.
> -#ifndef MADV_HUGEPAGE
> -#define MADV_HUGEPAGE 14
> -#endif
> -
>  bool os::commit_memory(char* addr, size_t size, size_t alignment_hint,
>                         bool exec) {
>    if (UseHugeTLBFS && alignment_hint > (size_t)vm_page_size()) {
>      int prot = exec ? PROT_READ|PROT_WRITE|PROT_EXEC : PROT_READ|PROT_WRITE;
>      uintptr_t res =
>        (uintptr_t) ::mmap(addr, size, prot,
> -                         MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS|MAP_HUGETLB,
> -                         -1, 0);
> +                         MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS,
> +                         huge_pages_file.fd, 0);
>      return res != (uintptr_t) MAP_FAILED;
>    }
> 
> @@ -2520,11 +2512,7 @@
>  }
> 
>  void os::realign_memory(char *addr, size_t bytes, size_t alignment_hint) {
> -  if (UseHugeTLBFS && alignment_hint > (size_t)vm_page_size()) {
> -    // We don't check the return value: madvise(MADV_HUGEPAGE) may not
> -    // be supported or the memory may already be backed by huge pages.
> -    ::madvise(addr, bytes, MADV_HUGEPAGE);
> -  }
> +  // There's nothing we can do here.
>  }
> 
>  void os::free_memory(char *addr, size_t bytes) {
> @@ -5070,36 +5058,90 @@
>  // JSR166
>  // -------------------------------------------------------
> 
> +
> +// Parse one whitespace-delimited field from a line.  Skip leading
> +// space characters, scan to the end of the field, overwrite the
> +// delimiter (if any) with a null character and advance *start past
> +// it.  Return the address of the first non-space character.
> +static char *scan_field(char **start)
> +{
> +  char *s = *start;
> +  while (*s && isspace(*s))
> +    s++;
> +
> +  char *field = s;
> +  while (*s && ! isspace(*s))
> +    s++;
> +
> +  if (*s)
> +    *s++ = 0;
> +
> +  *start = s;
> +  return field;
> +}
> +
>  bool os::Linux::hugetlbfs_sanity_check(bool warn, size_t page_size) {
> -  bool result = false;
> -  void *p = mmap (NULL, page_size, PROT_READ|PROT_WRITE,
> -                  MAP_ANONYMOUS|MAP_PRIVATE|MAP_HUGETLB,
> -                  -1, 0);
> -
> -  if (p != (void *) -1) {
> -    // We don't know if this really is a huge page or not.
> -    FILE *fp = fopen("/proc/self/maps", "r");
> -    if (fp) {
> -      while (!feof(fp)) {
> -        char chars[257];
> -        long x = 0;
> -        if (fgets(chars, sizeof(chars), fp)) {
> -          if (sscanf(chars, "%lx-%*x", &x) == 1
> -              && x == (long)p) {
> -            if (strstr (chars, "hugepage")) {
> -              result = true;
> -              break;
> -            }
> -          }
> -        }
> +  FILE *f = fopen ("/proc/mounts", "r");
> +
> +  if (! f)
> +    return false;
> +
> +  size_t line_len = 256;
> +  char *line = (char*)malloc (line_len);
> +
> +  int result = false;
> +
> +  if (line) {
> +
> +    while (getline (&line, &line_len, f)) {
> +      char *mount_point;
> +      char *fs_type;
> +      char *s = line;
> +
> +      scan_field (&s);
> +      mount_point = scan_field (&s);
> +      fs_type = scan_field (&s);
> +
> +      if (strcmp (fs_type, "hugetlbfs") == 0) {
> +	const char *filename = "/java-XXXXXX";
> +	size_t mount_point_len = strlen (mount_point);
> +	size_t filename_len = strlen (filename);
> +	char *tmpl = (char *)malloc (mount_point_len + filename_len +1);
> +
> +	if (! tmpl)
> +	  break;
> +
> +	memcpy (tmpl, mount_point, mount_point_len);
> +	memcpy (tmpl + mount_point_len, filename, filename_len);
> +	tmpl[mount_point_len + filename_len] = 0;
> +
> +	huge_pages_file.fd = mkstemp (tmpl);
> +	if (huge_pages_file.fd != -1) {
> +	  int prot = PROT_READ|PROT_WRITE;
> +	  void *res =
> +	    ::mmap(NULL, page_size, prot,
> +		   MAP_PRIVATE|MAP_ANONYMOUS,
> +		   huge_pages_file.fd, 0);
> +
> +	  if (res) {
> +	    huge_pages_file.name = tmpl;
> +	    ::munmap (res, page_size);
> +	    result = true;
> +	    break;
> +	  }
> +
> +	  ::unlink (tmpl);
> +	}
> +	free (tmpl);
>        }
> -      fclose(fp);
>      }
> -    munmap (p, page_size);
> -    if (result)
> -      return true;
> +
> +    free (line);
>    }
> 
> +  if (result)
> +    return true;
> +
>    if (warn) {
>      warning("HugeTLBFS is not supported by the operating system.");
>    }
> Only in openjdk/hotspot/src/os/linux/vm: os_linux.cpp~
> diff -u -r openjdk.patched/hotspot/src/os/linux/vm/os_linux.hpp openjdk/hotspot/src/os/linux/vm/os_linux.hpp
> --- openjdk.patched/hotspot/src/os/linux/vm/os_linux.hpp	2011-05-26 17:01:38.908088000 +0100
> +++ openjdk/hotspot/src/os/linux/vm/os_linux.hpp	2011-06-03 16:05:53.570180329 +0100
> @@ -335,4 +335,23 @@
>      }
>  } ;
> 
> +class HugePagesFile {
> +public:
> +  int fd;
> +  char *name;
> +
> +  HugePagesFile() {
> +    fd = -1;
> +    name = NULL;
> +  }
> +
> +  ~HugePagesFile() {
> +    int status;
> +    if (name) {
> +      status = ::unlink (name);
> +      assert_status(status == 0, status, "unlink hugepagefile");
> +    }
> +  }
> +} ;
> +
>  #endif // OS_LINUX_VM_OS_LINUX_HPP

-- 
Andrew :)

Free Java Software Engineer
Red Hat, Inc. (http://www.redhat.com)

Support Free Java!
Contribute to GNU Classpath and IcedTea
http://www.gnu.org/software/classpath
http://icedtea.classpath.org
PGP Key: F5862A37 (https://keys.indymedia.org/)
Fingerprint = EA30 D855 D50F 90CD F54D  0698 0713 C3ED F586 2A37



More information about the distro-pkg-dev mailing list