Support huge pages on old Linux kernels
Dr Andrew John Hughes
ahughes at redhat.com
Wed Jun 8 06:13:34 PDT 2011
On 17:33 Fri 03 Jun, Andrew Haley wrote:
> This is a patch that allows huge pages to be used more easily on older
> kernels that don't have MAP_HUGETLB. The idea is that you create a
> filesystem of type hugetlbfs like so:
>
> # mount -t hugetlbfs -o gid=aph,mode=0700 none /mnt/huge
>
> And create some huge pages:
>
> # echo 1024 > /proc/sys/vm/nr_hugepages
>
> Java then searches for a huge page mount point and if one exists
> uses it to create mappings.
>
> I'm not sure how generally useful this is. For systems with long-
> term support such as Red Hat Enterprise Linux this makes sense, but
> are there any distros where this might be useful? I don't really want
> to push this upstream with a bunch of #defines or probes to test the
> OS's capability: that code is complicated enough already.
>
Does this still work as before on systems with MAP_HUGETLB? The patch
seems to be removing a bunch of existing code rather than just adding a
fallback.
> Andrew.
>
>
> diff -u -r openjdk.patched/hotspot/src/os/linux/vm/os_linux.cpp openjdk/hotspot/src/os/linux/vm/os_linux.cpp
> --- openjdk.patched/hotspot/src/os/linux/vm/os_linux.cpp 2011-05-26 17:01:38.998093000 +0100
> +++ openjdk/hotspot/src/os/linux/vm/os_linux.cpp 2011-06-03 17:15:33.074759103 +0100
> @@ -164,6 +164,8 @@
> /* Used to protect dlsym() calls */
> static pthread_mutex_t dl_mutex;
>
> +static os::HugePagesFile huge_pages_file;
> +
> ////////////////////////////////////////////////////////////////////////////////
> // utility functions
>
> @@ -2495,24 +2497,14 @@
> return res != (uintptr_t) MAP_FAILED;
> }
>
> -// Define MAP_HUGETLB here so we can build HotSpot on old systems.
> -#ifndef MAP_HUGETLB
> -#define MAP_HUGETLB 0x40000
> -#endif
> -
> -// Define MADV_HUGEPAGE here so we can build HotSpot on old systems.
> -#ifndef MADV_HUGEPAGE
> -#define MADV_HUGEPAGE 14
> -#endif
> -
> bool os::commit_memory(char* addr, size_t size, size_t alignment_hint,
> bool exec) {
> if (UseHugeTLBFS && alignment_hint > (size_t)vm_page_size()) {
> int prot = exec ? PROT_READ|PROT_WRITE|PROT_EXEC : PROT_READ|PROT_WRITE;
> uintptr_t res =
> (uintptr_t) ::mmap(addr, size, prot,
> - MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS|MAP_HUGETLB,
> - -1, 0);
> + MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS,
> + huge_pages_file.fd, 0);
> return res != (uintptr_t) MAP_FAILED;
> }
>
> @@ -2520,11 +2512,7 @@
> }
>
> void os::realign_memory(char *addr, size_t bytes, size_t alignment_hint) {
> - if (UseHugeTLBFS && alignment_hint > (size_t)vm_page_size()) {
> - // We don't check the return value: madvise(MADV_HUGEPAGE) may not
> - // be supported or the memory may already be backed by huge pages.
> - ::madvise(addr, bytes, MADV_HUGEPAGE);
> - }
> + // There's nothing we can do here.
> }
>
> void os::free_memory(char *addr, size_t bytes) {
> @@ -5070,36 +5058,90 @@
> // JSR166
> // -------------------------------------------------------
>
> +
> +// Parse a field, delimited by spaces, in a line. Skip leading space
> +// characters, scan until the final character. Terminate the string
> +// with a null character. Return the address of the first non-space
> +// character.
> +static char *scan_field(char **start)
> +{
> + char *s = *start;
> + while (*s && isspace(*s))
> + s++;
> +
> + char *field = s;
> + while (*s && ! isspace(*s))
> + s++;
> +
> + if (*s)
> + *s++ = 0;
> +
> + *start = s;
> + return field;
> +}
> +
> bool os::Linux::hugetlbfs_sanity_check(bool warn, size_t page_size) {
> - bool result = false;
> - void *p = mmap (NULL, page_size, PROT_READ|PROT_WRITE,
> - MAP_ANONYMOUS|MAP_PRIVATE|MAP_HUGETLB,
> - -1, 0);
> -
> - if (p != (void *) -1) {
> - // We don't know if this really is a huge page or not.
> - FILE *fp = fopen("/proc/self/maps", "r");
> - if (fp) {
> - while (!feof(fp)) {
> - char chars[257];
> - long x = 0;
> - if (fgets(chars, sizeof(chars), fp)) {
> - if (sscanf(chars, "%lx-%*x", &x) == 1
> - && x == (long)p) {
> - if (strstr (chars, "hugepage")) {
> - result = true;
> - break;
> - }
> - }
> - }
> + FILE *f = fopen ("/proc/mounts", "r");
> +
> + if (! f)
> + return false;
> +
> + size_t line_len = 256;
> + char *line = (char*)malloc (line_len);
> +
> + int result = false;
> +
> + if (line) {
> +
> + while (getline (&line, &line_len, f)) {
> + char *mount_point;
> + char *fs_type;
> + char *s = line;
> +
> + scan_field (&s);
> + mount_point = scan_field (&s);
> + fs_type = scan_field (&s);
> +
> + if (strcmp (fs_type, "hugetlbfs") == 0) {
> + const char *filename = "/java-XXXXXX";
> + size_t mount_point_len = strlen (mount_point);
> + size_t filename_len = strlen (filename);
> + char *tmpl = (char *)malloc (mount_point_len + filename_len +1);
> +
> + if (! tmpl)
> + break;
> +
> + memcpy (tmpl, mount_point, mount_point_len);
> + memcpy (tmpl + mount_point_len, filename, filename_len);
> + tmpl[mount_point_len + filename_len] = 0;
> +
> + huge_pages_file.fd = mkstemp (tmpl);
> + if (huge_pages_file.fd != -1) {
> + int prot = PROT_READ|PROT_WRITE;
> + void *res =
> + ::mmap(NULL, page_size, prot,
> + MAP_PRIVATE|MAP_ANONYMOUS,
> + huge_pages_file.fd, 0);
> +
> + if (res) {
> + huge_pages_file.name = tmpl;
> + ::munmap (res, page_size);
> + result = true;
> + break;
> + }
> +
> + ::unlink (tmpl);
> + }
> + free (tmpl);
> }
> - fclose(fp);
> }
> - munmap (p, page_size);
> - if (result)
> - return true;
> +
> + free (line);
> }
>
> + if (result)
> + return true;
> +
> if (warn) {
> warning("HugeTLBFS is not supported by the operating system.");
> }
> Only in openjdk/hotspot/src/os/linux/vm: os_linux.cpp~
> diff -u -r openjdk.patched/hotspot/src/os/linux/vm/os_linux.hpp openjdk/hotspot/src/os/linux/vm/os_linux.hpp
> --- openjdk.patched/hotspot/src/os/linux/vm/os_linux.hpp 2011-05-26 17:01:38.908088000 +0100
> +++ openjdk/hotspot/src/os/linux/vm/os_linux.hpp 2011-06-03 16:05:53.570180329 +0100
> @@ -335,4 +335,23 @@
> }
> } ;
>
> +class HugePagesFile {
> +public:
> + int fd;
> + char *name;
> +
> + HugePagesFile() {
> + fd = -1;
> + name = NULL;
> + }
> +
> + ~HugePagesFile() {
> + int status;
> + if (name) {
> + status = ::unlink (name);
> + assert_status(status == 0, status, "unlink hugepagefile");
> + }
> + }
> +} ;
> +
> #endif // OS_LINUX_VM_OS_LINUX_HPP
--
Andrew :)
Free Java Software Engineer
Red Hat, Inc. (http://www.redhat.com)
Support Free Java!
Contribute to GNU Classpath and IcedTea
http://www.gnu.org/software/classpath
http://icedtea.classpath.org
PGP Key: F5862A37 (https://keys.indymedia.org/)
Fingerprint = EA30 D855 D50F 90CD F54D 0698 0713 C3ED F586 2A37
More information about the distro-pkg-dev
mailing list