UseNUMA membind Issue in openJDK

White, Derek Derek.White at cavium.com
Thu May 3 21:26:59 UTC 2018


Hi Swati,

I think this was reported as JDK-8189922 "UseNUMA memory interleaving allocates heap for unavailable nodes". https://bugs.openjdk.java.net/browse/JDK-8189922

Thanks for working on a fix for this!

 - Derek

> -----Original Message-----
> From: hotspot-dev [mailto:hotspot-dev-bounces at openjdk.java.net] On
> Behalf Of Swati Sharma
> Sent: Wednesday, May 02, 2018 6:24 AM
> To: David Holmes <david.holmes at oracle.com>
> Cc: hotspot-dev at openjdk.java.net; Prakash.Raghavendra at amd.com
> Subject: Re: UseNUMA membind Issue in openJDK
> 
> Hi David,
> 
> I have localized the struct bitmask declaration in os_linux.cpp.
> 
> Here is the updated patch
> ===================================PATCH======================
> =============================
> diff --git a/src/hotspot/os/linux/os_linux.cpp
> b/src/hotspot/os/linux/os_linux.cpp
> --- a/src/hotspot/os/linux/os_linux.cpp
> +++ b/src/hotspot/os/linux/os_linux.cpp
> @@ -2832,14 +2832,42 @@
>    // Map all node ids in which is possible to allocate memory. Also nodes are
>    // not always consecutively available, i.e. available from 0 to the highest
>    // node number.
> +  // If the nodes have been bound explicitly using numactl membind,
> + then  // allocate memory from those nodes only.
>    for (size_t node = 0; node <= highest_node_number; node++) {
> -    if (Linux::isnode_in_configured_nodes(node)) {
> +    if (Linux::isnode_in_bounded_nodes(node)) {
>        ids[i++] = node;
>      }
>    }
>    return i;
>  }
> 
> +extern "C"  struct bitmask {
> +  unsigned long size; /* number of bits in the map */
> +  unsigned long *maskp;
> +};
> +// Check if single memory node bound.
> +// Returns true if single memory node bound.
> +bool os::Linux::issingle_node_bound() {
> +  struct bitmask* bmp = _numa_get_membind != NULL ?
> _numa_get_membind() :
> NULL;
> +  if(bmp == NULL) return false;
> +  int issingle = 0;
> +  // System can have more than 64 nodes so check in all the elements of
> + // unsigned long array  for (unsigned long i = 0; i < (bmp->size / (8
> + * sizeof(unsigned long)));
> i++) {
> +    if (bmp->maskp != NULL && (((bmp->maskp[i]) & (((bmp->maskp[i])) -
> + 1))
> == 0)) {
> +      issingle++;
> +    } else if (bmp->maskp[i] == 0) {
> +      continue;
> +    } else {
> +      return false;
> +    }
> +  }
> +  if (issingle == 1)
> +    return true;
> +  return false;
> +}
> +
>  bool os::get_page_info(char *start, page_info* info) {
>    return false;
>  }
> @@ -2930,6 +2958,10 @@
>                                                 libnuma_dlsym(handle,
> "numa_bitmask_isbitset")));
>        set_numa_distance(CAST_TO_FN_PTR(numa_distance_func_t,
>                                         libnuma_dlsym(handle, "numa_distance")));
> +
> set_numa_set_membind(CAST_TO_FN_PTR(numa_set_membind_func_t,
> +                                          libnuma_dlsym(handle,
> "numa_set_membind")));
> +
> set_numa_get_membind(CAST_TO_FN_PTR(numa_get_membind_func_t,
> +                                          libnuma_v2_dlsym(handle,
> "numa_get_membind")));
> 
>        if (numa_available() != -1) {
>          set_numa_all_nodes((unsigned long*)libnuma_dlsym(handle,
> "numa_all_nodes")); @@ -3054,6 +3086,8 @@
> os::Linux::numa_set_bind_policy_func_t os::Linux::_numa_set_bind_policy;
> os::Linux::numa_bitmask_isbitset_func_t os::Linux::_numa_bitmask_isbitset;
>  os::Linux::numa_distance_func_t os::Linux::_numa_distance;
> +os::Linux::numa_set_membind_func_t os::Linux::_numa_set_membind;
> +os::Linux::numa_get_membind_func_t os::Linux::_numa_get_membind;
>  unsigned long* os::Linux::_numa_all_nodes;  struct bitmask*
> os::Linux::_numa_all_nodes_ptr;  struct bitmask*
> os::Linux::_numa_nodes_ptr; @@ -4962,8 +4996,9 @@
>      if (!Linux::libnuma_init()) {
>        UseNUMA = false;
>      } else {
> -      if ((Linux::numa_max_node() < 1)) {
> -        // There's only one node(they start from 0), disable NUMA.
> +      if ((Linux::numa_max_node() < 1) || Linux::issingle_node_bound()) {
> +        // If there's only one node(they start from 0) or if the process
> +        // is bound explicitly to a single node using membind, disable
> NUMA.
>          UseNUMA = false;
>        }
>      }
> diff --git a/src/hotspot/os/linux/os_linux.hpp
> b/src/hotspot/os/linux/os_linux.hpp
> --- a/src/hotspot/os/linux/os_linux.hpp
> +++ b/src/hotspot/os/linux/os_linux.hpp
> @@ -228,6 +228,8 @@
>    typedef int (*numa_tonode_memory_func_t)(void *start, size_t size, int
> node);
>    typedef void (*numa_interleave_memory_func_t)(void *start, size_t size,
> unsigned long *nodemask);
>    typedef void (*numa_interleave_memory_v2_func_t)(void *start, size_t
> size, struct bitmask* mask);
> +  typedef void (*numa_set_membind_func_t)(struct bitmask *mask);
> + typedef struct bitmask* (*numa_get_membind_func_t)(void);
> 
>    typedef void (*numa_set_bind_policy_func_t)(int policy);
>    typedef int (*numa_bitmask_isbitset_func_t)(struct bitmask *bmp,
> unsigned int n); @@ -244,6 +246,8 @@
>    static numa_set_bind_policy_func_t _numa_set_bind_policy;
>    static numa_bitmask_isbitset_func_t _numa_bitmask_isbitset;
>    static numa_distance_func_t _numa_distance;
> +  static numa_set_membind_func_t _numa_set_membind;  static
> + numa_get_membind_func_t _numa_get_membind;
>    static unsigned long* _numa_all_nodes;
>    static struct bitmask* _numa_all_nodes_ptr;
>    static struct bitmask* _numa_nodes_ptr; @@ -259,6 +263,8 @@
>    static void set_numa_set_bind_policy(numa_set_bind_policy_func_t func)
> { _numa_set_bind_policy = func; }
>    static void set_numa_bitmask_isbitset(numa_bitmask_isbitset_func_t
> func) { _numa_bitmask_isbitset = func; }
>    static void set_numa_distance(numa_distance_func_t func) {
> _numa_distance = func; }
> +  static void set_numa_set_membind(numa_set_membind_func_t func) {
> _numa_set_membind = func; }
> +  static void set_numa_get_membind(numa_get_membind_func_t func) {
> _numa_get_membind = func; }
>    static void set_numa_all_nodes(unsigned long* ptr) { _numa_all_nodes =
> ptr; }
>    static void set_numa_all_nodes_ptr(struct bitmask **ptr) {
> _numa_all_nodes_ptr = (ptr == NULL ? NULL : *ptr); }
>    static void set_numa_nodes_ptr(struct bitmask **ptr) { _numa_nodes_ptr
> = (ptr == NULL ? NULL : *ptr); } @@ -320,6 +326,15 @@
>      } else
>        return 0;
>    }
> +  // Check if node in bounded nodes
> +  static bool isnode_in_bounded_nodes(int node) {
> +    struct bitmask* bmp = _numa_get_membind != NULL ?
> + _numa_get_membind()
> : NULL;
> +    if (bmp != NULL && _numa_bitmask_isbitset != NULL &&
> _numa_bitmask_isbitset(bmp, node)) {
> +      return true;
> +    } else
> +      return false;
> +  }
> +  static bool issingle_node_bound();
>  };
> 
>  #endif // OS_LINUX_VM_OS_LINUX_HPP
> 
> ===============================================================
> =============================
> 
> Thanks,
> Swati
> 
> On Thu, Apr 26, 2018 at 6:10 PM, David Holmes <david.holmes at oracle.com>
> wrote:
> >
> > Hi Swati,
> >
> > On 26/04/2018 10:20 PM, Swati Sharma wrote:
> >>
> >> Hi Everyone,
> >>
> >> I work at AMD and this is my first patch as a new member of openJDK
> >> community.
> >
> >
> > Welcome!
> >
> > I can't comment on the actual NUMA details of the patch (though I can
> > see
> what you're doing), but the struct bitmask declaration in os.hpp should be
> localized in os_linux.hpp as far as I can see, as it's only needed internally in
> the Linux code.
> >
> > Thanks,
> > David
> > -----
> >
> >
> >> I have found an issue while running specjbb2015 composite workload
> with
> >> the flag -XX:+UseNUMA. It seems that JVM does not allocate memory
> according
> >> to the explicit node binding done using "numactl --membind".
> >>
> >> E.g. If bound to a single memory node,  JVM divides the whole heap
> >> based
> on
> >> the total number of numa nodes available on the system which creates
> >> more logical groups(lgrps) than required which cannot be used except the
> one.
> >>
> >> The following examples will explain clearly :
> >> (Note : Collected GC logs with
> >> -Xlog:gc*=debug:file=gc.log:time,uptimemillis)
> >> 1) Allocating a heap of 22GB for single node divides the whole heap
> >> in 8 lgrp(Actual no of Nodes are 8)
> >>      $numactl --cpunodebind=0 --membind=0 java -Xmx24g -Xms24g
> >> -Xmn22g -XX:+UseNUMA <composite_application>
> >>
> >>      eden space 22511616K(22GB), 12% used
> >>      lgrp 0 space 2813952K, 100% used                       lgrp 1 space
> >> 2813952K, 0% used                          lgrp 2 space 2813952K, 0% used
> >>      lgrp 3 space 2813952K, 0% used                           lgrp 4
> space
> >> 2813952K, 0% used                          lgrp 5 space 2813952K, 0% used
> >>      lgrp 6 space 2813952K, 0% used                           lgrp 7
> space
> >> 2813952K, 0% used
> >>
> >> Observation: Instead of disabling UseNUMA for single-node binding, the
> >> JVM divides the memory into 8 lgrps and always allocates memory on the
> >> bound node; hence eden-space allocation never exceeds
> 12%.
> >>
> >> 2) Another case of binding to node 0 and 7 results in dividing the
> >> heap
> in
> >> 8lgrp
> >>      $numactl --cpunodebind=0,7 --membind=0,7 java -Xms50g -Xmx50g -
> Xmn45g
> >>   -XX:+UseNUMA <composite_application>
> >>
> >>      eden space 46718976K, 6% used
> >>      lgrp 0 space 5838848K, 14% used                  lgrp 1 space
> 5838848K,
> >> 0% used                              lgrp 2 space 5838848K, 0% used
> >>      lgrp 3 space 5838848K, 0% used                    lgrp 4 space
> >> 5838848K, 0% used                              lgrp 5 space 5838848K, 0%
> >> used
> >>       lgrp 6 space 5838848K, 0% used                    lgrp 7 space
> >> 5847040K, 35% used
> >>
> >> Observation: Similar to the first case, allocation happens only on the
> >> 0th and
> 7th
> >> nodes, and the rest of the lgrps never get used.
> >>
> >> After applying the patch, JVM divides the given heap size according
> >> to
> the
> >> bounded memory nodes only.
> >>
> >> 1) Binding to single node disables UseNUMA
> >>      eden space 46718976K(45GB), 99% used
> >>
> >> Observation : UseNUMA gets disabled hence no lgrp creation and the
> >> whole heap allocation happens on the bounded node.
> >>
> >> 2) Binding to node 0 and 7
> >>       $ numactl --cpunodebind=0,7 --membind=0,7 java -Xms50g -Xmx50g
-Xmn45g
> -Xmn45g
> >>   -XX:+UseNUMA <composite_application>
> >>       eden space 46718976K(45GB), 99% used
> >>       lgrp 0 space 23359488K(23.5GB), 100% used            lgrp 7 space
> >> 23359488K(23.5GB), 99% used
> >>
> >> Observation : Only two lgrps gets created and heap size gets divided
> >> equally in both nodes.
> >>
> >> If there is no binding, then JVM will divide the whole heap based on
> >> the number of NUMA nodes available on the system.
> >>
> >> The following patch fixes the issue(attached also).
> >> Please review and let me know your comments.
> >>
> >> Regression testing using jtreg (make -J=1 run-test-tier1
> >> run-test-tier2) didn't show any new failures.
> >>
> >>
> ===============================PATCH==========================
> ==============
> >> diff --git a/src/hotspot/os/linux/os_linux.cpp
> >> b/src/hotspot/os/linux/os_linux.cpp
> >> --- a/src/hotspot/os/linux/os_linux.cpp
> >> +++ b/src/hotspot/os/linux/os_linux.cpp
> >> @@ -2832,8 +2832,10 @@
> >>     // Map all node ids in which is possible to allocate memory. Also
> nodes
> >> are
> >>     // not always consecutively available, i.e. available from 0 to
> >> the highest
> >>     // node number.
> >> +  // If the nodes have been bound explicitly using numactl membind,
> >> + then  // allocate memory from those nodes only.
> >>     for (size_t node = 0; node <= highest_node_number; node++) {
> >> -    if (Linux::isnode_in_configured_nodes(node)) {
> >> +    if (Linux::isnode_in_bounded_nodes(node)) {
> >>         ids[i++] = node;
> >>       }
> >>     }
> >> @@ -2930,6 +2932,10 @@
> >>
> >> libnuma_dlsym(handle, "numa_bitmask_isbitset")));
> >>         set_numa_distance(CAST_TO_FN_PTR(numa_distance_func_t,
> >>                                          libnuma_dlsym(handle,
> >> "numa_distance")));
> >> +
> set_numa_set_membind(CAST_TO_FN_PTR(numa_set_membind_func_t,
> >> +                                          libnuma_dlsym(handle,
> >> "numa_set_membind")));
> >> +
> set_numa_get_membind(CAST_TO_FN_PTR(numa_get_membind_func_t,
> >> +                                          libnuma_v2_dlsym(handle,
> >> "numa_get_membind")));
> >>
> >>         if (numa_available() != -1) {
> >>           set_numa_all_nodes((unsigned long*)libnuma_dlsym(handle,
> >> "numa_all_nodes")); @@ -3054,6 +3060,8 @@
> >>   os::Linux::numa_set_bind_policy_func_t
> os::Linux::_numa_set_bind_policy;
> >>   os::Linux::numa_bitmask_isbitset_func_t
> os::Linux::_numa_bitmask_isbitset;
> >>   os::Linux::numa_distance_func_t os::Linux::_numa_distance;
> >> +os::Linux::numa_set_membind_func_t os::Linux::_numa_set_membind;
> >> +os::Linux::numa_get_membind_func_t os::Linux::_numa_get_membind;
> >>   unsigned long* os::Linux::_numa_all_nodes;
> >>   struct bitmask* os::Linux::_numa_all_nodes_ptr;
> >>   struct bitmask* os::Linux::_numa_nodes_ptr; @@ -4962,8 +4970,9 @@
> >>       if (!Linux::libnuma_init()) {
> >>         UseNUMA = false;
> >>       } else {
> >> -      if ((Linux::numa_max_node() < 1)) {
> >> -        // There's only one node(they start from 0), disable NUMA.
> >> +      if ((Linux::numa_max_node() < 1) ||
> >> + Linux::issingle_node_bound())
> {
> >> +        // If there's only one node(they start from 0) or if the process
> >> +        // is bound explicitly to a single node using membind,
> >> + disable
> >> NUMA.
> >>           UseNUMA = false;
> >>         }
> >>       }
> >> diff --git a/src/hotspot/os/linux/os_linux.hpp
> >> b/src/hotspot/os/linux/os_linux.hpp
> >> --- a/src/hotspot/os/linux/os_linux.hpp
> >> +++ b/src/hotspot/os/linux/os_linux.hpp
> >> @@ -228,6 +228,8 @@
> >>     typedef int (*numa_tonode_memory_func_t)(void *start, size_t
> >> size,
> int
> >> node);
> >>     typedef void (*numa_interleave_memory_func_t)(void *start, size_t
> size,
> >> unsigned long *nodemask);
> >>     typedef void (*numa_interleave_memory_v2_func_t)(void *start,
> >> size_t size, struct bitmask* mask);
> >> +  typedef void (*numa_set_membind_func_t)(struct bitmask *mask);
> >> + typedef struct bitmask* (*numa_get_membind_func_t)(void);
> >>
> >>     typedef void (*numa_set_bind_policy_func_t)(int policy);
> >>     typedef int (*numa_bitmask_isbitset_func_t)(struct bitmask *bmp,
> >> unsigned int n); @@ -244,6 +246,8 @@
> >>     static numa_set_bind_policy_func_t _numa_set_bind_policy;
> >>     static numa_bitmask_isbitset_func_t _numa_bitmask_isbitset;
> >>     static numa_distance_func_t _numa_distance;
> >> +  static numa_set_membind_func_t _numa_set_membind;  static
> >> + numa_get_membind_func_t _numa_get_membind;
> >>     static unsigned long* _numa_all_nodes;
> >>     static struct bitmask* _numa_all_nodes_ptr;
> >>     static struct bitmask* _numa_nodes_ptr; @@ -259,6 +263,8 @@
> >>     static void set_numa_set_bind_policy(numa_set_bind_policy_func_t
> func) {
> >> _numa_set_bind_policy = func; }
> >>     static void
> >> set_numa_bitmask_isbitset(numa_bitmask_isbitset_func_t
> func)
> >> { _numa_bitmask_isbitset = func; }
> >>     static void set_numa_distance(numa_distance_func_t func) {
> >> _numa_distance = func; }
> >> +  static void set_numa_set_membind(numa_set_membind_func_t func)
> {
> >> _numa_set_membind = func; }
> >> +  static void set_numa_get_membind(numa_get_membind_func_t func)
> {
> >> _numa_get_membind = func; }
> >>     static void set_numa_all_nodes(unsigned long* ptr) {
> >> _numa_all_nodes
> =
> >> ptr; }
> >>     static void set_numa_all_nodes_ptr(struct bitmask **ptr) {
> >> _numa_all_nodes_ptr = (ptr == NULL ? NULL : *ptr); }
> >>     static void set_numa_nodes_ptr(struct bitmask **ptr) {
> _numa_nodes_ptr =
> >> (ptr == NULL ? NULL : *ptr); }
> >> @@ -320,6 +326,34 @@
> >>       } else
> >>         return 0;
> >>     }
> >> +  // Check if node in bounded nodes
> >> +  static bool isnode_in_bounded_nodes(int node) {
> >> +    struct bitmask* bmp = _numa_get_membind != NULL ?
> _numa_get_membind()
> >> : NULL;
> >> +    if (bmp != NULL && _numa_bitmask_isbitset != NULL &&
> >> _numa_bitmask_isbitset(bmp, node)) {
> >> +      return true;
> >> +    } else
> >> +      return false;
> >> +  }
> >> +  // Check if a single node is bound  static bool
> >> + issingle_node_bound() {
> >> +    struct bitmask* bmp = _numa_get_membind != NULL ?
> _numa_get_membind()
> >> : NULL;
> >> +    if(bmp == NULL) return false;
> >> +    int issingle = 0;
> >> +    // System can have more than 64 nodes so check in all the
> >> + elements
> of
> >> +    // unsigned long array
> >> +    for (unsigned long i = 0; i < (bmp->size / (8 * sizeof(unsigned
> >> long))); i++) {
> >> +       if (bmp->maskp != NULL && (((bmp->maskp[i]) &
> >> + (((bmp->maskp[i]))
> -
> >> 1)) == 0)) {
> >> +         issingle++;
> >> +       } else if (bmp->maskp[i] == 0) {
> >> +         continue;
> >> +       } else {
> >> +         return false;
> >> +       }
> >> +    }
> >> +    if (issingle == 1)
> >> +      return true;
> >> +    return false;
> >> +  }
> >>   };
> >>
> >>   #endif // OS_LINUX_VM_OS_LINUX_HPP
> >> diff --git a/src/hotspot/share/runtime/os.hpp
> >> b/src/hotspot/share/runtime/os.hpp
> >> --- a/src/hotspot/share/runtime/os.hpp
> >> +++ b/src/hotspot/share/runtime/os.hpp
> >> @@ -81,6 +81,10 @@
> >>     CriticalPriority = 11      // Critical thread priority
> >>   };
> >>
> >> +extern "C" struct bitmask {
> >> +  unsigned long size; /* number of bits in the map */
> >> +  unsigned long *maskp;
> >> +};
> >>   // Executable parameter flag for os::commit_memory() and
> >>   // os::commit_memory_or_exit().
> >>   const bool ExecMem = true;
> >>
> >>
> ===============================================================
> ==============
> >>
> >> Thanks,
> >> Swati
> >>


More information about the hotspot-dev mailing list