UseNUMA membind Issue in openJDK

Swati Sharma swatibits14 at gmail.com
Thu Apr 26 12:20:21 UTC 2018


Hi Everyone,

I work at AMD and this is my first patch as a new member of openJDK
community.

I have found an issue while running the specjbb2015 composite workload with
the flag -XX:+UseNUMA. It seems that the JVM does not allocate memory according
to the explicit node binding done using "numactl --membind".

E.g. if bound to a single memory node, the JVM divides the whole heap based on
the total number of NUMA nodes available on the system, which creates more
logical groups (lgrps) than required; all of them except one can never be used.

The following examples will explain clearly :
(Note : Collected GC logs with
-Xlog:gc*=debug:file=gc.log:time,uptimemillis)
1) Allocating a heap of 22GB for a single node divides the whole heap into 8
lgrps (the actual number of nodes is 8)
    $numactl --cpunodebind=0 --membind=0 java -Xmx24g -Xms24g -Xmn22g
-XX:+UseNUMA <composite_application>

    eden space 22511616K(22GB), 12% used
    lgrp 0 space 2813952K, 100% used
    lgrp 1 space 2813952K, 0% used
    lgrp 2 space 2813952K, 0% used
    lgrp 3 space 2813952K, 0% used
    lgrp 4 space 2813952K, 0% used
    lgrp 5 space 2813952K, 0% used
    lgrp 6 space 2813952K, 0% used
    lgrp 7 space 2813952K, 0% used

Observation : Instead of disabling UseNUMA for a single-node binding, the JVM
divides the memory into 8 lgrps and always allocates memory on the bound
node; hence eden space usage never goes above 12%.

2) Another case, binding to nodes 0 and 7, results in dividing the heap into
8 lgrps
    $numactl --cpunodebind=0,7 --membind=0,7 java -Xms50g -Xmx50g -Xmn45g
 -XX:+UseNUMA <composite_application>

    eden space 46718976K, 6% used
    lgrp 0 space 5838848K, 14% used
    lgrp 1 space 5838848K, 0% used
    lgrp 2 space 5838848K, 0% used
    lgrp 3 space 5838848K, 0% used
    lgrp 4 space 5838848K, 0% used
    lgrp 5 space 5838848K, 0% used
    lgrp 6 space 5838848K, 0% used
    lgrp 7 space 5847040K, 35% used

Observation : Similar to the first case, allocation happens only on the 0th and
7th nodes, and the rest of the lgrps never get used.

After applying the patch, the JVM divides the given heap size among the
bound memory nodes only.

1) Binding to single node disables UseNUMA
    eden space 46718976K(45GB), 99% used

Observation : UseNUMA gets disabled, hence no lgrps are created and the whole
heap allocation happens on the bound node.

2) Binding to nodes 0 and 7
     $ numactl --cpunodebind=0,7 --membind=0,7 java -Xms50g -Xmx50g -Xmn45g
 -XX:+UseNUMA <composite_application>
     eden space 46718976K(45GB), 99% used
     lgrp 0 space 23359488K(23.5GB), 100% used
     lgrp 7 space 23359488K(23.5GB), 99% used

Observation : Only two lgrps get created and the heap size gets divided
equally between both nodes.

If there is no binding, then JVM will divide the whole heap based on the
number of NUMA nodes available on the system.

The following patch fixes the issue (also attached).
Please review and let me know your comments.

Regression testing using jtreg (make -J=1 run-test-tier1 run-test-tier2)
didn't show any new failures.

===============================PATCH========================================
diff --git a/src/hotspot/os/linux/os_linux.cpp
b/src/hotspot/os/linux/os_linux.cpp
--- a/src/hotspot/os/linux/os_linux.cpp
+++ b/src/hotspot/os/linux/os_linux.cpp
@@ -2832,8 +2832,10 @@
   // Map all node ids in which is possible to allocate memory. Also nodes are
   // not always consecutively available, i.e. available from 0 to the highest
   // node number.
+  // If the nodes have been bound explicitly using numactl membind, then
+  // allocate memory from those nodes only.
   for (size_t node = 0; node <= highest_node_number; node++) {
-    if (Linux::isnode_in_configured_nodes(node)) {
+    if (Linux::isnode_in_bounded_nodes(node)) {
       ids[i++] = node;
     }
   }
@@ -2930,6 +2932,10 @@
                                                libnuma_dlsym(handle, "numa_bitmask_isbitset")));
       set_numa_distance(CAST_TO_FN_PTR(numa_distance_func_t,
                                        libnuma_dlsym(handle, "numa_distance")));
+      set_numa_set_membind(CAST_TO_FN_PTR(numa_set_membind_func_t,
+                                          libnuma_dlsym(handle, "numa_set_membind")));
+      set_numa_get_membind(CAST_TO_FN_PTR(numa_get_membind_func_t,
+                                          libnuma_v2_dlsym(handle, "numa_get_membind")));

       if (numa_available() != -1) {
         set_numa_all_nodes((unsigned long*)libnuma_dlsym(handle,
"numa_all_nodes"));
@@ -3054,6 +3060,8 @@
 os::Linux::numa_set_bind_policy_func_t os::Linux::_numa_set_bind_policy;
 os::Linux::numa_bitmask_isbitset_func_t os::Linux::_numa_bitmask_isbitset;
 os::Linux::numa_distance_func_t os::Linux::_numa_distance;
+os::Linux::numa_set_membind_func_t os::Linux::_numa_set_membind;
+os::Linux::numa_get_membind_func_t os::Linux::_numa_get_membind;
 unsigned long* os::Linux::_numa_all_nodes;
 struct bitmask* os::Linux::_numa_all_nodes_ptr;
 struct bitmask* os::Linux::_numa_nodes_ptr;
@@ -4962,8 +4970,9 @@
     if (!Linux::libnuma_init()) {
       UseNUMA = false;
     } else {
-      if ((Linux::numa_max_node() < 1)) {
-        // There's only one node(they start from 0), disable NUMA.
+      if ((Linux::numa_max_node() < 1) || Linux::issingle_node_bound()) {
+        // If there's only one node(they start from 0) or if the process
+        // is bound explicitly to a single node using membind, disable NUMA.
         UseNUMA = false;
       }
     }
diff --git a/src/hotspot/os/linux/os_linux.hpp
b/src/hotspot/os/linux/os_linux.hpp
--- a/src/hotspot/os/linux/os_linux.hpp
+++ b/src/hotspot/os/linux/os_linux.hpp
@@ -228,6 +228,8 @@
   typedef int (*numa_tonode_memory_func_t)(void *start, size_t size, int node);
   typedef void (*numa_interleave_memory_func_t)(void *start, size_t size, unsigned long *nodemask);
   typedef void (*numa_interleave_memory_v2_func_t)(void *start, size_t size, struct bitmask* mask);
+  typedef void (*numa_set_membind_func_t)(struct bitmask *mask);
+  typedef struct bitmask* (*numa_get_membind_func_t)(void);

   typedef void (*numa_set_bind_policy_func_t)(int policy);
   typedef int (*numa_bitmask_isbitset_func_t)(struct bitmask *bmp, unsigned int n);
@@ -244,6 +246,8 @@
   static numa_set_bind_policy_func_t _numa_set_bind_policy;
   static numa_bitmask_isbitset_func_t _numa_bitmask_isbitset;
   static numa_distance_func_t _numa_distance;
+  static numa_set_membind_func_t _numa_set_membind;
+  static numa_get_membind_func_t _numa_get_membind;
   static unsigned long* _numa_all_nodes;
   static struct bitmask* _numa_all_nodes_ptr;
   static struct bitmask* _numa_nodes_ptr;
@@ -259,6 +263,8 @@
   static void set_numa_set_bind_policy(numa_set_bind_policy_func_t func) { _numa_set_bind_policy = func; }
   static void set_numa_bitmask_isbitset(numa_bitmask_isbitset_func_t func) { _numa_bitmask_isbitset = func; }
   static void set_numa_distance(numa_distance_func_t func) { _numa_distance = func; }
+  static void set_numa_set_membind(numa_set_membind_func_t func) { _numa_set_membind = func; }
+  static void set_numa_get_membind(numa_get_membind_func_t func) { _numa_get_membind = func; }
   static void set_numa_all_nodes(unsigned long* ptr) { _numa_all_nodes = ptr; }
   static void set_numa_all_nodes_ptr(struct bitmask **ptr) { _numa_all_nodes_ptr = (ptr == NULL ? NULL : *ptr); }
   static void set_numa_nodes_ptr(struct bitmask **ptr) { _numa_nodes_ptr = (ptr == NULL ? NULL : *ptr); }
@@ -320,6 +326,34 @@
     } else
       return 0;
   }
+  // Check if node in bounded nodes
+  static bool isnode_in_bounded_nodes(int node) {
+    struct bitmask* bmp = _numa_get_membind != NULL ? _numa_get_membind() : NULL;
+    if (bmp != NULL && _numa_bitmask_isbitset != NULL && _numa_bitmask_isbitset(bmp, node)) {
+      return true;
+    } else
+      return false;
+  }
+  // Check if a single node is bound
+  static bool issingle_node_bound() {
+    struct bitmask* bmp = _numa_get_membind != NULL ? _numa_get_membind() : NULL;
+    if(bmp == NULL) return false;
+    int issingle = 0;
+    // System can have more than 64 nodes so check in all the elements of
+    // unsigned long array
+    for (unsigned long i = 0; i < (bmp->size / (8 * sizeof(unsigned long))); i++) {
+       if (bmp->maskp != NULL && (((bmp->maskp[i]) & (((bmp->maskp[i])) - 1)) == 0)) {
+         issingle++;
+       } else if (bmp->maskp[i] == 0) {
+         continue;
+       } else {
+         return false;
+       }
+    }
+    if (issingle == 1)
+      return true;
+    return false;
+  }
 };

 #endif // OS_LINUX_VM_OS_LINUX_HPP
diff --git a/src/hotspot/share/runtime/os.hpp
b/src/hotspot/share/runtime/os.hpp
--- a/src/hotspot/share/runtime/os.hpp
+++ b/src/hotspot/share/runtime/os.hpp
@@ -81,6 +81,10 @@
   CriticalPriority = 11      // Critical thread priority
 };

+extern "C" struct bitmask {
+  unsigned long size; /* number of bits in the map */
+  unsigned long *maskp;
+};
 // Executable parameter flag for os::commit_memory() and
 // os::commit_memory_or_exit().
 const bool ExecMem = true;

=============================================================================

Thanks,
Swati


More information about the hotspot-dev mailing list