UseNUMA membind Issue in openJDK

Swati Sharma swatibits14 at gmail.com
Wed May 2 10:24:17 UTC 2018


Hi David,

I have localized the struct bitmask declaration in os_linux.cpp.

Here is the updated patch
===================================PATCH===================================================
diff --git a/src/hotspot/os/linux/os_linux.cpp
b/src/hotspot/os/linux/os_linux.cpp
--- a/src/hotspot/os/linux/os_linux.cpp
+++ b/src/hotspot/os/linux/os_linux.cpp
@@ -2832,14 +2832,42 @@
   // Map all node ids in which is possible to allocate memory. Also nodes
are
   // not always consecutively available, i.e. available from 0 to the
highest
   // node number.
+  // If the nodes have been bound explicitly using numactl membind, then
+  // allocate memory from those nodes only.
   for (size_t node = 0; node <= highest_node_number; node++) {
-    if (Linux::isnode_in_configured_nodes(node)) {
+    if (Linux::isnode_in_bounded_nodes(node)) {
       ids[i++] = node;
     }
   }
   return i;
 }

+extern "C"  struct bitmask {
+  unsigned long size; /* number of bits in the map */
+  unsigned long *maskp;
+};
+// Check if single memory node bound.
+// Returns true if single memory node bound.
+bool os::Linux::issingle_node_bound() {
+  struct bitmask* bmp = _numa_get_membind != NULL ? _numa_get_membind() :
NULL;
+  if(bmp == NULL) return false;
+  int issingle = 0;
+  // System can have more than 64 nodes so check in all the elements of
+  // unsigned long array
+  for (unsigned long i = 0; i < (bmp->size / (8 * sizeof(unsigned long)));
i++) {
+    if (bmp->maskp != NULL && (((bmp->maskp[i]) & (((bmp->maskp[i])) - 1))
== 0)) {
+      issingle++;
+    } else if (bmp->maskp[i] == 0) {
+      continue;
+    } else {
+      return false;
+    }
+  }
+  if (issingle == 1)
+    return true;
+  return false;
+}
+
 bool os::get_page_info(char *start, page_info* info) {
   return false;
 }
@@ -2930,6 +2958,10 @@
                                                libnuma_dlsym(handle,
"numa_bitmask_isbitset")));
       set_numa_distance(CAST_TO_FN_PTR(numa_distance_func_t,
                                        libnuma_dlsym(handle,
"numa_distance")));
+      set_numa_set_membind(CAST_TO_FN_PTR(numa_set_membind_func_t,
+                                          libnuma_dlsym(handle,
"numa_set_membind")));
+      set_numa_get_membind(CAST_TO_FN_PTR(numa_get_membind_func_t,
+                                          libnuma_v2_dlsym(handle,
"numa_get_membind")));

       if (numa_available() != -1) {
         set_numa_all_nodes((unsigned long*)libnuma_dlsym(handle,
"numa_all_nodes"));
@@ -3054,6 +3086,8 @@
 os::Linux::numa_set_bind_policy_func_t os::Linux::_numa_set_bind_policy;
 os::Linux::numa_bitmask_isbitset_func_t os::Linux::_numa_bitmask_isbitset;
 os::Linux::numa_distance_func_t os::Linux::_numa_distance;
+os::Linux::numa_set_membind_func_t os::Linux::_numa_set_membind;
+os::Linux::numa_get_membind_func_t os::Linux::_numa_get_membind;
 unsigned long* os::Linux::_numa_all_nodes;
 struct bitmask* os::Linux::_numa_all_nodes_ptr;
 struct bitmask* os::Linux::_numa_nodes_ptr;
@@ -4962,8 +4996,9 @@
     if (!Linux::libnuma_init()) {
       UseNUMA = false;
     } else {
-      if ((Linux::numa_max_node() < 1)) {
-        // There's only one node(they start from 0), disable NUMA.
+      if ((Linux::numa_max_node() < 1) || Linux::issingle_node_bound()) {
+        // If there's only one node(they start from 0) or if the process
+        // is bound explicitly to a single node using membind, disable
NUMA.
         UseNUMA = false;
       }
     }
diff --git a/src/hotspot/os/linux/os_linux.hpp
b/src/hotspot/os/linux/os_linux.hpp
--- a/src/hotspot/os/linux/os_linux.hpp
+++ b/src/hotspot/os/linux/os_linux.hpp
@@ -228,6 +228,8 @@
   typedef int (*numa_tonode_memory_func_t)(void *start, size_t size, int
node);
   typedef void (*numa_interleave_memory_func_t)(void *start, size_t size,
unsigned long *nodemask);
   typedef void (*numa_interleave_memory_v2_func_t)(void *start, size_t
size, struct bitmask* mask);
+  typedef void (*numa_set_membind_func_t)(struct bitmask *mask);
+  typedef struct bitmask* (*numa_get_membind_func_t)(void);

   typedef void (*numa_set_bind_policy_func_t)(int policy);
   typedef int (*numa_bitmask_isbitset_func_t)(struct bitmask *bmp,
unsigned int n);
@@ -244,6 +246,8 @@
   static numa_set_bind_policy_func_t _numa_set_bind_policy;
   static numa_bitmask_isbitset_func_t _numa_bitmask_isbitset;
   static numa_distance_func_t _numa_distance;
+  static numa_set_membind_func_t _numa_set_membind;
+  static numa_get_membind_func_t _numa_get_membind;
   static unsigned long* _numa_all_nodes;
   static struct bitmask* _numa_all_nodes_ptr;
   static struct bitmask* _numa_nodes_ptr;
@@ -259,6 +263,8 @@
   static void set_numa_set_bind_policy(numa_set_bind_policy_func_t func) {
_numa_set_bind_policy = func; }
   static void set_numa_bitmask_isbitset(numa_bitmask_isbitset_func_t func)
{ _numa_bitmask_isbitset = func; }
   static void set_numa_distance(numa_distance_func_t func) {
_numa_distance = func; }
+  static void set_numa_set_membind(numa_set_membind_func_t func) {
_numa_set_membind = func; }
+  static void set_numa_get_membind(numa_get_membind_func_t func) {
_numa_get_membind = func; }
   static void set_numa_all_nodes(unsigned long* ptr) { _numa_all_nodes =
ptr; }
   static void set_numa_all_nodes_ptr(struct bitmask **ptr) {
_numa_all_nodes_ptr = (ptr == NULL ? NULL : *ptr); }
   static void set_numa_nodes_ptr(struct bitmask **ptr) { _numa_nodes_ptr =
(ptr == NULL ? NULL : *ptr); }
@@ -320,6 +326,15 @@
     } else
       return 0;
   }
+  // Check if node in bounded nodes
+  static bool isnode_in_bounded_nodes(int node) {
+    struct bitmask* bmp = _numa_get_membind != NULL ? _numa_get_membind()
: NULL;
+    if (bmp != NULL && _numa_bitmask_isbitset != NULL &&
_numa_bitmask_isbitset(bmp, node)) {
+      return true;
+    } else
+      return false;
+  }
+  static bool issingle_node_bound();
 };

 #endif // OS_LINUX_VM_OS_LINUX_HPP

============================================================================================

Thanks,
Swati

On Thu, Apr 26, 2018 at 6:10 PM, David Holmes <david.holmes at oracle.com>
wrote:
>
> Hi Swati,
>
> On 26/04/2018 10:20 PM, Swati Sharma wrote:
>>
>> Hi Everyone,
>>
>> I work at AMD and this is my first patch as a new member of openJDK
>> community.
>
>
> Welcome!
>
> I can't comment on the actual NUMA details of the patch (though I can see
what you're doing), but the struct bitmask declaration in os.hpp should be
localized in os_linux.hpp as far as I can see, as it's only needed
internally in the Linux code.
>
> Thanks,
> David
> -----
>
>
>> I have found an issue while running the specjbb2015 composite workload
with
>> the flag -XX:+UseNUMA. It seems that JVM does not allocate memory
according
>> to the explicit node binding done using "numactl --membind".
>>
>> E.g. If bound to a single memory node, the JVM divides the whole heap based
on
>> the total number of numa nodes available on the system which creates more
>> logical groups (lgrps) than required, of which only one can actually be used.
>>
>> The following examples will explain clearly :
>> (Note : Collected GC logs with
>> -Xlog:gc*=debug:file=gc.log:time,uptimemillis)
>> 1) Allocating a heap of 22GB for single node divides the whole heap in 8
>> lgrp(Actual no of Nodes are 8)
>>      $numactl --cpunodebind=0 --membind=0 java -Xmx24g -Xms24g -Xmn22g
>> -XX:+UseNUMA <composite_application>
>>
>>      eden space 22511616K(22GB), 12% used
>>      lgrp 0 space 2813952K, 100% used                       lgrp 1 space
>> 2813952K, 0% used                          lgrp 2 space 2813952K, 0% used
>>      lgrp 3 space 2813952K, 0% used                           lgrp 4
space
>> 2813952K, 0% used                          lgrp 5 space 2813952K, 0% used
>>      lgrp 6 space 2813952K, 0% used                           lgrp 7
space
>> 2813952K, 0% used
>>
>> Observation : Instead of disabling UseNUMA for single-node binding, the JVM
>> divides the memory into 8 lgrps and always allocates memory on the bound
>> node; hence eden space allocation never exceeds 12%.
>>
>> 2) Another case of binding to node 0 and 7 results in dividing the heap
in
>> 8lgrp
>>      $numactl --cpunodebind=0,7 --membind=0,7 java -Xms50g -Xmx50g -Xmn45g
>>   -XX:+UseNUMA <composite_application>
>>
>>      eden space 46718976K, 6% used
>>      lgrp 0 space 5838848K, 14% used                  lgrp 1 space
5838848K,
>> 0% used                              lgrp 2 space 5838848K, 0% used
>>      lgrp 3 space 5838848K, 0% used                    lgrp 4 space
>> 5838848K, 0% used                              lgrp 5 space 5838848K, 0%
>> used
>>       lgrp 6 space 5838848K, 0% used                    lgrp 7 space
>> 5847040K, 35% used
>>
>> Observation : Similar to the first case, allocation happens only on the
0th and 7th
>> nodes, and the rest of the lgrps never get used.
>>
>> After applying the patch, JVM divides the given heap size according to
the
>> bounded memory nodes only.
>>
>> 1) Binding to single node disables UseNUMA
>>      eden space 46718976K(45GB), 99% used
>>
>> Observation : UseNUMA gets disabled hence no lgrp creation and the whole
>> heap allocation happens on the bounded node.
>>
>> 2) Binding to node 0 and 7
>>       $ numactl --cpunodebind=0,7 --membind=0,7 java -Xms50g -Xmx50g
-Xmn45g
>>   -XX:+UseNUMA <composite_application>
>>       eden space 46718976K(45GB), 99% used
>>       lgrp 0 space 23359488K(23.5GB), 100% used            lgrp 7 space
>> 23359488K(23.5GB), 99% used
>>
>> Observation : Only two lgrps get created and the heap size gets divided
>> equally between both nodes.
>>
>> If there is no binding, then JVM will divide the whole heap based on the
>> number of NUMA nodes available on the system.
>>
>> The following patch fixes the issue(attached also).
>> Please review and let me know your comments.
>>
>> Regression testing using jtreg (make -J=1 run-test-tier1 run-test-tier2)
>> didn't show any new failures.
>>
>>
===============================PATCH========================================
>> diff --git a/src/hotspot/os/linux/os_linux.cpp
>> b/src/hotspot/os/linux/os_linux.cpp
>> --- a/src/hotspot/os/linux/os_linux.cpp
>> +++ b/src/hotspot/os/linux/os_linux.cpp
>> @@ -2832,8 +2832,10 @@
>>     // Map all node ids in which is possible to allocate memory. Also
nodes
>> are
>>     // not always consecutively available, i.e. available from 0 to the
>> highest
>>     // node number.
>> +  // If the nodes have been bound explicitly using numactl membind, then
>> +  // allocate memory from those nodes only.
>>     for (size_t node = 0; node <= highest_node_number; node++) {
>> -    if (Linux::isnode_in_configured_nodes(node)) {
>> +    if (Linux::isnode_in_bounded_nodes(node)) {
>>         ids[i++] = node;
>>       }
>>     }
>> @@ -2930,6 +2932,10 @@
>>                                                  libnuma_dlsym(handle,
>> "numa_bitmask_isbitset")));
>>         set_numa_distance(CAST_TO_FN_PTR(numa_distance_func_t,
>>                                          libnuma_dlsym(handle,
>> "numa_distance")));
>> +      set_numa_set_membind(CAST_TO_FN_PTR(numa_set_membind_func_t,
>> +                                          libnuma_dlsym(handle,
>> "numa_set_membind")));
>> +      set_numa_get_membind(CAST_TO_FN_PTR(numa_get_membind_func_t,
>> +                                          libnuma_v2_dlsym(handle,
>> "numa_get_membind")));
>>
>>         if (numa_available() != -1) {
>>           set_numa_all_nodes((unsigned long*)libnuma_dlsym(handle,
>> "numa_all_nodes"));
>> @@ -3054,6 +3060,8 @@
>>   os::Linux::numa_set_bind_policy_func_t
os::Linux::_numa_set_bind_policy;
>>   os::Linux::numa_bitmask_isbitset_func_t
os::Linux::_numa_bitmask_isbitset;
>>   os::Linux::numa_distance_func_t os::Linux::_numa_distance;
>> +os::Linux::numa_set_membind_func_t os::Linux::_numa_set_membind;
>> +os::Linux::numa_get_membind_func_t os::Linux::_numa_get_membind;
>>   unsigned long* os::Linux::_numa_all_nodes;
>>   struct bitmask* os::Linux::_numa_all_nodes_ptr;
>>   struct bitmask* os::Linux::_numa_nodes_ptr;
>> @@ -4962,8 +4970,9 @@
>>       if (!Linux::libnuma_init()) {
>>         UseNUMA = false;
>>       } else {
>> -      if ((Linux::numa_max_node() < 1)) {
>> -        // There's only one node(they start from 0), disable NUMA.
>> +      if ((Linux::numa_max_node() < 1) || Linux::issingle_node_bound())
{
>> +        // If there's only one node(they start from 0) or if the process
>> +        // is bound explicitly to a single node using membind, disable
>> NUMA.
>>           UseNUMA = false;
>>         }
>>       }
>> diff --git a/src/hotspot/os/linux/os_linux.hpp
>> b/src/hotspot/os/linux/os_linux.hpp
>> --- a/src/hotspot/os/linux/os_linux.hpp
>> +++ b/src/hotspot/os/linux/os_linux.hpp
>> @@ -228,6 +228,8 @@
>>     typedef int (*numa_tonode_memory_func_t)(void *start, size_t size,
int
>> node);
>>     typedef void (*numa_interleave_memory_func_t)(void *start, size_t
size,
>> unsigned long *nodemask);
>>     typedef void (*numa_interleave_memory_v2_func_t)(void *start, size_t
>> size, struct bitmask* mask);
>> +  typedef void (*numa_set_membind_func_t)(struct bitmask *mask);
>> +  typedef struct bitmask* (*numa_get_membind_func_t)(void);
>>
>>     typedef void (*numa_set_bind_policy_func_t)(int policy);
>>     typedef int (*numa_bitmask_isbitset_func_t)(struct bitmask *bmp,
>> unsigned int n);
>> @@ -244,6 +246,8 @@
>>     static numa_set_bind_policy_func_t _numa_set_bind_policy;
>>     static numa_bitmask_isbitset_func_t _numa_bitmask_isbitset;
>>     static numa_distance_func_t _numa_distance;
>> +  static numa_set_membind_func_t _numa_set_membind;
>> +  static numa_get_membind_func_t _numa_get_membind;
>>     static unsigned long* _numa_all_nodes;
>>     static struct bitmask* _numa_all_nodes_ptr;
>>     static struct bitmask* _numa_nodes_ptr;
>> @@ -259,6 +263,8 @@
>>     static void set_numa_set_bind_policy(numa_set_bind_policy_func_t
func) {
>> _numa_set_bind_policy = func; }
>>     static void set_numa_bitmask_isbitset(numa_bitmask_isbitset_func_t
func)
>> { _numa_bitmask_isbitset = func; }
>>     static void set_numa_distance(numa_distance_func_t func) {
>> _numa_distance = func; }
>> +  static void set_numa_set_membind(numa_set_membind_func_t func) {
>> _numa_set_membind = func; }
>> +  static void set_numa_get_membind(numa_get_membind_func_t func) {
>> _numa_get_membind = func; }
>>     static void set_numa_all_nodes(unsigned long* ptr) { _numa_all_nodes
=
>> ptr; }
>>     static void set_numa_all_nodes_ptr(struct bitmask **ptr) {
>> _numa_all_nodes_ptr = (ptr == NULL ? NULL : *ptr); }
>>     static void set_numa_nodes_ptr(struct bitmask **ptr) {
_numa_nodes_ptr =
>> (ptr == NULL ? NULL : *ptr); }
>> @@ -320,6 +326,34 @@
>>       } else
>>         return 0;
>>     }
>> +  // Check if node in bounded nodes
>> +  static bool isnode_in_bounded_nodes(int node) {
>> +    struct bitmask* bmp = _numa_get_membind != NULL ?
_numa_get_membind()
>> : NULL;
>> +    if (bmp != NULL && _numa_bitmask_isbitset != NULL &&
>> _numa_bitmask_isbitset(bmp, node)) {
>> +      return true;
>> +    } else
>> +      return false;
>> +  }
>> +  // Check if a single node is bound
>> +  static bool issingle_node_bound() {
>> +    struct bitmask* bmp = _numa_get_membind != NULL ?
_numa_get_membind()
>> : NULL;
>> +    if(bmp == NULL) return false;
>> +    int issingle = 0;
>> +    // System can have more than 64 nodes so check in all the elements
of
>> +    // unsigned long array
>> +    for (unsigned long i = 0; i < (bmp->size / (8 * sizeof(unsigned
>> long))); i++) {
>> +       if (bmp->maskp != NULL && (((bmp->maskp[i]) & (((bmp->maskp[i]))
-
>> 1)) == 0)) {
>> +         issingle++;
>> +       } else if (bmp->maskp[i] == 0) {
>> +         continue;
>> +       } else {
>> +         return false;
>> +       }
>> +    }
>> +    if (issingle == 1)
>> +      return true;
>> +    return false;
>> +  }
>>   };
>>
>>   #endif // OS_LINUX_VM_OS_LINUX_HPP
>> diff --git a/src/hotspot/share/runtime/os.hpp
>> b/src/hotspot/share/runtime/os.hpp
>> --- a/src/hotspot/share/runtime/os.hpp
>> +++ b/src/hotspot/share/runtime/os.hpp
>> @@ -81,6 +81,10 @@
>>     CriticalPriority = 11      // Critical thread priority
>>   };
>>
>> +extern "C" struct bitmask {
>> +  unsigned long size; /* number of bits in the map */
>> +  unsigned long *maskp;
>> +};
>>   // Executable parameter flag for os::commit_memory() and
>>   // os::commit_memory_or_exit().
>>   const bool ExecMem = true;
>>
>>
=============================================================================
>>
>> Thanks,
>> Swati
>>


More information about the hotspot-dev mailing list