UseNUMA membind Issue in openJDK
David Holmes
david.holmes at oracle.com
Thu Apr 26 12:40:02 UTC 2018
Hi Swati,
On 26/04/2018 10:20 PM, Swati Sharma wrote:
> Hi Everyone,
>
> I work at AMD and this is my first patch as a new member of openJDK
> community.
Welcome!
I can't comment on the actual NUMA details of the patch (though I can
see what you're doing), but the struct bitmask declaration in os.hpp
should be localized in os_linux.hpp as far as I can see, as it's only
needed internally in the Linux code.
Thanks,
David
-----
> I have found an issue while running the specjbb2015 composite workload with
> the flag -XX:+UseNUMA. It seems that the JVM does not allocate memory according
> to the explicit node binding done using "numactl --membind".
>
> E.g. if bound to a single memory node, the JVM divides the whole heap based on
> the total number of NUMA nodes available on the system, which creates more
> logical groups (lgrps) than required, of which only one can actually be used.
>
> The following examples will explain clearly :
> (Note : Collected GC logs with
> -Xlog:gc*=debug:file=gc.log:time,uptimemillis)
> 1) Allocating a heap of 22GB for single node divides the whole heap in 8
> lgrp(Actual no of Nodes are 8)
> $numactl --cpunodebind=0 --membind=0 java -Xmx24g -Xms24g -Xmn22g
> -XX:+UseNUMA <composite_application>
>
> eden space 22511616K(22GB), 12% used
> lgrp 0 space 2813952K, 100% used lgrp 1 space
> 2813952K, 0% used lgrp 2 space 2813952K, 0% used
> lgrp 3 space 2813952K, 0% used lgrp 4 space
> 2813952K, 0% used lgrp 5 space 2813952K, 0% used
> lgrp 6 space 2813952K, 0% used lgrp 7 space
> 2813952K, 0% used
>
> Observation : Instead of disabling UseNUMA for single-node binding, the JVM
> divides the memory into 8 lgrps and always allocates memory on the bound
> node; hence eden space allocation never exceeds 12%.
>
> 2) Another case of binding to nodes 0 and 7 results in dividing the heap into
> 8 lgrps
> $numactl --cpunodebind=0,7 --membind=0,7 java -Xms50g -Xmx50g -Xmn45g
> -XX:+UseNUMA <composite_application>
>
> eden space 46718976K, 6% used
> lgrp 0 space 5838848K, 14% used lgrp 1 space 5838848K,
> 0% used lgrp 2 space 5838848K, 0% used
> lgrp 3 space 5838848K, 0% used lgrp 4 space
> 5838848K, 0% used lgrp 5 space 5838848K, 0%
> used
> lgrp 6 space 5838848K, 0% used lgrp 7 space
> 5847040K, 35% used
>
> Observation : Similar to the first case, allocation happens only on the 0th
> and 7th nodes, and the rest of the lgrps never get used.
>
> After applying the patch, the JVM divides the given heap size among the
> bound memory nodes only.
>
> 1) Binding to single node disables UseNUMA
> eden space 46718976K(45GB), 99% used
>
> Observation : UseNUMA gets disabled, hence no lgrps are created and the whole
> heap is allocated on the bound node.
>
> 2) Binding to node 0 and 7
> $ numactl --cpunodebind=0,7 --membind=0,7 java -Xms50g -Xmx50g -Xmn45g
> -XX:+UseNUMA <composite_application>
> eden space 46718976K(45GB), 99% used
> lgrp 0 space 23359488K(23.5GB), 100% used lgrp 7 space
> 23359488K(23.5GB), 99% used
>
> Observation : Only two lgrps get created, and the heap size gets divided
> equally between both nodes.
>
> If there is no binding, then JVM will divide the whole heap based on the
> number of NUMA nodes available on the system.
>
> The following patch fixes the issue(attached also).
> Please review and let me know your comments.
>
> Regression testing using jtreg (make -J=1 run-test-tier1 run-test-tier2)
> didn't show any new failures.
>
> ===============================PATCH========================================
> diff --git a/src/hotspot/os/linux/os_linux.cpp
> b/src/hotspot/os/linux/os_linux.cpp
> --- a/src/hotspot/os/linux/os_linux.cpp
> +++ b/src/hotspot/os/linux/os_linux.cpp
> @@ -2832,8 +2832,10 @@
> // Map all node ids in which is possible to allocate memory. Also nodes
> are
> // not always consecutively available, i.e. available from 0 to the
> highest
> // node number.
> + // If the nodes have been bound explicitly using numactl membind, then
> + // allocate memory from those nodes only.
> for (size_t node = 0; node <= highest_node_number; node++) {
> - if (Linux::isnode_in_configured_nodes(node)) {
> + if (Linux::isnode_in_bounded_nodes(node)) {
> ids[i++] = node;
> }
> }
> @@ -2930,6 +2932,10 @@
> libnuma_dlsym(handle,
> "numa_bitmask_isbitset")));
> set_numa_distance(CAST_TO_FN_PTR(numa_distance_func_t,
> libnuma_dlsym(handle,
> "numa_distance")));
> + set_numa_set_membind(CAST_TO_FN_PTR(numa_set_membind_func_t,
> + libnuma_dlsym(handle,
> "numa_set_membind")));
> + set_numa_get_membind(CAST_TO_FN_PTR(numa_get_membind_func_t,
> + libnuma_v2_dlsym(handle,
> "numa_get_membind")));
>
> if (numa_available() != -1) {
> set_numa_all_nodes((unsigned long*)libnuma_dlsym(handle,
> "numa_all_nodes"));
> @@ -3054,6 +3060,8 @@
> os::Linux::numa_set_bind_policy_func_t os::Linux::_numa_set_bind_policy;
> os::Linux::numa_bitmask_isbitset_func_t os::Linux::_numa_bitmask_isbitset;
> os::Linux::numa_distance_func_t os::Linux::_numa_distance;
> +os::Linux::numa_set_membind_func_t os::Linux::_numa_set_membind;
> +os::Linux::numa_get_membind_func_t os::Linux::_numa_get_membind;
> unsigned long* os::Linux::_numa_all_nodes;
> struct bitmask* os::Linux::_numa_all_nodes_ptr;
> struct bitmask* os::Linux::_numa_nodes_ptr;
> @@ -4962,8 +4970,9 @@
> if (!Linux::libnuma_init()) {
> UseNUMA = false;
> } else {
> - if ((Linux::numa_max_node() < 1)) {
> - // There's only one node(they start from 0), disable NUMA.
> + if ((Linux::numa_max_node() < 1) || Linux::issingle_node_bound()) {
> + // If there's only one node(they start from 0) or if the process
> + // is bound explicitly to a single node using membind, disable
> NUMA.
> UseNUMA = false;
> }
> }
> diff --git a/src/hotspot/os/linux/os_linux.hpp
> b/src/hotspot/os/linux/os_linux.hpp
> --- a/src/hotspot/os/linux/os_linux.hpp
> +++ b/src/hotspot/os/linux/os_linux.hpp
> @@ -228,6 +228,8 @@
> typedef int (*numa_tonode_memory_func_t)(void *start, size_t size, int
> node);
> typedef void (*numa_interleave_memory_func_t)(void *start, size_t size,
> unsigned long *nodemask);
> typedef void (*numa_interleave_memory_v2_func_t)(void *start, size_t
> size, struct bitmask* mask);
> + typedef void (*numa_set_membind_func_t)(struct bitmask *mask);
> + typedef struct bitmask* (*numa_get_membind_func_t)(void);
>
> typedef void (*numa_set_bind_policy_func_t)(int policy);
> typedef int (*numa_bitmask_isbitset_func_t)(struct bitmask *bmp,
> unsigned int n);
> @@ -244,6 +246,8 @@
> static numa_set_bind_policy_func_t _numa_set_bind_policy;
> static numa_bitmask_isbitset_func_t _numa_bitmask_isbitset;
> static numa_distance_func_t _numa_distance;
> + static numa_set_membind_func_t _numa_set_membind;
> + static numa_get_membind_func_t _numa_get_membind;
> static unsigned long* _numa_all_nodes;
> static struct bitmask* _numa_all_nodes_ptr;
> static struct bitmask* _numa_nodes_ptr;
> @@ -259,6 +263,8 @@
> static void set_numa_set_bind_policy(numa_set_bind_policy_func_t func) {
> _numa_set_bind_policy = func; }
> static void set_numa_bitmask_isbitset(numa_bitmask_isbitset_func_t func)
> { _numa_bitmask_isbitset = func; }
> static void set_numa_distance(numa_distance_func_t func) {
> _numa_distance = func; }
> + static void set_numa_set_membind(numa_set_membind_func_t func) {
> _numa_set_membind = func; }
> + static void set_numa_get_membind(numa_get_membind_func_t func) {
> _numa_get_membind = func; }
> static void set_numa_all_nodes(unsigned long* ptr) { _numa_all_nodes =
> ptr; }
> static void set_numa_all_nodes_ptr(struct bitmask **ptr) {
> _numa_all_nodes_ptr = (ptr == NULL ? NULL : *ptr); }
> static void set_numa_nodes_ptr(struct bitmask **ptr) { _numa_nodes_ptr =
> (ptr == NULL ? NULL : *ptr); }
> @@ -320,6 +326,34 @@
> } else
> return 0;
> }
> + // Check if node in bounded nodes
> + static bool isnode_in_bounded_nodes(int node) {
> + struct bitmask* bmp = _numa_get_membind != NULL ? _numa_get_membind()
> : NULL;
> + if (bmp != NULL && _numa_bitmask_isbitset != NULL &&
> _numa_bitmask_isbitset(bmp, node)) {
> + return true;
> + } else
> + return false;
> + }
> + // Check if a single node is bound
> + static bool issingle_node_bound() {
> + struct bitmask* bmp = _numa_get_membind != NULL ? _numa_get_membind()
> : NULL;
> + if(bmp == NULL) return false;
> + int issingle = 0;
> + // System can have more than 64 nodes so check in all the elements of
> + // unsigned long array
> + for (unsigned long i = 0; i < (bmp->size / (8 * sizeof(unsigned
> long))); i++) {
> + if (bmp->maskp != NULL && (((bmp->maskp[i]) & (((bmp->maskp[i])) -
> 1)) == 0)) {
> + issingle++;
> + } else if (bmp->maskp[i] == 0) {
> + continue;
> + } else {
> + return false;
> + }
> + }
> + if (issingle == 1)
> + return true;
> + return false;
> + }
> };
>
> #endif // OS_LINUX_VM_OS_LINUX_HPP
> diff --git a/src/hotspot/share/runtime/os.hpp
> b/src/hotspot/share/runtime/os.hpp
> --- a/src/hotspot/share/runtime/os.hpp
> +++ b/src/hotspot/share/runtime/os.hpp
> @@ -81,6 +81,10 @@
> CriticalPriority = 11 // Critical thread priority
> };
>
> +extern "C" struct bitmask {
> + unsigned long size; /* number of bits in the map */
> + unsigned long *maskp;
> +};
> // Executable parameter flag for os::commit_memory() and
> // os::commit_memory_or_exit().
> const bool ExecMem = true;
>
> =============================================================================
>
> Thanks,
> Swati
>
More information about the hotspot-dev
mailing list