RFR: 8327114: Attach in Linux may have wrong behaviour when pid == ns_pid (Kubernetes debug container) [v2]

Laurence Cable larry.cable at oracle.com
Wed May 8 22:16:11 UTC 2024


I did some thinking on this issue over the weekend and came up with an 
idea that *may* improve the probability of an attach succeeding in the
case that the target has elevated privileges and the jcmd is not in the 
same mnt namespace as the target JVM.

basically, the idea is to recurse "up"the process hierarchy from the 
target JVM process looking for either occupancy of the same mnt namespace
(meaning that using /tmp to rendezvous on the attach socket will 
succeed) or in the case where the target JVM process has elevated privileges
and thus the jcmd cannot determine if it shares a mnt ns, or cannot 
read/write the /proc/<target_pid>/root/tmp directory.

In this case, the attach code walks "up" the process hierarchy looking 
for the closest ancestor of the target JVM that either occupies the same
mnt ns, or with a r/w /proc/<ancestor_pid>/root/tmp

since the JVM does not manipulate its pid nor mnt ns'es or modify it's 
(linux) capabilities, if such has occurred then it was caused by an ancestor
process of the target (in the case of a container this is most likely 
the container manager or a delegate thereof.

should the jcmd find either an ancestor in the same mnt ns (/tmp) or a 
r/w /proc/<ancestor_pid>/root/tmp it will return that path as the directory
in which to rendezvous with the target JVM.

this approach "increases the odds" that the jcmd will successfully 
attach to a containerized and/or elevated privilege JVM.

needless to say this is "experimental" and needs proper stress testing 
for the appropriate use cases.

Rgds

- Larry

-------------- next part --------------
diff --git a/src/jdk.attach/linux/classes/sun/tools/attach/VirtualMachineImpl.java b/src/jdk.attach/linux/classes/sun/tools/attach/VirtualMachineImpl.java
index 81d4fd259ed..050d8bbb2a9 100644
--- a/src/jdk.attach/linux/classes/sun/tools/attach/VirtualMachineImpl.java
+++ b/src/jdk.attach/linux/classes/sun/tools/attach/VirtualMachineImpl.java
@@ -34,6 +34,7 @@
 import java.nio.file.Path;
 import java.nio.file.Paths;
 import java.nio.file.Files;
+import java.util.Optional;
 
 import static java.nio.charset.StandardCharsets.UTF_8;
 
@@ -46,13 +47,45 @@ public class VirtualMachineImpl extends HotSpotVirtualMachine {
     // location is the same for all processes, otherwise the tools
     // will not be able to find all Hotspot processes.
     // Any changes to this needs to be synchronized with HotSpot.
-    private static final String tmpdir = "/tmp";
+    private static final Path TMPDIR = Path.of("/tmp");
+
+    private static final Path PROC     = Path.of("/proc");
+    private static final Path NS_MNT   = Path.of("ns/mnt");
+    private static final Path NS_PID   = Path.of("ns/pid");
+    private static final Path SELF     = PROC.resolve("self");
+    private static final Path TMP      = Path.of("tmp");
+    private static final Path STATUS   = Path.of("status");
+    private static final Path ROOT_TMP = Path.of("root/tmp");
+
+    private static final Optional<Path> SELF_MNT_NS;
+    private static final Optional<Path> SELF_PID_NS;
+
+    static {
+        Path nsPath = null;
+
+        try {
+            nsPath = Files.readSymbolicLink(SELF.resolve(NS_MNT));
+        } catch (IOException _) {
+	    // do nothing
+        } finally {
+            SELF_MNT_NS = Optional.ofNullable(nsPath);
+        }
+
+        try {
+            nsPath = Files.readSymbolicLink(SELF.resolve(NS_PID));
+        } catch (IOException _) {
+            // do nothing
+        } finally {
+            SELF_PID_NS = Optional.ofNullable(nsPath);
+        }
+    }
+
     String socket_path;
+
     /**
      * Attaches to the target VM
      */
-    VirtualMachineImpl(AttachProvider provider, String vmid)
-        throws AttachNotSupportedException, IOException
+    VirtualMachineImpl(AttachProvider provider, String vmid) throws AttachNotSupportedException, IOException
     {
         super(provider, vmid);
 
@@ -63,12 +96,12 @@ public class VirtualMachineImpl extends HotSpotVirtualMachine {
         }
 
         // Try to resolve to the "inner most" pid namespace
-        int ns_pid = getNamespacePid(pid);
+        final long ns_pid = getNamespacePid(pid);
 
         // Find the socket file. If not found then we attempt to start the
         // attach mechanism in the target VM by sending it a QUIT signal.
         // Then we attempt to find the socket file again.
-        File socket_file = findSocketFile(pid, ns_pid);
+        final File socket_file = findSocketFile(pid, ns_pid);
         socket_path = socket_file.getPath();
         if (!socket_file.exists()) {
             // Keep canonical version of File, to delete, in case target process ends and /proc link has gone:
@@ -210,49 +243,95 @@ protected void close(long fd) throws IOException {
     }
 
     // Return the socket file for the given process.
-    private File findSocketFile(int pid, int ns_pid) throws IOException {
-        String root = findTargetProcessTmpDirectory(pid, ns_pid);
-        return new File(root, ".java_pid" + ns_pid);
+    private File findSocketFile(long pid, long ns_pid) throws AttachNotSupportedException, IOException {
+        return new File(findTargetProcessTmpDirectory(pid, ns_pid), ".java_pid" + ns_pid);
     }
 
     // On Linux a simple handshake is used to start the attach mechanism
     // if not already started. The client creates a .attach_pid<pid> file in the
     // target VM's working directory (or temp directory), and the SIGQUIT handler
     // checks for the file.
-    private File createAttachFile(int pid, int ns_pid) throws IOException {
-        String fn = ".attach_pid" + ns_pid;
-        String path = "/proc/" + pid + "/cwd/" + fn;
-        File f = new File(path);
+    private File createAttachFile(long pid, long ns_pid) throws AttachNotSupportedException, IOException {
+        Path fn   = Path.of(".attach_pid" + ns_pid);
+        Path path = PROC.resolve(Path.of(Long.toString(pid), "cwd")).resolve(fn);
+        File f    = new File(path.toString());
         try {
             // Do not canonicalize the file path, or we will fail to attach to a VM in a container.
             f.createNewFile();
-        } catch (IOException x) {
-            String root = findTargetProcessTmpDirectory(pid, ns_pid);
-            f = new File(root, fn);
+        } catch (IOException _) {
+            f = new File(findTargetProcessTmpDirectory(pid, ns_pid), fn.toString());
             f.createNewFile();
         }
         return f;
     }
 
-    private String findTargetProcessTmpDirectory(int pid, int ns_pid) throws IOException {
-        String root;
-        if (pid != ns_pid) {
-            // A process may not exist in the same mount namespace as the caller, e.g.
-            // if we are trying to attach to a JVM process inside a container.
-            // Instead, attach relative to the target root filesystem as exposed by
-            // procfs regardless of namespaces.
-            String procRootDirectory = "/proc/" + pid + "/root";
-            if (!Files.isReadable(Path.of(procRootDirectory))) {
-                throw new IOException(
-                        String.format("Unable to access root directory %s " +
-                          "of target process %d", procRootDirectory, pid));
+    private String findTargetProcessTmpDirectory(long pid, long ns_pid) throws AttachNotSupportedException, IOException {
+        Optional<ProcessHandle> tgt = ProcessHandle.of(pid);
+        Optional<ProcessHandle> ph = tgt;
+        long nsPid = ns_pid;
+        Optional<Path> prevPidNS = Optional.empty();
+
+        while (ph.isPresent()) {
+            final var curPid = ph.get().pid();
+
+            final var procPidPath = PROC.resolve(Long.toString(curPid));
+
+            Optional<Path> tgtMountNS = Optional.empty();
+
+            try {
+                tgtMountNS = Optional.ofNullable(Files.readSymbolicLink(procPidPath.resolve(NS_MNT))); // attempt to read the tgt's mnt ns id...
+            } catch (
+                    IOException _) { // if we fail to read the tgt's mnt ns id then we either dont have access or it no longer exists!
+                if (!Files.exists(procPidPath))
+                    throw new IOException(String.format("unable to attach, %s non-existent! process: %d terminated", procPidPath, pid));
+
+                // ok so if we get here we have failed to read the tgt's mnt ns, but the tgt process still exists ... we do not have privs to read its procfs
+            }
+
+            final var sameMountNS = SELF_MNT_NS.isPresent() && SELF_MNT_NS.equals(tgtMountNS); // will be false  if we did not read the tgt's mnt ns
+
+            if (sameMountNS) {
+                return TMPDIR.toString(); // we share TMPDIR in common!
+            } else {
+                final var procPidRootTmp = procPidPath.resolve(ROOT_TMP);
+
+                if (Files.isReadable(procPidRootTmp)) return procPidRootTmp.toString(); // not in the same mnt ns but tmp is accessible via /proc...
+            }
+
+            // lets attempt to obtain the pid NS... best efforts to avoid crossing pid ns boundaries (as with a container)
+
+            Optional<Path> curPidNS = Optional.empty();
+
+            try {
+                curPidNS = Optional.ofNullable(Files.readSymbolicLink(procPidPath.resolve(NS_PID))); // attempt to read the tgt's mnt ns id...
+            } catch (IOException _) { // if we fail to read the tgt's pid ns id then we either dont have access or it no longer exists!
+                if (!Files.exists(procPidPath)) throw new IOException(String.format("unable to attach, %s non-existent! process: %d terminated", procPidPath, pid));
+
+                // ok so if we get here we have failed to read the tgt's pid ns, but the tgt process still exists ... we do not have privs to read its procfs
+            }
+
+            // recurse "up" the process heirarchy... if appropriate
+
+            final var havePidNSes = prevPidNS.isPresent() && curPidNS.isPresent();
+
+            final var ppid = ph.get().parent();
+
+            if (ppid.isPresent() && (havePidNSes && curPidNS.equals(prevPidNS)) || (!havePidNSes && nsPid > 1)) {
+                ph = ppid;
+
+                nsPid = getNamespacePid(ph.get().pid()); // get the ns pid of the parent...
+
+                prevPidNS = curPidNS;
+            } else {
+                ph = Optional.empty();
             }
+        }
 
-            root = procRootDirectory + "/" + tmpdir;
+        if (tgt.orElseThrow(AttachNotSupportedException::new).isAlive()) {
+            return TMPDIR.toString(); // fallback...
         } else {
-            root = tmpdir;
+            throw new IOException(String.format("unable to attach, process: %d terminated", pid));
         }
-        return root;
     }
 
     /*
@@ -272,10 +351,10 @@ private void writeString(int fd, String s) throws IOException {
 
     // Return the inner most namespaced PID if there is one,
     // otherwise return the original PID.
-    private int getNamespacePid(int pid) throws AttachNotSupportedException, IOException {
+    private long getNamespacePid(long pid) throws AttachNotSupportedException, IOException {
         // Assuming a real procfs sits beneath, reading this doesn't block
         // nor will it consume a lot of memory.
-        String statusFile = "/proc/" + pid + "/status";
+        final var statusFile = PROC.resolve(Long.toString(pid)).resolve(STATUS).toString();
         File f = new File(statusFile);
         if (!f.exists()) {
             return pid; // Likely a bad pid, but this is properly handled later.
@@ -291,8 +370,7 @@ private int getNamespacePid(int pid) throws AttachNotSupportedException, IOExcep
                     // The last entry represents the PID the JVM "thinks" it is.
                     // Even in non-namespaced pids these entries should be
                     // valid. You could refer to it as the inner most pid.
-                    int ns_pid = Integer.parseInt(parts[parts.length - 1]);
-                    return ns_pid;
+                    return Long.parseLong(parts[parts.length - 1]);
                 }
             }
             // Old kernels may not have NSpid field (i.e. 3.10).


More information about the serviceability-dev mailing list