RFR [8024521] (process) Async close issues with Process InputStream

Martin Buchholz martinrb at google.com
Thu Oct 24 17:48:08 UTC 2013


Sorry to cause you all trouble with ZBB urgency...

I took another look at the proposed change.

I approve of the fundamental idea of having a closeLock (or at least I
can't find any other clean solution).  But I think it would be simpler to
just grab that lock once in processExited, making the resulting code more
obviously correct.  If there's a reason for the repeated lock acquires and
releases, I am missing it.

With the race removed, we can also remove the null check for buf.

My proposed alternative is :

diff --git a/src/solaris/classes/java/lang/UNIXProcess.java.linux
b/src/solaris/classes/java/lang/UNIXProcess.java.linux
--- a/src/solaris/classes/java/lang/UNIXProcess.java.linux
+++ b/src/solaris/classes/java/lang/UNIXProcess.java.linux
@@ -342,10 +342,10 @@
         ProcessPipeInputStream(int fd) {
             super(new FileInputStream(newFileDescriptor(fd)));
         }
+        private final Object closeLock = new Object();

         private static byte[] drainInputStream(InputStream in)
                 throws IOException {
-            if (in == null) return null;
             int n = 0;
             int j;
             byte[] a = null;
@@ -358,21 +358,26 @@

         /** Called by the process reaper thread when the process exits. */
         synchronized void processExited() {
-            // Most BufferedInputStream methods are synchronized, but
close()
-            // is not, and so we have to handle concurrent racing close().
-            try {
-                InputStream in = this.in;
-                if (in != null) {
-                    byte[] stragglers = drainInputStream(in);
-                    in.close();
-                    this.in = (stragglers == null) ?
-                        ProcessBuilder.NullInputStream.INSTANCE :
-                        new ByteArrayInputStream(stragglers);
-                    if (buf == null) // asynchronous close()?
-                        this.in = null;
-                }
-            } catch (IOException ignored) {
-                // probably an asynchronous close().
+            synchronized (closeLock) {
+                try {
+                    InputStream in = this.in;
+                    if (in != null) {
+                        byte[] stragglers = drainInputStream(in);
+                        in.close();
+                        this.in = (stragglers == null) ?
+                            ProcessBuilder.NullInputStream.INSTANCE :
+                            new ByteArrayInputStream(stragglers);
+                    }
+                } catch (IOException ignored) {}
+            }
+        }
+
+        @Override
+        public void close() throws IOException {
+            // BufferedInputStream#close() is not synchronized unlike most
other methods.
+            // Synchronizing helps avoid race with processExited().
+            synchronized (closeLock) {
+                super.close();
             }
         }
     }


And here's an improved test:

I propose giving it just 2 seconds to run when run by jtreg, which will not
often repro the bug, but is still a useful test to leave in.

/*
 * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

/**
 * @test
 * @bug 8024521
 * @summary Closing ProcessPipeInputStream at the time the process exits is
racy
 *          and leads to data corruption. Run this test manually (as
 *          an ordinary java program) with  -Xmx8M  to repro bug 8024521.
 * @run main/othervm -Xmx8M -Dtest.duration=2 CloseRace2
 */

import java.io.*;
import java.util.ArrayList;
import java.util.List;

public class CloseRace2 {
    private static final String BIG_FILE = "bigfile";

    private static final int[] procFDs = new int[6];

    /** default value sufficient to repro bug 8024521. */
    private static final int testDurationSeconds
        = Integer.getInteger("test.duration", 120);

    static boolean fdInUse(int i) {
        return new File("/proc/self/fd/" + i).exists();
    }

    static boolean[] procFDsInUse() {
        boolean[] inUse = new boolean[procFDs.length];
        for (int i = 0; i < procFDs.length; i++)
            inUse[i] = fdInUse(procFDs[i]);
        return inUse;
    }

    static int count(boolean[] bits) {
        int count = 0;
        for (int i = 0; i < bits.length; i++)
            count += bits[i] ? 1 : 0;
        return count;
    }

    public static void main(String args[]) throws Exception {
        if (!(new File("/proc/self/fd").isDirectory()))
            return;

        // Catch Errors from process reaper
        Thread.setDefaultUncaughtExceptionHandler
            ((t, e) -> { e.printStackTrace(); System.exit(1); });

        try (RandomAccessFile f = new RandomAccessFile(BIG_FILE, "rw")) {
            f.setLength(Runtime.getRuntime().maxMemory()); // provoke OOME
        }

        for (int i = 0, j = 0; j < procFDs.length; i++)
            if (!fdInUse(i))
                procFDs[j++] = i;

        Thread[] threads = {
            new Thread(new OpenLoop()),
            new Thread(new ExecLoop()),
        };
        for (Thread thread : threads)
            thread.start();

        Thread.sleep(testDurationSeconds * 1000);

        for (Thread thread : threads)
            thread.interrupt();
        for (Thread thread : threads)
            thread.join();
    }

    static class OpenLoop implements Runnable {
        public void run() {
            while (!Thread.interrupted()) {
                try {
                    // wait for ExecLoop to finish creating process
                    do {} while (count(procFDsInUse()) != 3);
                    List<InputStream> iss = new ArrayList<>(4);
                    // eat up three "holes" (closed ends of pipe fd pairs)
                    for (int i = 0; i < 3; i++)
                        iss.add(new FileInputStream(BIG_FILE));
                    do {} while (count(procFDsInUse()) == procFDs.length);
                    // hopefully this will racily occupy empty fd slot
                    iss.add(new FileInputStream(BIG_FILE));
                    Thread.sleep(1); // Widen race window
                    for (InputStream is : iss)
                        is.close();
                } catch (InterruptedException e) {
                    break;
                } catch (Exception e) {
                    throw new Error(e);
                }
            }
        }
    }

    static class ExecLoop implements Runnable {
        public void run() {
            ProcessBuilder builder = new ProcessBuilder("/bin/true");
            while (!Thread.interrupted()) {
                try {
                    // wait for OpenLoop to finish
                    do {} while (count(procFDsInUse()) > 0);
                    Process process = builder.start();
                    InputStream is = process.getInputStream();
                    process.waitFor();
                    is.close();
                } catch (InterruptedException e) {
                    break;
                } catch (Exception e) {
                    throw new Error(e);
                }
            }
        }
    }
}




On Thu, Oct 24, 2013 at 5:08 AM, Alan Bateman <Alan.Bateman at oracle.com>wrote:

> On 24/10/2013 01:02, Martin Buchholz wrote:
>
>> I include an improved test, that drops the repro time by an order of
>> magnitude, to around 20 seconds, but still too long and non-portable to
>> make this a well-behaved jtreg test.  I suggest checking in this version,
>> except that I would still make this a manual test.  Now that I have a
>> better repro, I can think about perhaps making a better fix.
>>
> I think we should push Ivan's change today as we have the clear the decks
> for the ZBB milestone. We can create a new bug to add your test when you
> ready as it looks like test fixes (and docs fixes) are okay to fix until
> Dec 12.
>
> -Alan.
>



More information about the core-libs-dev mailing list