Blame SOURCES/0640-core-Implement-timeout-based-umount-remount-limit.patch

17b0f1
From 5ccae46f2a192a9347feb604901127c55ce1e039 Mon Sep 17 00:00:00 2001
17b0f1
From: Kyle Walker <kwalker@redhat.com>
17b0f1
Date: Wed, 13 Dec 2017 12:49:26 -0500
17b0f1
Subject: [PATCH] core: Implement timeout based umount/remount limit
17b0f1
17b0f1
Remount, and subsequent umount, attempts can hang for inaccessible network
17b0f1
based mount points. This can leave a system in a hard hang state that
17b0f1
requires a hard reset in order to recover. This change moves the remount,
17b0f1
and umount attempts into separate child processes. The remount and umount
17b0f1
operations will block for up to 90 seconds (DEFAULT_TIMEOUT_USEC). Should
17b0f1
those waits fail, the parent will issue a SIGKILL to the child and continue
17b0f1
with the shutdown efforts.
17b0f1
17b0f1
In addition, instead of only reporting some additional errors on the final
17b0f1
attempt, failures are reported as they occur.
17b0f1
17b0f1
(cherry picked from commit d5641e0d7e8f55937fbc3a7ecd667e42c5836d80)
17b0f1
17b0f1
Related: #1571098
17b0f1
---
17b0f1
 src/core/umount.c         | 112 ++++++++++++++++++++++++++++++--------
17b0f1
 src/shared/def.h          |   2 -
17b0f1
 src/shared/login-shared.c |   1 +
17b0f1
 src/shared/util.c         |  61 +++++++++++++++++++++
17b0f1
 src/shared/util.h         |  16 ++++++
17b0f1
 5 files changed, 168 insertions(+), 24 deletions(-)
17b0f1
17b0f1
diff --git a/src/core/umount.c b/src/core/umount.c
17b0f1
index 91d67c06ca..bd38966124 100644
17b0f1
--- a/src/core/umount.c
17b0f1
+++ b/src/core/umount.c
17b0f1
@@ -363,7 +363,84 @@ static int delete_dm(dev_t devnum) {
17b0f1
         return r >= 0 ? 0 : -errno;
17b0f1
 }
17b0f1
 
17b0f1
-static int mount_points_list_umount(MountPoint **head, bool *changed, bool log_error) {
17b0f1
+static int remount_with_timeout(MountPoint *m, char *options, int *n_failed) {
17b0f1
+        pid_t pid;
17b0f1
+        int r;
17b0f1
+
17b0f1
+        BLOCK_SIGNALS(SIGCHLD);
17b0f1
+
17b0f1
+        /* Due to the possiblity of a remount operation hanging, we
17b0f1
+         * fork a child process and set a timeout. If the timeout
17b0f1
+         * lapses, the assumption is that that particular remount
17b0f1
+         * failed. */
17b0f1
+        pid = fork();
17b0f1
+        if (pid < 0)
17b0f1
+                return log_error_errno(errno, "Failed to fork: %m");
17b0f1
+
17b0f1
+        if (pid == 0) {
17b0f1
+                log_info("Remounting '%s' read-only in with options '%s'.", m->path, options);
17b0f1
+
17b0f1
+                /* Start the mount operation here in the child */
17b0f1
+                r = mount(NULL, m->path, NULL, MS_REMOUNT|MS_RDONLY, options);
17b0f1
+                if (r < 0)
17b0f1
+                        log_error_errno(errno, "Failed to remount '%s' read-only: %m", m->path);
17b0f1
+
17b0f1
+                _exit(r < 0 ? EXIT_FAILURE : EXIT_SUCCESS);
17b0f1
+        }
17b0f1
+
17b0f1
+        r = wait_for_terminate_with_timeout(pid, DEFAULT_TIMEOUT_USEC);
17b0f1
+        if (r == -ETIMEDOUT) {
17b0f1
+                log_error_errno(errno, "Remounting '%s' - timed out, issuing SIGKILL to PID "PID_FMT".", m->path, pid);
17b0f1
+                (void) kill(pid, SIGKILL);
17b0f1
+        } else if (r < 0)
17b0f1
+                log_error_errno(r, "Failed to wait for process: %m");
17b0f1
+
17b0f1
+        return r;
17b0f1
+}
17b0f1
+
17b0f1
+static int umount_with_timeout(MountPoint *m, bool *changed) {
17b0f1
+        pid_t pid;
17b0f1
+        int r;
17b0f1
+
17b0f1
+        BLOCK_SIGNALS(SIGCHLD);
17b0f1
+
17b0f1
+        /* Due to the possiblity of a umount operation hanging, we
17b0f1
+         * fork a child process and set a timeout. If the timeout
17b0f1
+         * lapses, the assumption is that that particular umount
17b0f1
+         * failed. */
17b0f1
+        pid = fork();
17b0f1
+        if (pid < 0)
17b0f1
+                return log_error_errno(errno, "Failed to fork: %m");
17b0f1
+
17b0f1
+        if (pid == 0) {
17b0f1
+                log_info("Unmounting '%s'.", m->path);
17b0f1
+
17b0f1
+                /* Start the mount operation here in the child Using MNT_FORCE
17b0f1
+                 * causes some filesystems (e.g. FUSE and NFS and other network
17b0f1
+                 * filesystems) to abort any pending requests and return -EIO
17b0f1
+                 * rather than blocking indefinitely. If the filesysten is
17b0f1
+                 * "busy", this may allow processes to die, thus making the
17b0f1
+                 * filesystem less busy so the unmount might succeed (rather
17b0f1
+                 * then return EBUSY).*/
17b0f1
+                r = umount2(m->path, MNT_FORCE);
17b0f1
+                if (r < 0)
17b0f1
+                        log_error_errno(errno, "Failed to unmount %s: %m", m->path);
17b0f1
+
17b0f1
+                _exit(r < 0 ? EXIT_FAILURE : EXIT_SUCCESS);
17b0f1
+        }
17b0f1
+
17b0f1
+        r = wait_for_terminate_with_timeout(pid, DEFAULT_TIMEOUT_USEC);
17b0f1
+        if (r == -ETIMEDOUT) {
17b0f1
+                log_error_errno(errno, "Unmounting '%s' - timed out, issuing SIGKILL to PID "PID_FMT".", m->path, pid);
17b0f1
+                (void) kill(pid, SIGKILL);
17b0f1
+        } else if (r < 0)
17b0f1
+                log_error_errno(r, "Failed to wait for process: %m");
17b0f1
+
17b0f1
+        return r;
17b0f1
+}
17b0f1
+
17b0f1
+
17b0f1
+static int mount_points_list_umount(MountPoint **head, bool *changed) {
17b0f1
         MountPoint *m, *n;
17b0f1
         int n_failed = 0;
17b0f1
 
17b0f1
@@ -405,9 +482,13 @@ static int mount_points_list_umount(MountPoint **head, bool *changed, bool log_e
17b0f1
                          * explicitly remount the super block of that
17b0f1
                          * alias read-only we hence should be
17b0f1
                          * relatively safe regarding keeping the fs we
17b0f1
-                         * can otherwise not see dirty. */
17b0f1
-                        log_info("Remounting '%s' read-only with options '%s'.", m->path, options);
17b0f1
-                        (void) mount(NULL, m->path, NULL, MS_REMOUNT|MS_RDONLY, options);
17b0f1
+                         * can otherwise not see dirty.
17b0f1
+                         *
17b0f1
+                         * Since the remount can hang in the instance of
17b0f1
+                         * remote filesystems, we remount asynchronously
17b0f1
+                         * and skip the subsequent umount if it fails */
17b0f1
+                        if (remount_with_timeout(m, options, &n_failed) < 0)
17b0f1
+                                continue;
17b0f1
                 }
17b0f1
 
17b0f1
                 /* Skip / and /usr since we cannot unmount that
17b0f1
@@ -420,22 +501,14 @@ static int mount_points_list_umount(MountPoint **head, bool *changed, bool log_e
17b0f1
                 )
17b0f1
                         continue;
17b0f1
 
17b0f1
-                /* Trying to umount. Using MNT_FORCE causes some
17b0f1
-                 * filesystems (e.g. FUSE and NFS and other network
17b0f1
-                 * filesystems) to abort any pending requests and
17b0f1
-                 * return -EIO rather than blocking indefinitely.
17b0f1
-                 * If the filesysten is "busy", this may allow processes
17b0f1
-                 * to die, thus making the filesystem less busy so
17b0f1
-                 * the unmount might succeed (rather then return EBUSY).*/
17b0f1
-                log_info("Unmounting %s.", m->path);
17b0f1
-                if (umount2(m->path, MNT_FORCE) == 0) {
17b0f1
+                /* Trying to umount */
17b0f1
+                if (umount_with_timeout(m, changed) < 0)
17b0f1
+                        n_failed++;
17b0f1
+                else {
17b0f1
                         if (changed)
17b0f1
                                 *changed = true;
17b0f1
 
17b0f1
                         mount_point_free(head, m);
17b0f1
-                } else if (log_error) {
17b0f1
-                        log_warning_errno(errno, "Could not unmount %s: %m", m->path);
17b0f1
-                        n_failed++;
17b0f1
                 }
17b0f1
         }
17b0f1
 
17b0f1
@@ -550,17 +623,12 @@ int umount_all(bool *changed) {
17b0f1
         do {
17b0f1
                 umount_changed = false;
17b0f1
 
17b0f1
-                mount_points_list_umount(&mp_list_head, &umount_changed, false);
17b0f1
+                mount_points_list_umount(&mp_list_head, &umount_changed);
17b0f1
                 if (umount_changed)
17b0f1
                         *changed = true;
17b0f1
 
17b0f1
         } while (umount_changed);
17b0f1
 
17b0f1
-        /* umount one more time with logging enabled */
17b0f1
-        r = mount_points_list_umount(&mp_list_head, &umount_changed, true);
17b0f1
-        if (r <= 0)
17b0f1
-                goto end;
17b0f1
-
17b0f1
   end:
17b0f1
         mount_points_list_free(&mp_list_head);
17b0f1
 
17b0f1
diff --git a/src/shared/def.h b/src/shared/def.h
17b0f1
index 9e008a6d2d..f193ab1f9f 100644
17b0f1
--- a/src/shared/def.h
17b0f1
+++ b/src/shared/def.h
17b0f1
@@ -21,8 +21,6 @@
17b0f1
   along with systemd; If not, see <http://www.gnu.org/licenses/>.
17b0f1
 ***/
17b0f1
 
17b0f1
-#include "util.h"
17b0f1
-
17b0f1
 #define DEFAULT_TIMEOUT_USEC (90*USEC_PER_SEC)
17b0f1
 #define DEFAULT_RESTART_USEC (100*USEC_PER_MSEC)
17b0f1
 #define DEFAULT_CONFIRM_USEC (30*USEC_PER_SEC)
17b0f1
diff --git a/src/shared/login-shared.c b/src/shared/login-shared.c
17b0f1
index 054c77503b..5da0f05838 100644
17b0f1
--- a/src/shared/login-shared.c
17b0f1
+++ b/src/shared/login-shared.c
17b0f1
@@ -21,6 +21,7 @@
17b0f1
 
17b0f1
 #include "login-shared.h"
17b0f1
 #include "def.h"
17b0f1
+#include "util.h"
17b0f1
 
17b0f1
 bool session_id_valid(const char *id) {
17b0f1
         assert(id);
17b0f1
diff --git a/src/shared/util.c b/src/shared/util.c
17b0f1
index 982f5e044f..3216f004ad 100644
17b0f1
--- a/src/shared/util.c
17b0f1
+++ b/src/shared/util.c
17b0f1
@@ -9049,3 +9049,64 @@ try_dev_shm_without_o_tmpfile:
17b0f1
 
17b0f1
         return -EOPNOTSUPP;
17b0f1
 }
17b0f1
+
17b0f1
+/*
17b0f1
+ * Return values:
17b0f1
+ * < 0 : wait_for_terminate_with_timeout() failed to get the state of the
17b0f1
+ *       process, the process timed out, the process was terminated by a
17b0f1
+ *       signal, or failed for an unknown reason.
17b0f1
+ * >=0 : The process terminated normally with no failures.
17b0f1
+ *
17b0f1
+ * Success is indicated by a return value of zero, a timeout is indicated
17b0f1
+ * by ETIMEDOUT, and all other child failure states are indicated by error
17b0f1
+ * is indicated by a non-zero value.
17b0f1
+*/
17b0f1
+int wait_for_terminate_with_timeout(pid_t pid, usec_t timeout) {
17b0f1
+        sigset_t mask;
17b0f1
+        int r;
17b0f1
+        usec_t until;
17b0f1
+
17b0f1
+        assert_se(sigemptyset(&mask) == 0);
17b0f1
+        assert_se(sigaddset(&mask, SIGCHLD) == 0);
17b0f1
+
17b0f1
+        /* Drop into a sigtimewait-based timeout. Waiting for the
17b0f1
+         * pid to exit. */
17b0f1
+        until = now(CLOCK_MONOTONIC) + timeout;
17b0f1
+        for (;;) {
17b0f1
+                usec_t n;
17b0f1
+                siginfo_t status = {};
17b0f1
+                struct timespec ts;
17b0f1
+
17b0f1
+                n = now(CLOCK_MONOTONIC);
17b0f1
+                if (n >= until)
17b0f1
+                        break;
17b0f1
+
17b0f1
+                r = sigtimedwait(&mask, NULL, timespec_store(&ts, until - n)) < 0 ? -errno : 0;
17b0f1
+                /* Assuming we woke due to the child exiting. */
17b0f1
+                if (waitid(P_PID, pid, &status, WEXITED|WNOHANG) == 0) {
17b0f1
+                        if (status.si_pid == pid) {
17b0f1
+                                /* This is the correct child.*/
17b0f1
+                                if (status.si_code == CLD_EXITED)
17b0f1
+                                        return (status.si_status == 0) ? 0 : -EPROTO;
17b0f1
+                                else
17b0f1
+                                        return -EPROTO;
17b0f1
+                        }
17b0f1
+                }
17b0f1
+                /* Not the child, check for errors and proceed appropriately */
17b0f1
+                if (r < 0) {
17b0f1
+                        switch (r) {
17b0f1
+                        case -EAGAIN:
17b0f1
+                                /* Timed out, child is likely hung. */
17b0f1
+                                return -ETIMEDOUT;
17b0f1
+                        case -EINTR:
17b0f1
+                                /* Received a different signal and should retry */
17b0f1
+                                continue;
17b0f1
+                        default:
17b0f1
+                                /* Return any unexpected errors */
17b0f1
+                                return r;
17b0f1
+                        }
17b0f1
+                }
17b0f1
+        }
17b0f1
+
17b0f1
+        return -EPROTO;
17b0f1
+}
17b0f1
diff --git a/src/shared/util.h b/src/shared/util.h
17b0f1
index 9c4be02566..998f882bbb 100644
17b0f1
--- a/src/shared/util.h
17b0f1
+++ b/src/shared/util.h
17b0f1
@@ -22,6 +22,7 @@
17b0f1
 ***/
17b0f1
 
17b0f1
 #include <alloca.h>
17b0f1
+#include <def.h>
17b0f1
 #include <fcntl.h>
17b0f1
 #include <inttypes.h>
17b0f1
 #include <time.h>
17b0f1
@@ -1122,3 +1123,18 @@ enum {
17b0f1
 };
17b0f1
 
17b0f1
 int acquire_data_fd(const void *data, size_t size, unsigned flags);
17b0f1
+
17b0f1
+int wait_for_terminate_with_timeout(pid_t pid, usec_t timeout);
17b0f1
+
17b0f1
+static inline void block_signals_reset(sigset_t *ss) {
17b0f1
+        assert_se(sigprocmask(SIG_SETMASK, ss, NULL) >= 0);
17b0f1
+}
17b0f1
+
17b0f1
+#define BLOCK_SIGNALS(...)                                                         \
17b0f1
+        _cleanup_(block_signals_reset) _unused_ sigset_t _saved_sigset = ({        \
17b0f1
+                sigset_t _t;                                                       \
17b0f1
+                assert_se(sigprocmask(SIG_SETMASK, NULL, &_t) == 0);               \
17b0f1
+                assert_se(sigprocmask_many(SIG_BLOCK, __VA_ARGS__, -1) >= 0);      \
17b0f1
+                _t;                                                                \
17b0f1
+        })
17b0f1
+