Blame SOURCES/0641-core-Implement-sync_with_progress.patch

17b0f1
From db57bf73d3e5e650b261834a0c39c9d368f9eeea Mon Sep 17 00:00:00 2001
17b0f1
From: Kyle Walker <kwalker@redhat.com>
17b0f1
Date: Thu, 14 Dec 2017 11:46:03 -0500
17b0f1
Subject: [PATCH] core: Implement sync_with_progress()
17b0f1
17b0f1
In similar fashion to the previous change, sync() operations can stall
17b0f1
endlessly if cache is unable to be written out. In order to avoid an
17b0f1
unbounded hang, the sync takes place within a child process. Every 10
17b0f1
seconds (SYNC_TIMEOUT_USEC), the value of /proc/meminfo "Dirty" is checked
17b0f1
to verify it is smaller than the last iteration. If the sync is not making
17b0f1
progress for 3 successive iterations (SYNC_PROGRESS_ATTEMPTS), a SIGKILL is
17b0f1
sent to the sync process and the shutdown continues.
17b0f1
17b0f1
(cherry picked from commit 73ad712fcfea5d8ba475044698d31d2c15d4180d)
17b0f1
17b0f1
Related: #1571098
17b0f1
---
17b0f1
 src/core/shutdown.c | 116 ++++++++++++++++++++++++++++++++++++++++++--
17b0f1
 1 file changed, 111 insertions(+), 5 deletions(-)
17b0f1
17b0f1
diff --git a/src/core/shutdown.c b/src/core/shutdown.c
17b0f1
index 71f001ac13..0b0a54a7de 100644
17b0f1
--- a/src/core/shutdown.c
17b0f1
+++ b/src/core/shutdown.c
17b0f1
@@ -53,6 +53,9 @@
17b0f1
 
17b0f1
 #define FINALIZE_ATTEMPTS 50
17b0f1
 
17b0f1
+#define SYNC_PROGRESS_ATTEMPTS 3 /* loop bound on timed-out waits without progress before the sync child is killed */
17b0f1
+#define SYNC_TIMEOUT_USEC (10*USEC_PER_SEC) /* how long each wait for the sync child may take before progress is checked */
17b0f1
+
17b0f1
 static char* arg_verb;
17b0f1
 
17b0f1
 static int parse_argv(int argc, char *argv[]) {
17b0f1
@@ -152,6 +155,102 @@ static int switch_root_initramfs(void) {
17b0f1
         return switch_root("/run/initramfs", "/oldroot", false, MS_BIND);
17b0f1
 }
17b0f1
 
17b0f1
+/* Read the following fields from /proc/meminfo:
17b0f1
+ *
17b0f1
+ *  NFS_Unstable
17b0f1
+ *  Writeback
17b0f1
+ *  Dirty
17b0f1
+ *
17b0f1
+ * Return true if the sum of these fields is smaller than *prev_dirty, and
17b0f1
+ * store the new sum in *prev_dirty. If the file cannot be opened or a field
17b0f1
+ * cannot be parsed, log a warning and return false (no progress).
17b0f1
+ */
17b0f1
+static bool sync_making_progress(unsigned long long *prev_dirty) {
17b0f1
+        _cleanup_fclose_ FILE *f = NULL;
17b0f1
+        char line[LINE_MAX];
17b0f1
+        bool r = false;
17b0f1
+        unsigned long long val = 0;
17b0f1
+
17b0f1
+        f = fopen("/proc/meminfo", "re");
17b0f1
+        if (!f) {
17b0f1
+                /* Do not return the log call's result: it is a non-zero error code and would convert to true */
17b0f1
+                log_warning_errno(errno, "Failed to open /proc/meminfo: %m");
17b0f1
+                return false;
17b0f1
+        }
17b0f1
+
17b0f1
+        FOREACH_LINE(line, f, log_warning_errno(errno, "Failed to parse /proc/meminfo: %m")) {
17b0f1
+                unsigned long long ull = 0;
17b0f1
+
17b0f1
+                if (!first_word(line, "NFS_Unstable:") && !first_word(line, "Writeback:") && !first_word(line, "Dirty:"))
17b0f1
+                        continue;
17b0f1
+
17b0f1
+                errno = 0;
17b0f1
+                if (sscanf(line, "%*s %llu %*s", &ull) != 1) {
17b0f1
+                        if (errno != 0)
17b0f1
+                                log_warning_errno(errno, "Failed to parse /proc/meminfo: %m");
17b0f1
+                        else
17b0f1
+                                log_warning("Failed to parse /proc/meminfo");
17b0f1
+                        return false;
17b0f1
+                }
17b0f1
+
17b0f1
+                val += ull;
17b0f1
+        }
17b0f1
+
17b0f1
+        r = *prev_dirty > val;
17b0f1
+        *prev_dirty = val;
17b0f1
+        return r;
17b0f1
+}
17b0f1
+
17b0f1
+static void sync_with_progress(void) {
17b0f1
+        unsigned checks;
17b0f1
+        pid_t pid;
17b0f1
+        int r;
17b0f1
+        unsigned long long dirty = ULONG_LONG_MAX;
17b0f1
+
17b0f1
+        BLOCK_SIGNALS(SIGCHLD);
17b0f1
+
17b0f1
+        /* Due to the possibility of the sync operation hanging, we fork
17b0f1
+         * a child process and monitor the progress. If the timeout
17b0f1
+         * lapses, the assumption is that that particular sync stalled. */
17b0f1
+        pid = fork();
17b0f1
+        if (pid < 0) {
17b0f1
+                log_error_errno(errno, "Failed to fork: %m");
17b0f1
+                return;
17b0f1
+        }
17b0f1
+
17b0f1
+        if (pid == 0) {
17b0f1
+                /* Start the sync operation here in the child */
17b0f1
+                sync();
17b0f1
+                _exit(EXIT_SUCCESS);
17b0f1
+        }
17b0f1
+
17b0f1
+        log_info("Syncing filesystems and block devices.");
17b0f1
+
17b0f1
+        /* Start monitoring the sync operation. If more than
17b0f1
+         * SYNC_PROGRESS_ATTEMPTS lapse without progress being made,
17b0f1
+         * we assume that the sync is stalled */
17b0f1
+        for (checks = 0; checks < SYNC_PROGRESS_ATTEMPTS; checks++) {
17b0f1
+                r = wait_for_terminate_with_timeout(pid, SYNC_TIMEOUT_USEC);
17b0f1
+                if (r == 0)
17b0f1
+                        /* Sync finished without error.
17b0f1
+                         * (The sync itself does not return an error code) */
17b0f1
+                        return;
17b0f1
+                else if (r == -ETIMEDOUT) {
17b0f1
+                        /* Reset the check counter if sync_making_progress()
17b0f1
+                         * reports that the outstanding amount decreased */
17b0f1
+                        if (sync_making_progress(&dirty))
17b0f1
+                                checks = 0;
17b0f1
+                } else {
17b0f1
+                        log_error_errno(r, "Failed to sync filesystems and block devices: %m");
17b0f1
+                        return;
17b0f1
+                }
17b0f1
+        }
17b0f1
+
17b0f1
+        /* Only reached when the loop ran out of attempts without the sync
17b0f1
+         * child terminating. We should issue a kill to the stray process. */
17b0f1
+        log_error("Syncing filesystems and block devices - timed out, issuing SIGKILL to PID "PID_FMT".", pid);
17b0f1
+        (void) kill(pid, SIGKILL);
17b0f1
+}
17b0f1
 
17b0f1
 int main(int argc, char *argv[]) {
17b0f1
         bool need_umount, need_swapoff, need_loop_detach, need_dm_detach;
17b0f1
@@ -202,6 +301,13 @@ int main(int argc, char *argv[]) {
17b0f1
         /* lock us into memory */
17b0f1
         mlockall(MCL_CURRENT|MCL_FUTURE);
17b0f1
 
17b0f1
+        /* Synchronize everything that is not written to disk yet at this point already. This is a good idea so that
17b0f1
+         * slow IO is processed here already and the final process killing spree is not impacted by processes
17b0f1
+         * desperately trying to sync IO to disk within their timeout. Do not remove this sync, data corruption will
17b0f1
+         * result. */
17b0f1
+        if (!in_container)
17b0f1
+                sync_with_progress();
17b0f1
+
17b0f1
         log_info("Sending SIGTERM to remaining processes...");
17b0f1
         broadcast_signal(SIGTERM, true, true);
17b0f1
 
17b0f1
@@ -338,12 +444,12 @@ int main(int argc, char *argv[]) {
17b0f1
                           need_loop_detach ? " loop devices," : "",
17b0f1
                           need_dm_detach ? " DM devices," : "");
17b0f1
 
17b0f1
-        /* The kernel will automaticall flush ATA disks and suchlike
17b0f1
-         * on reboot(), but the file systems need to be synce'd
17b0f1
-         * explicitly in advance. So let's do this here, but not
17b0f1
-         * needlessly slow down containers. */
17b0f1
+        /* The kernel will automatically flush ATA disks and suchlike on reboot(), but the file systems need to be
17b0f1
+         * sync'ed explicitly in advance. So let's do this here, but not needlessly slow down containers. Note that we
17b0f1
+         * sync'ed things already once above, but we did some more work since then which might have caused IO, hence
17b0f1
+         * let's do it once more. Do not remove this sync, data corruption will result. */
17b0f1
         if (!in_container)
17b0f1
-                sync();
17b0f1
+                sync_with_progress();
17b0f1
 
17b0f1
         switch (cmd) {
17b0f1