Blame SOURCES/0647-fix-race-between-daemon-reload-and-other-commands.patch

17b0f1
From 13bcf85ffab4b4e67039599246604a3f5b503975 Mon Sep 17 00:00:00 2001
17b0f1
From: David Tardon <dtardon@redhat.com>
17b0f1
Date: Tue, 24 Apr 2018 15:19:38 +0200
17b0f1
Subject: [PATCH] fix race between daemon-reload and other commands
17b0f1
17b0f1
When "systemctl daemon-reload" is run at the same time as "systemctl
17b0f1
start foo", the latter might hang. That's because commands like start
17b0f1
wait for JobRemoved signal to know when the job is finished. But if the
17b0f1
job is finished during reloading, the signal is never sent.
17b0f1
17b0f1
The hang can be easily reproduced by running
17b0f1
17b0f1
    # for ((N=1; N>0; N++)) ; do echo $N ; systemctl daemon-reload ; done
17b0f1
    # for ((N=1; N>0; N++)) ; do echo $N ; systemctl start systemd-coredump.socket ; done
17b0f1
17b0f1
in two different terminals. The start command will hang after 1-2
17b0f1
iterations.
17b0f1
17b0f1
This keeps track of jobs that were started before reload and finished
17b0f1
during it and sends JobRemoved after the reload has finished.
17b0f1
17b0f1
(cherry picked from commit a7a7163df7fc8a9f794f6803b2f6c9c9b0745a1f)
17b0f1
---
17b0f1
 src/core/job.c     | 45 ++++++++++++++++++++++++++++++++++++++++-----
17b0f1
 src/core/job.h     |  2 ++
17b0f1
 src/core/manager.c | 15 +++++++++++++++
17b0f1
 src/core/manager.h |  3 +++
17b0f1
 4 files changed, 60 insertions(+), 5 deletions(-)
17b0f1
17b0f1
diff --git a/src/core/job.c b/src/core/job.c
17b0f1
index 1861c8a633..275503169b 100644
17b0f1
--- a/src/core/job.c
17b0f1
+++ b/src/core/job.c
17b0f1
@@ -53,6 +53,7 @@ Job* job_new_raw(Unit *unit) {
17b0f1
         j->manager = unit->manager;
17b0f1
         j->unit = unit;
17b0f1
         j->type = _JOB_TYPE_INVALID;
17b0f1
+        j->reloaded = false;
17b0f1
 
17b0f1
         return j;
17b0f1
 }
17b0f1
@@ -74,7 +75,7 @@ Job* job_new(Unit *unit, JobType type) {
17b0f1
         return j;
17b0f1
 }
17b0f1
 
17b0f1
-void job_free(Job *j) {
17b0f1
+void job_unlink(Job *j) {
17b0f1
         assert(j);
17b0f1
         assert(!j->installed);
17b0f1
         assert(!j->transaction_prev);
17b0f1
@@ -82,13 +83,28 @@ void job_free(Job *j) {
17b0f1
         assert(!j->subject_list);
17b0f1
         assert(!j->object_list);
17b0f1
 
17b0f1
-        if (j->in_run_queue)
17b0f1
+        if (j->in_run_queue) {
17b0f1
                 LIST_REMOVE(run_queue, j->manager->run_queue, j);
17b0f1
+                j->in_run_queue = false;
17b0f1
+        }
17b0f1
 
17b0f1
-        if (j->in_dbus_queue)
17b0f1
+        if (j->in_dbus_queue) {
17b0f1
                 LIST_REMOVE(dbus_queue, j->manager->dbus_job_queue, j);
17b0f1
+                j->in_dbus_queue = false;
17b0f1
+        }
17b0f1
+
17b0f1
+        j->timer_event_source = sd_event_source_unref(j->timer_event_source);
17b0f1
+}
17b0f1
+
17b0f1
+void job_free(Job *j) {
17b0f1
+        assert(j);
17b0f1
+        assert(!j->installed);
17b0f1
+        assert(!j->transaction_prev);
17b0f1
+        assert(!j->transaction_next);
17b0f1
+        assert(!j->subject_list);
17b0f1
+        assert(!j->object_list);
17b0f1
 
17b0f1
-        sd_event_source_unref(j->timer_event_source);
17b0f1
+        job_unlink(j);
17b0f1
 
17b0f1
         sd_bus_track_unref(j->clients);
17b0f1
         strv_free(j->deserialized_clients);
17b0f1
@@ -246,6 +262,7 @@ int job_install_deserialized(Job *j) {
17b0f1
 
17b0f1
         *pj = j;
17b0f1
         j->installed = true;
17b0f1
+        j->reloaded = true;
17b0f1
 
17b0f1
         if (j->state == JOB_RUNNING)
17b0f1
                 j->unit->manager->n_running_jobs++;
17b0f1
@@ -790,6 +807,19 @@ static void job_emit_status_message(Unit *u, JobType t, JobResult result) {
17b0f1
                 job_print_status_message(u, t, result);
17b0f1
 }
17b0f1
 
17b0f1
+static int job_save_pending_finished_job(Job *j) {
17b0f1
+        int r;
17b0f1
+
17b0f1
+        assert(j);
17b0f1
+
17b0f1
+        r = set_ensure_allocated(&j->manager->pending_finished_jobs, NULL);
17b0f1
+        if (r < 0)
17b0f1
+                return r;
17b0f1
+
17b0f1
+        job_unlink(j);
17b0f1
+        return set_put(j->manager->pending_finished_jobs, j);
17b0f1
+}
17b0f1
+
17b0f1
 int job_finish_and_invalidate(Job *j, JobResult result, bool recursive, bool already) {
17b0f1
         Unit *u;
17b0f1
         Unit *other;
17b0f1
@@ -829,7 +859,12 @@ int job_finish_and_invalidate(Job *j, JobResult result, bool recursive, bool alr
17b0f1
                 j->manager->n_failed_jobs ++;
17b0f1
 
17b0f1
         job_uninstall(j);
17b0f1
-        job_free(j);
17b0f1
+        /* Remember jobs started before the reload */
17b0f1
+        if (j->manager->n_reloading > 0 && j->reloaded) {
17b0f1
+                if (job_save_pending_finished_job(j) < 0)
17b0f1
+                        job_free(j);
17b0f1
+        } else
17b0f1
+                job_free(j);
17b0f1
 
17b0f1
         /* Fail depending jobs on failure */
17b0f1
         if (result != JOB_DONE && recursive) {
17b0f1
diff --git a/src/core/job.h b/src/core/job.h
17b0f1
index 535052b48f..4ae6f28020 100644
17b0f1
--- a/src/core/job.h
17b0f1
+++ b/src/core/job.h
17b0f1
@@ -172,10 +172,12 @@ struct Job {
17b0f1
         bool sent_dbus_new_signal:1;
17b0f1
         bool ignore_order:1;
17b0f1
         bool irreversible:1;
17b0f1
+        bool reloaded:1;
17b0f1
 };
17b0f1
 
17b0f1
 Job* job_new(Unit *unit, JobType type);
17b0f1
 Job* job_new_raw(Unit *unit);
17b0f1
+void job_unlink(Job *job);
17b0f1
 void job_free(Job *job);
17b0f1
 Job* job_install(Job *j);
17b0f1
 int job_install_deserialized(Job *j);
17b0f1
diff --git a/src/core/manager.c b/src/core/manager.c
17b0f1
index 47b09e1e93..9c406bb5bf 100644
17b0f1
--- a/src/core/manager.c
17b0f1
+++ b/src/core/manager.c
17b0f1
@@ -2702,6 +2702,18 @@ finish:
17b0f1
         return r;
17b0f1
 }
17b0f1
 
17b0f1
+static void manager_flush_finished_jobs(Manager *m) {
17b0f1
+        Job *j;
17b0f1
+
17b0f1
+        while ((j = set_steal_first(m->pending_finished_jobs))) {
17b0f1
+                bus_job_send_removed_signal(j);
17b0f1
+                job_free(j);
17b0f1
+        }
17b0f1
+
17b0f1
+        set_free(m->pending_finished_jobs);
17b0f1
+        m->pending_finished_jobs = NULL;
17b0f1
+}
17b0f1
+
17b0f1
 int manager_reload(Manager *m) {
17b0f1
         int r, q;
17b0f1
         _cleanup_fclose_ FILE *f = NULL;
17b0f1
@@ -2784,6 +2796,9 @@ int manager_reload(Manager *m) {
17b0f1
         assert(m->n_reloading > 0);
17b0f1
         m->n_reloading--;
17b0f1
 
17b0f1
+        if (m->n_reloading <= 0)
17b0f1
+                manager_flush_finished_jobs(m);
17b0f1
+
17b0f1
         m->send_reloading_done = true;
17b0f1
 
17b0f1
         return r;
17b0f1
diff --git a/src/core/manager.h b/src/core/manager.h
17b0f1
index e91e7bd8b3..90d2d982e1 100644
17b0f1
--- a/src/core/manager.h
17b0f1
+++ b/src/core/manager.h
17b0f1
@@ -270,6 +270,9 @@ struct Manager {
17b0f1
 
17b0f1
         /* non-zero if we are reloading or reexecuting, */
17b0f1
         int n_reloading;
17b0f1
+        /* A set which contains all jobs that started before reload and finished
17b0f1
+         * during it */
17b0f1
+        Set *pending_finished_jobs;
17b0f1
 
17b0f1
         unsigned n_installed_jobs;
17b0f1
         unsigned n_failed_jobs;