Blame SOURCES/0610-sd-journal-properly-handle-inotify-queue-overflow.patch

17b0f1
From 7204e7f9ea3067bda7e5658a06e91b67c736f8ab Mon Sep 17 00:00:00 2001
17b0f1
From: Lennart Poettering <lennart@poettering.net>
17b0f1
Date: Mon, 12 Feb 2018 16:14:58 +0100
17b0f1
Subject: [PATCH] sd-journal: properly handle inotify queue overflow
17b0f1
17b0f1
This adds proper handling of IN_Q_OVERFLOW: when the inotify queue runs
17b0f1
over we'll reiterate all directories we are looking at. At the same time
17b0f1
we'll mark all files and directories we encounter that way with a
17b0f1
generation counter we first increased. All files and directories not
17b0f1
marked like this are then unloaded.
17b0f1
17b0f1
With this logic we do the best when the inotify queue overflows: we
17b0f1
synchronize our in-memory state again with what's on disk.  This
17b0f1
contains some refactoring of the directory logic, to share more code
17b0f1
between uuid directories and "root" directories and generally make
17b0f1
things a bit more readable by splitting things up into smaller bits.
17b0f1
17b0f1
See: #7998 #8032
17b0f1
17b0f1
(cherry-picked from commit 858749f7312bd0adb5433075a92e1c35a2fb56ac)
17b0f1
17b0f1
Resolves: #1540538
17b0f1
---
17b0f1
 src/journal/journal-file.h     |   2 +
17b0f1
 src/journal/journal-internal.h |   2 +
17b0f1
 src/journal/sd-journal.c       | 237 ++++++++++++++++++++++++++-------
17b0f1
 src/shared/path-util.c         |  14 ++
17b0f1
 src/shared/path-util.h         |   2 +
17b0f1
 5 files changed, 206 insertions(+), 51 deletions(-)
17b0f1
17b0f1
diff --git a/src/journal/journal-file.h b/src/journal/journal-file.h
17b0f1
index c74ad5fc58..dd8ef52d2a 100644
17b0f1
--- a/src/journal/journal-file.h
17b0f1
+++ b/src/journal/journal-file.h
17b0f1
@@ -121,6 +121,8 @@ typedef struct JournalFile {
17b0f1
 
17b0f1
         void *fsprg_seed;
17b0f1
         size_t fsprg_seed_size;
17b0f1
+
17b0f1
+        unsigned last_seen_generation;
17b0f1
 #endif
17b0f1
 } JournalFile;
17b0f1
 
17b0f1
diff --git a/src/journal/journal-internal.h b/src/journal/journal-internal.h
17b0f1
index eb23ac28ad..999e9d8cb6 100644
17b0f1
--- a/src/journal/journal-internal.h
17b0f1
+++ b/src/journal/journal-internal.h
17b0f1
@@ -81,6 +81,7 @@ struct Directory {
17b0f1
         char *path;
17b0f1
         int wd;
17b0f1
         bool is_root;
17b0f1
+        unsigned last_seen_generation;
17b0f1
 };
17b0f1
 
17b0f1
 struct sd_journal {
17b0f1
@@ -102,6 +103,7 @@ struct sd_journal {
17b0f1
         int inotify_fd;
17b0f1
         unsigned current_invalidate_counter, last_invalidate_counter;
17b0f1
         usec_t last_process_usec;
17b0f1
+        unsigned generation;
17b0f1
 
17b0f1
         char *unique_field;
17b0f1
         JournalFile *unique_file;
17b0f1
diff --git a/src/journal/sd-journal.c b/src/journal/sd-journal.c
17b0f1
index 14b65cfedd..9186f5188e 100644
17b0f1
--- a/src/journal/sd-journal.c
17b0f1
+++ b/src/journal/sd-journal.c
17b0f1
@@ -1229,8 +1229,16 @@ static int add_any_file(sd_journal *j, const char *path) {
17b0f1
         assert(j);
17b0f1
         assert(path);
17b0f1
 
17b0f1
-        if (ordered_hashmap_get(j->files, path))
17b0f1
-                return 0;
17b0f1
+        if (path) {
17b0f1
+                f = ordered_hashmap_get(j->files, path);
17b0f1
+                if (f) {
17b0f1
+                        /* Mark this file as seen in this generation. This is used to GC old files in
17b0f1
+                         * process_q_overflow() to detect journal files that are still there and discern them from those which
17b0f1
+                         * are gone. */
17b0f1
+                        f->last_seen_generation = j->generation;
17b0f1
+                        return 0;
17b0f1
+                }
17b0f1
+        }
17b0f1
 
17b0f1
         if (ordered_hashmap_size(j->files) >= JOURNAL_FILES_MAX) {
17b0f1
                 log_debug("Too many open journal files, not adding %s.", path);
17b0f1
@@ -1252,6 +1260,8 @@ static int add_any_file(sd_journal *j, const char *path) {
17b0f1
                 goto fail;
17b0f1
         }
17b0f1
 
17b0f1
+        f->last_seen_generation = j->generation;
17b0f1
+
17b0f1
         log_debug("File %s added.", f->path);
17b0f1
 
17b0f1
         check_network(j, f->fd);
17b0f1
@@ -1346,10 +1356,96 @@ static int dirname_is_machine_id(const char *fn) {
17b0f1
         return sd_id128_equal(id, machine);
17b0f1
 }
17b0f1
 
17b0f1
+static bool dirent_is_journal_file(const struct dirent *de) {
17b0f1
+        assert(de);
17b0f1
+
17b0f1
+        if (!IN_SET(de->d_type, DT_REG, DT_LNK, DT_UNKNOWN))
17b0f1
+                return false;
17b0f1
+
17b0f1
+        return endswith(de->d_name, ".journal") ||
17b0f1
+                endswith(de->d_name, ".journal~");
17b0f1
+}
17b0f1
+
17b0f1
+static bool dirent_is_id128_subdir(const struct dirent *de) {
17b0f1
+        assert(de);
17b0f1
+
17b0f1
+        if (!IN_SET(de->d_type, DT_DIR, DT_LNK, DT_UNKNOWN))
17b0f1
+                return false;
17b0f1
+
17b0f1
+        return id128_is_valid(de->d_name);
17b0f1
+}
17b0f1
+
17b0f1
+static int directory_open(sd_journal *j, const char *path, DIR **ret) {
17b0f1
+        DIR *d;
17b0f1
+
17b0f1
+        assert(j);
17b0f1
+        assert(path);
17b0f1
+        assert(ret);
17b0f1
+
17b0f1
+        d = opendir(path);
17b0f1
+        if (!d)
17b0f1
+                return -errno;
17b0f1
+
17b0f1
+        *ret = d;
17b0f1
+        return 0;
17b0f1
+}
17b0f1
+
17b0f1
+static int add_directory(sd_journal *j, const char *prefix, const char *dirname);
17b0f1
+
17b0f1
+static void directory_enumerate(sd_journal *j, Directory *m, DIR *d) {
17b0f1
+        struct dirent *de;
17b0f1
+
17b0f1
+        assert(j);
17b0f1
+        assert(m);
17b0f1
+        assert(d);
17b0f1
+
17b0f1
+        FOREACH_DIRENT_ALL(de, d, goto fail) {
17b0f1
+                if (dirent_is_journal_file(de))
17b0f1
+                        (void) add_file(j, m->path, de->d_name);
17b0f1
+
17b0f1
+                if (m->is_root && dirent_is_id128_subdir(de))
17b0f1
+                        (void) add_directory(j, m->path, de->d_name);
17b0f1
+        }
17b0f1
+
17b0f1
+        return;
17b0f1
+
17b0f1
+fail:
17b0f1
+        log_debug_errno(errno, "Failed to enumerate directory %s, ignoring: %m", m->path);
17b0f1
+}
17b0f1
+
17b0f1
+static void directory_watch(sd_journal *j, Directory *m, int fd, uint32_t mask) {
17b0f1
+        int r;
17b0f1
+
17b0f1
+        assert(j);
17b0f1
+        assert(m);
17b0f1
+        assert(fd >= 0);
17b0f1
+
17b0f1
+        /* Watch this directory if that's enabled and if it is not being watched yet. */
17b0f1
+
17b0f1
+        if (m->wd > 0) /* Already have a watch? */
17b0f1
+                return;
17b0f1
+        if (j->inotify_fd < 0) /* Not watching at all? */
17b0f1
+                return;
17b0f1
+
17b0f1
+        m->wd = inotify_add_watch_fd(j->inotify_fd, fd, mask);
17b0f1
+        if (m->wd < 0) {
17b0f1
+                log_debug_errno(errno, "Failed to watch journal directory '%s', ignoring: %m", m->path);
17b0f1
+                return;
17b0f1
+        }
17b0f1
+
17b0f1
+        r = hashmap_put(j->directories_by_wd, INT_TO_PTR(m->wd), m);
17b0f1
+        if (r == -EEXIST)
17b0f1
+                log_debug_errno(r, "Directory '%s' already being watched under a different path, ignoring: %m", m->path);
17b0f1
+        if (r < 0) {
17b0f1
+                log_debug_errno(r, "Failed to add watch for journal directory '%s' to hashmap, ignoring: %m", m->path);
17b0f1
+                (void) inotify_rm_watch(j->inotify_fd, m->wd);
17b0f1
+                m->wd = -1;
17b0f1
+        }
17b0f1
+}
17b0f1
+
17b0f1
 static int add_directory(sd_journal *j, const char *prefix, const char *dirname) {
17b0f1
         _cleanup_free_ char *path = NULL;
17b0f1
         _cleanup_closedir_ DIR *d = NULL;
17b0f1
-        struct dirent *de = NULL;
17b0f1
         Directory *m;
17b0f1
         int r, k;
17b0f1
 
17b0f1
@@ -1357,7 +1453,7 @@ static int add_directory(sd_journal *j, const char *prefix, const char *dirname)
17b0f1
         assert(prefix);
17b0f1
         assert(dirname);
17b0f1
 
17b0f1
-        log_debug("Considering %s/%s.", prefix, dirname);
17b0f1
+        log_debug("Considering '%s/%s'.", prefix, dirname);
17b0f1
 
17b0f1
         if ((j->flags & SD_JOURNAL_LOCAL_ONLY) &&
17b0f1
             !(dirname_is_machine_id(dirname) > 0 || path_startswith(prefix, "/run")))
17b0f1
@@ -1369,9 +1465,9 @@ static int add_directory(sd_journal *j, const char *prefix, const char *dirname)
17b0f1
                 goto fail;
17b0f1
         }
17b0f1
 
17b0f1
-        d = opendir(path);
17b0f1
-        if (!d) {
17b0f1
-                r = log_debug_errno(errno, "Failed to open directory %s: %m", path);
17b0f1
+        r = directory_open(j, path, &d);
17b0f1
+        if (r < 0) {
17b0f1
+                r = log_debug_errno(errno, "Failed to open directory '%s': %m", path);
17b0f1
                 goto fail;
17b0f1
         }
17b0f1
 
17b0f1
@@ -1398,25 +1494,17 @@ static int add_directory(sd_journal *j, const char *prefix, const char *dirname)
17b0f1
                 log_debug("Directory %s added.", m->path);
17b0f1
 
17b0f1
         } else if (m->is_root)
17b0f1
-                return 0;
17b0f1
-
17b0f1
-        if (m->wd <= 0 && j->inotify_fd >= 0) {
17b0f1
-
17b0f1
-                m->wd = inotify_add_watch(j->inotify_fd, m->path,
17b0f1
-                                          IN_CREATE|IN_MOVED_TO|IN_MODIFY|IN_ATTRIB|IN_DELETE|
17b0f1
-                                          IN_DELETE_SELF|IN_MOVE_SELF|IN_UNMOUNT|IN_MOVED_FROM|
17b0f1
-                                          IN_ONLYDIR);
17b0f1
+                return 0; /* Don't 'downgrade' from root directory */
17b0f1
 
17b0f1
-                if (m->wd > 0 && hashmap_put(j->directories_by_wd, INT_TO_PTR(m->wd), m) < 0)
17b0f1
-                        inotify_rm_watch(j->inotify_fd, m->wd);
17b0f1
-        }
17b0f1
+        m->last_seen_generation = j->generation;
17b0f1
 
17b0f1
-        FOREACH_DIRENT_ALL(de, d, return log_debug_errno(errno, "Failed to read directory %s: %m", m->path)) {
17b0f1
+        directory_watch(j, m, dirfd(d),
17b0f1
+                        IN_CREATE|IN_MOVED_TO|IN_MODIFY|IN_ATTRIB|IN_DELETE|
17b0f1
+                        IN_DELETE_SELF|IN_MOVE_SELF|IN_UNMOUNT|IN_MOVED_FROM|
17b0f1
+                        IN_ONLYDIR);
17b0f1
 
17b0f1
-                if (dirent_is_file_with_suffix(de, ".journal") ||
17b0f1
-                    dirent_is_file_with_suffix(de, ".journal~"))
17b0f1
-                        (void) add_file(j, m->path, de->d_name);
17b0f1
-        }
17b0f1
+        if (!j->no_new_files)
17b0f1
+                directory_enumerate(j, m, d);
17b0f1
 
17b0f1
         check_network(j, dirfd(d));
17b0f1
 
17b0f1
@@ -1432,13 +1520,14 @@ fail:
17b0f1
 
17b0f1
 static int add_root_directory(sd_journal *j, const char *p, bool missing_ok) {
17b0f1
         _cleanup_closedir_ DIR *d = NULL;
17b0f1
-        struct dirent *de;
17b0f1
         Directory *m;
17b0f1
         int r, k;
17b0f1
 
17b0f1
         assert(j);
17b0f1
         assert(p);
17b0f1
 
17b0f1
+        log_debug("Considering root directory '%s'.", p);
17b0f1
+
17b0f1
         if ((j->flags & SD_JOURNAL_RUNTIME_ONLY) &&
17b0f1
             !path_startswith(p, "/run"))
17b0f1
                 return -EINVAL;
17b0f1
@@ -1446,12 +1535,11 @@ static int add_root_directory(sd_journal *j, const char *p, bool missing_ok) {
17b0f1
         if (j->prefix)
17b0f1
                 p = strjoina(j->prefix, p);
17b0f1
 
17b0f1
-        d = opendir(p);
17b0f1
-        if (!d) {
17b0f1
-                if (errno == ENOENT && missing_ok)
17b0f1
-                        return 0;
17b0f1
-
17b0f1
-                r = log_debug_errno(errno, "Failed to open root directory %s: %m", p);
17b0f1
+        r = directory_open(j, p, &d);
17b0f1
+        if (r == -ENOENT && missing_ok)
17b0f1
+                return 0;
17b0f1
+        if (r < 0) {
17b0f1
+                log_debug_errno(r, "Failed to open root directory %s: %m", p);
17b0f1
                 goto fail;
17b0f1
         }
17b0f1
 
17b0f1
@@ -1495,19 +1583,12 @@ static int add_root_directory(sd_journal *j, const char *p, bool missing_ok) {
17b0f1
                         inotify_rm_watch(j->inotify_fd, m->wd);
17b0f1
         }
17b0f1
 
17b0f1
-        if (j->no_new_files)
17b0f1
-                return 0;
17b0f1
-
17b0f1
-        FOREACH_DIRENT_ALL(de, d, return log_debug_errno(errno, "Failed to read directory %s: %m", m->path)) {
17b0f1
-                sd_id128_t id;
17b0f1
+        directory_watch(j, m, dirfd(d),
17b0f1
+                        IN_CREATE|IN_MOVED_TO|IN_MODIFY|IN_ATTRIB|IN_DELETE|
17b0f1
+                        IN_ONLYDIR);
17b0f1
 
17b0f1
-                if (dirent_is_file_with_suffix(de, ".journal") ||
17b0f1
-                    dirent_is_file_with_suffix(de, ".journal~"))
17b0f1
-                        (void) add_file(j, m->path, de->d_name);
17b0f1
-                else if (IN_SET(de->d_type, DT_DIR, DT_LNK, DT_UNKNOWN) &&
17b0f1
-                         sd_id128_from_string(de->d_name, &id) >= 0)
17b0f1
-                        (void) add_directory(j, m->path, de->d_name);
17b0f1
-        }
17b0f1
+        if (!j->no_new_files)
17b0f1
+                directory_enumerate(j, m, d);
17b0f1
 
17b0f1
         check_network(j, dirfd(d));
17b0f1
 
17b0f1
@@ -2068,6 +2149,18 @@ _public_ void sd_journal_restart_data(sd_journal *j) {
17b0f1
         j->current_field = 0;
17b0f1
 }
17b0f1
 
17b0f1
+static int reiterate_all_paths(sd_journal *j) {
17b0f1
+        assert(j);
17b0f1
+
17b0f1
+        if (j->no_new_files)
17b0f1
+                return add_current_paths(j);
17b0f1
+
17b0f1
+        if (j->path)
17b0f1
+                return add_root_directory(j, j->path, true);
17b0f1
+
17b0f1
+        return add_search_paths(j);
17b0f1
+}
17b0f1
+
17b0f1
 _public_ int sd_journal_get_fd(sd_journal *j) {
17b0f1
         int r;
17b0f1
 
17b0f1
@@ -2081,15 +2174,11 @@ _public_ int sd_journal_get_fd(sd_journal *j) {
17b0f1
         if (r < 0)
17b0f1
                 return r;
17b0f1
 
17b0f1
-        /* Iterate through all dirs again, to add them to the
17b0f1
-         * inotify */
17b0f1
-        if (j->no_new_files)
17b0f1
-                r = add_current_paths(j);
17b0f1
-        else if (j->path)
17b0f1
-                r = add_root_directory(j, j->path, true);
17b0f1
-        else
17b0f1
-                r = add_search_paths(j);
17b0f1
-        if (r < 0)
17b0f1
+         log_debug("Reiterating files to get inotify watches established.");
17b0f1
+
17b0f1
+        /* Iterate through all dirs again, to add them to the inotify */
17b0f1
+        r = reiterate_all_paths(j);
17b0f1
+         if (r < 0)
17b0f1
                 return r;
17b0f1
 
17b0f1
         return j->inotify_fd;
17b0f1
@@ -2131,12 +2220,58 @@ _public_ int sd_journal_get_timeout(sd_journal *j, uint64_t *timeout_usec) {
17b0f1
         return 1;
17b0f1
 }
17b0f1
 
17b0f1
+static void process_q_overflow(sd_journal *j) {
17b0f1
+        JournalFile *f;
17b0f1
+        Directory *m;
17b0f1
+        Iterator i;
17b0f1
+
17b0f1
+        assert(j);
17b0f1
+
17b0f1
+        /* When the inotify queue overruns we need to enumerate and re-validate all journal files to bring our list
17b0f1
+         * back in sync with what's on disk. For this we pick a new generation counter value. It'll be assigned to all
17b0f1
+         * journal files we encounter. All journal files and all directories that don't carry it after reenumeration
17b0f1
+         * are subject for unloading. */
17b0f1
+
17b0f1
+        log_debug("Inotify queue overrun, reiterating everything.");
17b0f1
+
17b0f1
+        j->generation++;
17b0f1
+        (void) reiterate_all_paths(j);
17b0f1
+
17b0f1
+        ORDERED_HASHMAP_FOREACH(f, j->files, i) {
17b0f1
+
17b0f1
+                if (f->last_seen_generation == j->generation)
17b0f1
+                        continue;
17b0f1
+
17b0f1
+                log_debug("File '%s' hasn't been seen in this enumeration, removing.", f->path);
17b0f1
+                remove_file_real(j, f);
17b0f1
+        }
17b0f1
+
17b0f1
+        HASHMAP_FOREACH(m, j->directories_by_path, i) {
17b0f1
+
17b0f1
+                if (m->last_seen_generation == j->generation)
17b0f1
+                        continue;
17b0f1
+
17b0f1
+                if (m->is_root) /* Never GC root directories */
17b0f1
+                        continue;
17b0f1
+
17b0f1
+                log_debug("Directory '%s' hasn't been seen in this enumeration, removing.", m->path);
17b0f1
+                remove_directory(j, m);
17b0f1
+        }
17b0f1
+
17b0f1
+        log_debug("Reiteration complete.");
17b0f1
+}
17b0f1
+
17b0f1
 static void process_inotify_event(sd_journal *j, struct inotify_event *e) {
17b0f1
         Directory *d;
17b0f1
 
17b0f1
         assert(j);
17b0f1
         assert(e);
17b0f1
 
17b0f1
+        if (e->mask & IN_Q_OVERFLOW) {
17b0f1
+                process_q_overflow(j);
17b0f1
+                return;
17b0f1
+        }
17b0f1
+
17b0f1
         /* Is this a subdirectory we watch? */
17b0f1
         d = hashmap_get(j->directories_by_wd, INT_TO_PTR(e->wd));
17b0f1
         if (d) {
17b0f1
diff --git a/src/shared/path-util.c b/src/shared/path-util.c
17b0f1
index 5d4de9ec4d..fcc591686f 100644
17b0f1
--- a/src/shared/path-util.c
17b0f1
+++ b/src/shared/path-util.c
17b0f1
@@ -861,3 +861,17 @@ char *prefix_root(const char *root, const char *path) {
17b0f1
         strcpy(p, path);
17b0f1
         return n;
17b0f1
 }
17b0f1
+
17b0f1
+int inotify_add_watch_fd(int fd, int what, uint32_t mask) {
17b0f1
+        char path[strlen("/proc/self/fd/") + DECIMAL_STR_MAX(int) + 1];
17b0f1
+        int r;
17b0f1
+
17b0f1
+        /* This is like inotify_add_watch(), except that the file to watch is not referenced by a path, but by an fd */
17b0f1
+        xsprintf(path, "/proc/self/fd/%i", what);
17b0f1
+
17b0f1
+        r = inotify_add_watch(fd, path, mask);
17b0f1
+        if (r < 0)
17b0f1
+                return -errno;
17b0f1
+
17b0f1
+        return r;
17b0f1
+}
17b0f1
diff --git a/src/shared/path-util.h b/src/shared/path-util.h
17b0f1
index 34c016229c..96490e12b1 100644
17b0f1
--- a/src/shared/path-util.h
17b0f1
+++ b/src/shared/path-util.h
17b0f1
@@ -66,6 +66,8 @@ int fsck_exists(const char *fstype);
17b0f1
 
17b0f1
 char *prefix_root(const char *root, const char *path);
17b0f1
 
17b0f1
+int inotify_add_watch_fd(int fd, int what, uint32_t mask);
17b0f1
+
17b0f1
 /* Similar to prefix_root(), but returns an alloca() buffer, or
17b0f1
  * possibly a const pointer into the path parameter */
17b0f1
 #define prefix_roota(root, path)                                        \