Blame SOURCES/0620-fd-util-add-new-acquire_data_fd-API-helper.patch

17b0f1
From 581edd240f8dd68b1dbb4070353ddb2059eb8a67 Mon Sep 17 00:00:00 2001
17b0f1
From: Lennart Poettering <lennart@poettering.net>
17b0f1
Date: Fri, 27 Oct 2017 10:56:42 +0200
17b0f1
Subject: [PATCH] fd-util: add new acquire_data_fd() API helper
17b0f1
17b0f1
All this function does is place some data in an in-memory read-only fd,
17b0f1
that may be read back to get the original data back.
17b0f1
17b0f1
Doing this in a way that works everywhere, given the different kernels
17b0f1
we support as well as different privilege levels is surprisingly
17b0f1
complex.
17b0f1
17b0f1
(cherry picked from commit a548e14d690133dd8cca2d5ab8082bb23259fd5f)
17b0f1
17b0f1
Related: #1446095
17b0f1
---
17b0f1
 src/shared/util.c    | 156 +++++++++++++++++++++++++++++++++++++++++++
17b0f1
 src/shared/util.h    |  10 +++
17b0f1
 src/test/test-util.c |  49 ++++++++++++++
17b0f1
 3 files changed, 215 insertions(+)
17b0f1
17b0f1
diff --git a/src/shared/util.c b/src/shared/util.c
17b0f1
index af09532733..982f5e044f 100644
17b0f1
--- a/src/shared/util.c
17b0f1
+++ b/src/shared/util.c
17b0f1
@@ -95,6 +95,7 @@
17b0f1
 #include "sparse-endian.h"
17b0f1
 #include "conf-parser.h"
17b0f1
 #include "cgroup-util.h"
17b0f1
+#include "memfd-util.h"
17b0f1
 
17b0f1
 int saved_argc = 0;
17b0f1
 char **saved_argv = NULL;
17b0f1
@@ -8893,3 +8894,158 @@ uint64_t system_tasks_max_scale(uint64_t v, uint64_t max) {
17b0f1
 
17b0f1
         return m / max;
17b0f1
 }
17b0f1
+
17b0f1
+int acquire_data_fd(const void *data, size_t size, unsigned flags) {
17b0f1
+
17b0f1
+        char procfs_path[strlen("/proc/self/fd/") + DECIMAL_STR_MAX(int)];
17b0f1
+        _cleanup_close_pair_ int pipefds[2] = { -1, -1 };
17b0f1
+        char pattern[] = "/dev/shm/data-fd-XXXXXX";
17b0f1
+        _cleanup_close_ int fd = -1;
17b0f1
+        int isz = 0, r;
17b0f1
+        ssize_t n;
17b0f1
+        off_t f;
17b0f1
+
17b0f1
+        assert(data || size == 0);
17b0f1
+
17b0f1
+        /* Acquire a read-only file descriptor that when read from returns the specified data. This is much more
17b0f1
+         * complex than I wish it was. But here's why:
17b0f1
+         *
17b0f1
+         * a) First we try to use memfds. They are the best option, as we can seal them nicely to make them
17b0f1
+         *    read-only. Unfortunately they require kernel 3.17, and – at the time of writing – we still support 3.14.
17b0f1
+         *
17b0f1
+         * b) Then, we try classic pipes. They are the second best option, as we can close the writing side, retaining
17b0f1
+         *    a nicely read-only fd in the reading side. However, they are by default quite small, and unprivileged
17b0f1
+         *    clients can only bump their size to a system-wide limit, which might be quite low.
17b0f1
+         *
17b0f1
+         * c) Then, we try an O_TMPFILE file in /dev/shm (that dir is the only suitable one known to exist from
17b0f1
+         *    earliest boot on). To make it read-only we open the fd a second time with O_RDONLY via
17b0f1
+         *    /proc/self/fd/<fd>. Unfortunately O_TMPFILE is not available on older kernels on tmpfs.
17b0f1
+         *
17b0f1
+         * d) Finally, we try creating a regular file in /dev/shm, which we then delete.
17b0f1
+         *
17b0f1
+         * It sucks a bit that depending on the situation we return very different objects here, but that's Linux I
17b0f1
+         * figure. */
17b0f1
+
17b0f1
+        if (size == 0 && ((flags & ACQUIRE_NO_DEV_NULL) == 0)) {
17b0f1
+                /* As a special case, return /dev/null if we have been called for an empty data block */
17b0f1
+                r = open("/dev/null", O_RDONLY|O_CLOEXEC|O_NOCTTY);
17b0f1
+                if (r < 0)
17b0f1
+                        return -errno;
17b0f1
+
17b0f1
+                return r;
17b0f1
+        }
17b0f1
+
17b0f1
+        if ((flags & ACQUIRE_NO_MEMFD) == 0) {
17b0f1
+                fd = memfd_new("data-fd");
17b0f1
+                if (fd < 0)
17b0f1
+                        goto try_pipe;
17b0f1
+
17b0f1
+                n = write(fd, data, size);
17b0f1
+                if (n < 0)
17b0f1
+                        return -errno;
17b0f1
+                if ((size_t) n != size)
17b0f1
+                        return -EIO;
17b0f1
+
17b0f1
+                f = lseek(fd, 0, SEEK_SET);
17b0f1
+                if (f != 0)
17b0f1
+                        return -errno;
17b0f1
+
17b0f1
+                r = memfd_set_sealed(fd);
17b0f1
+                if (r < 0)
17b0f1
+                        return r;
17b0f1
+
17b0f1
+                r = fd;
17b0f1
+                fd = -1;
17b0f1
+
17b0f1
+                return r;
17b0f1
+        }
17b0f1
+
17b0f1
+try_pipe:
17b0f1
+        if ((flags & ACQUIRE_NO_PIPE) == 0) {
17b0f1
+                if (pipe2(pipefds, O_CLOEXEC|O_NONBLOCK) < 0)
17b0f1
+                        return -errno;
17b0f1
+
17b0f1
+                isz = fcntl(pipefds[1], F_GETPIPE_SZ, 0);
17b0f1
+                if (isz < 0)
17b0f1
+                        return -errno;
17b0f1
+
17b0f1
+                if ((size_t) isz < size) {
17b0f1
+                        isz = (int) size;
17b0f1
+                        if (isz < 0 || (size_t) isz != size)
17b0f1
+                                return -E2BIG;
17b0f1
+
17b0f1
+                        /* Try to bump the pipe size */
17b0f1
+                        (void) fcntl(pipefds[1], F_SETPIPE_SZ, isz);
17b0f1
+
17b0f1
+                        /* See if that worked */
17b0f1
+                        isz = fcntl(pipefds[1], F_GETPIPE_SZ, 0);
17b0f1
+                        if (isz < 0)
17b0f1
+                                return -errno;
17b0f1
+
17b0f1
+                        if ((size_t) isz < size)
17b0f1
+                                goto try_dev_shm;
17b0f1
+                }
17b0f1
+
17b0f1
+                n = write(pipefds[1], data, size);
17b0f1
+                if (n < 0)
17b0f1
+                        return -errno;
17b0f1
+                if ((size_t) n != size)
17b0f1
+                        return -EIO;
17b0f1
+
17b0f1
+                (void) fd_nonblock(pipefds[0], false);
17b0f1
+
17b0f1
+                r = pipefds[0];
17b0f1
+                pipefds[0] = -1;
17b0f1
+
17b0f1
+                return r;
17b0f1
+        }
17b0f1
+
17b0f1
+try_dev_shm:
17b0f1
+        if ((flags & ACQUIRE_NO_TMPFILE) == 0) {
17b0f1
+                fd = open("/dev/shm", O_RDWR|O_TMPFILE|O_CLOEXEC, 0500);
17b0f1
+                if (fd < 0)
17b0f1
+                        goto try_dev_shm_without_o_tmpfile;
17b0f1
+
17b0f1
+                n = write(fd, data, size);
17b0f1
+                if (n < 0)
17b0f1
+                        return -errno;
17b0f1
+                if ((size_t) n != size)
17b0f1
+                        return -EIO;
17b0f1
+
17b0f1
+                /* Let's reopen the thing, in order to get an O_RDONLY fd for the original O_RDWR one */
17b0f1
+                xsprintf(procfs_path, "/proc/self/fd/%i", fd);
17b0f1
+                r = open(procfs_path, O_RDONLY|O_CLOEXEC);
17b0f1
+                if (r < 0)
17b0f1
+                        return -errno;
17b0f1
+
17b0f1
+                return r;
17b0f1
+        }
17b0f1
+
17b0f1
+try_dev_shm_without_o_tmpfile:
17b0f1
+        if ((flags & ACQUIRE_NO_REGULAR) == 0) {
17b0f1
+                fd = mkostemp_safe(pattern, O_CLOEXEC);
17b0f1
+                if (fd < 0)
17b0f1
+                        return fd;
17b0f1
+
17b0f1
+                n = write(fd, data, size);
17b0f1
+                if (n < 0) {
17b0f1
+                        r = -errno;
17b0f1
+                        goto unlink_and_return;
17b0f1
+                }
17b0f1
+                if ((size_t) n != size) {
17b0f1
+                        r = -EIO;
17b0f1
+                        goto unlink_and_return;
17b0f1
+                }
17b0f1
+
17b0f1
+                /* Let's reopen the thing, in order to get an O_RDONLY fd for the original O_RDWR one */
17b0f1
+                r = open(pattern, O_RDONLY|O_CLOEXEC);
17b0f1
+                if (r < 0)
17b0f1
+                        r = -errno;
17b0f1
+
17b0f1
+        unlink_and_return:
17b0f1
+                (void) unlink(pattern);
17b0f1
+                return r;
17b0f1
+        }
17b0f1
+
17b0f1
+        return -EOPNOTSUPP;
17b0f1
+}
17b0f1
diff --git a/src/shared/util.h b/src/shared/util.h
17b0f1
index 526a6fe848..9c4be02566 100644
17b0f1
--- a/src/shared/util.h
17b0f1
+++ b/src/shared/util.h
17b0f1
@@ -1112,3 +1112,13 @@ int parse_percent(const char *p);
17b0f1
 
17b0f1
 uint64_t system_tasks_max(void);
17b0f1
 uint64_t system_tasks_max_scale(uint64_t v, uint64_t max);
17b0f1
+
17b0f1
+enum {
17b0f1
+        ACQUIRE_NO_DEV_NULL = 1 << 0,
17b0f1
+        ACQUIRE_NO_MEMFD    = 1 << 1,
17b0f1
+        ACQUIRE_NO_PIPE     = 1 << 2,
17b0f1
+        ACQUIRE_NO_TMPFILE  = 1 << 3,
17b0f1
+        ACQUIRE_NO_REGULAR  = 1 << 4,
17b0f1
+};
17b0f1
+
17b0f1
+int acquire_data_fd(const void *data, size_t size, unsigned flags);
17b0f1
diff --git a/src/test/test-util.c b/src/test/test-util.c
17b0f1
index f2c52edcee..efb02ff530 100644
17b0f1
--- a/src/test/test-util.c
17b0f1
+++ b/src/test/test-util.c
17b0f1
@@ -1861,6 +1861,54 @@ static void test_system_tasks_max_scale(void) {
17b0f1
         assert_se(system_tasks_max_scale(UINT64_MAX/4, UINT64_MAX) == UINT64_MAX);
17b0f1
 }
17b0f1
 
17b0f1
+static void test_acquire_data_fd_one(unsigned flags) {
17b0f1
+        char wbuffer[196*1024 - 7];
17b0f1
+        char rbuffer[sizeof(wbuffer)];
17b0f1
+        int fd;
17b0f1
+
17b0f1
+        fd = acquire_data_fd("foo", 3, flags);
17b0f1
+        assert_se(fd >= 0);
17b0f1
+
17b0f1
+        zero(rbuffer);
17b0f1
+        assert_se(read(fd, rbuffer, sizeof(rbuffer)) == 3);
17b0f1
+        assert_se(streq(rbuffer, "foo"));
17b0f1
+
17b0f1
+        fd = safe_close(fd);
17b0f1
+
17b0f1
+        fd = acquire_data_fd("", 0, flags);
17b0f1
+        assert_se(fd >= 0);
17b0f1
+
17b0f1
+        zero(rbuffer);
17b0f1
+        assert_se(read(fd, rbuffer, sizeof(rbuffer)) == 0);
17b0f1
+        assert_se(streq(rbuffer, ""));
17b0f1
+
17b0f1
+        fd = safe_close(fd);
17b0f1
+
17b0f1
+        random_bytes(wbuffer, sizeof(wbuffer));
17b0f1
+
17b0f1
+        fd = acquire_data_fd(wbuffer, sizeof(wbuffer), flags);
17b0f1
+        assert_se(fd >= 0);
17b0f1
+
17b0f1
+        zero(rbuffer);
17b0f1
+        assert_se(read(fd, rbuffer, sizeof(rbuffer)) == sizeof(rbuffer));
17b0f1
+        assert_se(memcmp(rbuffer, wbuffer, sizeof(rbuffer)) == 0);
17b0f1
+
17b0f1
+        fd = safe_close(fd);
17b0f1
+}
17b0f1
+
17b0f1
+static void test_acquire_data_fd(void) {
17b0f1
+
17b0f1
+        test_acquire_data_fd_one(0);
17b0f1
+        test_acquire_data_fd_one(ACQUIRE_NO_DEV_NULL);
17b0f1
+        test_acquire_data_fd_one(ACQUIRE_NO_MEMFD);
17b0f1
+        test_acquire_data_fd_one(ACQUIRE_NO_DEV_NULL|ACQUIRE_NO_MEMFD);
17b0f1
+        test_acquire_data_fd_one(ACQUIRE_NO_PIPE);
17b0f1
+        test_acquire_data_fd_one(ACQUIRE_NO_DEV_NULL|ACQUIRE_NO_PIPE);
17b0f1
+        test_acquire_data_fd_one(ACQUIRE_NO_MEMFD|ACQUIRE_NO_PIPE);
17b0f1
+        test_acquire_data_fd_one(ACQUIRE_NO_DEV_NULL|ACQUIRE_NO_MEMFD|ACQUIRE_NO_PIPE);
17b0f1
+        test_acquire_data_fd_one(ACQUIRE_NO_DEV_NULL|ACQUIRE_NO_MEMFD|ACQUIRE_NO_PIPE|ACQUIRE_NO_TMPFILE);
17b0f1
+}
17b0f1
+
17b0f1
 int main(int argc, char *argv[]) {
17b0f1
         log_parse_environment();
17b0f1
         log_open();
17b0f1
@@ -1943,6 +1991,7 @@ int main(int argc, char *argv[]) {
17b0f1
         test_shell_maybe_quote();
17b0f1
         test_system_tasks_max();
17b0f1
         test_system_tasks_max_scale();
17b0f1
+        test_acquire_data_fd();
17b0f1
 
17b0f1
         return 0;
17b0f1
 }