Blame SOURCES/0172-lib-add-function-for-removing-userinfo-from-URIs.patch

4b6aa8
From 3e5fb3b7d678786dfd98b412e37f4757c7584aba Mon Sep 17 00:00:00 2001
4b6aa8
From: Jakub Filak <jfilak@redhat.com>
4b6aa8
Date: Wed, 21 Oct 2015 14:20:04 +0200
4b6aa8
Subject: [PATCH] lib: add function for removing userinfo from URIs
4b6aa8
4b6aa8
The function expects a valid URL.
4b6aa8
4b6aa8
Signed-off-by: Jakub Filak <jfilak@redhat.com>
4b6aa8
4b6aa8
Conflicts:
4b6aa8
	src/lib/Makefile.am
4b6aa8
---
4b6aa8
 src/include/internal_libreport.h |  22 ++++++
4b6aa8
 src/lib/Makefile.am              |   3 +-
4b6aa8
 src/lib/uriparser.c              | 166 +++++++++++++++++++++++++++++++++++++++
4b6aa8
 tests/Makefile.am                |   3 +-
4b6aa8
 tests/testsuite.at               |   1 +
4b6aa8
 tests/uriparser.at               | 144 +++++++++++++++++++++++++++++++++
4b6aa8
 6 files changed, 337 insertions(+), 2 deletions(-)
4b6aa8
 create mode 100644 src/lib/uriparser.c
4b6aa8
 create mode 100644 tests/uriparser.at
4b6aa8
4b6aa8
diff --git a/src/include/internal_libreport.h b/src/include/internal_libreport.h
4b6aa8
index 78a17ae..651e339 100644
4b6aa8
--- a/src/include/internal_libreport.h
4b6aa8
+++ b/src/include/internal_libreport.h
4b6aa8
@@ -1043,6 +1043,28 @@ void show_usage_and_die(const char *usage, const struct options *opt) NORETURN;
4b6aa8
  */
4b6aa8
 struct abrt_post_state;
4b6aa8
 
4b6aa8
+/* Decomposes uri into its base elements, removes userinfo from the authority and
4b6aa8
+ * composes a new uri without userinfo.
4b6aa8
+ *
4b6aa8
+ * The function does not validate the url.
4b6aa8
+ *
4b6aa8
+ * @param uri The uri that might contain userinfo
4b6aa8
+ * @param result The userinfo-free uri will be stored here. Cannot be NULL. Must
4b6aa8
+ * be de-allocated by free.
4b6aa8
+ * @param scheme Scheme of the uri. Can be NULL. Result can be NULL. Result
4b6aa8
+ * must be de-allocated by free.
4b6aa8
+ * @param hostname Hostname of the uri. Can be NULL. Result can be NULL. Result
4b6aa8
+ * must be de-allocated by free.
4b6aa8
+ * @param username Username of the uri. Can be NULL. Result can be NULL. Result
4b6aa8
+ * must be de-allocated by free.
4b6aa8
+ * @param password Password of the uri. Can be NULL. Result can be NULL. Result
4b6aa8
+ * must be de-allocated by free.
4b6aa8
+ * @param location Location of the uri. Can be NULL. Result is never NULL. Result
4b6aa8
+ * must be de-allocated by free.
4b6aa8
+ */
4b6aa8
+#define uri_userinfo_remove libreport_uri_userinfo_remove
4b6aa8
+int uri_userinfo_remove(const char *uri, char **result, char **scheme, char **hostname, char **username, char **password, char **location);
4b6aa8
+
4b6aa8
 #ifdef __cplusplus
4b6aa8
 }
4b6aa8
 #endif
4b6aa8
diff --git a/src/lib/Makefile.am b/src/lib/Makefile.am
4b6aa8
index 50142f7..b7e4781 100644
4b6aa8
--- a/src/lib/Makefile.am
4b6aa8
+++ b/src/lib/Makefile.am
4b6aa8
@@ -56,7 +56,8 @@ libreport_la_SOURCES = \
4b6aa8
     config_item_info.c \
4b6aa8
     xml_parser.c \
4b6aa8
     libreport_init.c \
4b6aa8
-    global_configuration.c
4b6aa8
+    global_configuration.c \
4b6aa8
+    uriparser.c
4b6aa8
 
4b6aa8
 libreport_la_CPPFLAGS = \
4b6aa8
     -I$(srcdir)/../include \
4b6aa8
diff --git a/src/lib/uriparser.c b/src/lib/uriparser.c
4b6aa8
new file mode 100644
4b6aa8
index 0000000..01e9782
4b6aa8
--- /dev/null
4b6aa8
+++ b/src/lib/uriparser.c
4b6aa8
@@ -0,0 +1,166 @@
4b6aa8
+/*
4b6aa8
+    Copyright (C) 2015  ABRT team
4b6aa8
+    Copyright (C) 2015  RedHat Inc
4b6aa8
+
4b6aa8
+    This program is free software; you can redistribute it and/or modify
4b6aa8
+    it under the terms of the GNU General Public License as published by
4b6aa8
+    the Free Software Foundation; either version 2 of the License, or
4b6aa8
+    (at your option) any later version.
4b6aa8
+
4b6aa8
+    This program is distributed in the hope that it will be useful,
4b6aa8
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
4b6aa8
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
4b6aa8
+    GNU General Public License for more details.
4b6aa8
+
4b6aa8
+    You should have received a copy of the GNU General Public License along
4b6aa8
+    with this program; if not, write to the Free Software Foundation, Inc.,
4b6aa8
+    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
4b6aa8
+*/
4b6aa8
+
4b6aa8
+#include "internal_libreport.h"
4b6aa8
+
4b6aa8
+#include <regex.h>
4b6aa8
+
4b6aa8
+int uri_userinfo_remove(const char *uri, char **result, char **scheme, char **hostname, char **username, char **password, char **location)
4b6aa8
+{
4b6aa8
+    /* https://www.ietf.org/rfc/rfc3986.txt
4b6aa8
+     * Appendix B.  Parsing a URI Reference with a Regular Expression
4b6aa8
+     *
4b6aa8
+     * scheme    = $2
4b6aa8
+     * authority = $4
4b6aa8
+     * location  = $5 <- introduced by jfilak
4b6aa8
+     * path      = $6
4b6aa8
+     * query     = $8
4b6aa8
+     * fragment  = $10
4b6aa8
+     *                         12            3  4          56       7   8        9 10 */
4b6aa8
+    const char *rfc3986_rx = "^(([^:/?#]+):)?(//([^/?#]*))?(([^?#]*)(\\?([^#]*))?(#(.*))?)$";
4b6aa8
+    regex_t re;
4b6aa8
+    int r = regcomp(&re, rfc3986_rx, REG_EXTENDED);
4b6aa8
+    assert(r == 0 || !"BUG: invalid regular expression");
4b6aa8
+
4b6aa8
+    regmatch_t matchptr[10];
4b6aa8
+    r = regexec(&re, uri, ARRAY_SIZE(matchptr), matchptr, 0);
4b6aa8
+    if (r != 0)
4b6aa8
+    {
4b6aa8
+        log_debug("URI does not match RFC3986 regular expression.");
4b6aa8
+        return -EINVAL;
4b6aa8
+    }
4b6aa8
+
4b6aa8
+    char *ptr = xzalloc((strlen(uri) + 1) * sizeof(char));
4b6aa8
+    *result = ptr;
4b6aa8
+    if (scheme != NULL)
4b6aa8
+        *scheme = NULL;
4b6aa8
+    if (hostname != NULL)
4b6aa8
+        *hostname = NULL;
4b6aa8
+    if (username != NULL)
4b6aa8
+        *username = NULL;
4b6aa8
+    if (password != NULL)
4b6aa8
+        *password = NULL;
4b6aa8
+    if (location != NULL)
4b6aa8
+        *location= NULL;
4b6aa8
+
4b6aa8
+    /* https://www.ietf.org/rfc/rfc3986.txt
4b6aa8
+     * 5.3.  Component Recomposition
4b6aa8
+     *
4b6aa8
+      result = ""
4b6aa8
+
4b6aa8
+      if defined(scheme) then
4b6aa8
+         append scheme to result;
4b6aa8
+         append ":" to result;
4b6aa8
+      endif;
4b6aa8
+
4b6aa8
+      if defined(authority) then
4b6aa8
+         append "//" to result;
4b6aa8
+         append authority to result;
4b6aa8
+      endif;
4b6aa8
+
4b6aa8
+      append path to result;
4b6aa8
+
4b6aa8
+      if defined(query) then
4b6aa8
+         append "?" to result;
4b6aa8
+         append query to result;
4b6aa8
+      endif;
4b6aa8
+
4b6aa8
+      if defined(fragment) then
4b6aa8
+         append "#" to result;
4b6aa8
+         append fragment to result;
4b6aa8
+      endif;
4b6aa8
+
4b6aa8
+      return result;
4b6aa8
+    */
4b6aa8
+
4b6aa8
+#define APPEND_MATCH(i, output) \
4b6aa8
+    if (matchptr[(i)].rm_so != -1) \
4b6aa8
+    { \
4b6aa8
+        size_t len = 0; \
4b6aa8
+        len = matchptr[(i)].rm_eo - matchptr[(i)].rm_so; \
4b6aa8
+        if (output) *output = xstrndup(uri + matchptr[(i)].rm_so, len); \
4b6aa8
+        strncpy(ptr, uri + matchptr[(i)].rm_so, len); \
4b6aa8
+        ptr += len; \
4b6aa8
+    }
4b6aa8
+
4b6aa8
+    /* Append "scheme:" if defined */
4b6aa8
+    APPEND_MATCH(1, scheme);
4b6aa8
+
4b6aa8
+    /* If authority is defined, append "//" */
4b6aa8
+    regmatch_t *match_authority = matchptr + 3;
4b6aa8
+    if (match_authority->rm_so != -1)
4b6aa8
+    {
4b6aa8
+        strcat(ptr, "//");
4b6aa8
+        ptr += 2;
4b6aa8
+    }
4b6aa8
+
4b6aa8
+    ++match_authority;
4b6aa8
+    /* If authority has address part, remove userinfo and add the address */
4b6aa8
+    if (match_authority->rm_so != -1)
4b6aa8
+    {
4b6aa8
+        size_t len = match_authority->rm_eo - match_authority->rm_so;
4b6aa8
+        const char *authority = uri + match_authority->rm_so;
4b6aa8
+
4b6aa8
+        /* Find the last '@', in case someone used '@' in the username or
4b6aa8
+         * password */
4b6aa8
+        size_t at = len;
4b6aa8
+        while (at != 0)
4b6aa8
+        {
4b6aa8
+            if (authority[--at] != '@')
4b6aa8
+                continue;
4b6aa8
+
4b6aa8
+            /* Find the first ':' before @. There should not be more ':' but this
4b6aa8
+             * is the most secure way -> avoid leaking an excerpt of a password
4b6aa8
+             * containing ':'.*/
4b6aa8
+            size_t colon = 0;
4b6aa8
+            while (colon < at)
4b6aa8
+            {
4b6aa8
+                if (authority[colon] != ':')
4b6aa8
+                {
4b6aa8
+                    ++colon;
4b6aa8
+                    continue;
4b6aa8
+                }
4b6aa8
+
4b6aa8
+                if (password != NULL)
4b6aa8
+                    *password = xstrndup(authority + colon + 1, at - colon - 1);
4b6aa8
+
4b6aa8
+                break;
4b6aa8
+            }
4b6aa8
+
4b6aa8
+            if (username != NULL)
4b6aa8
+                *username = xstrndup(authority, colon);
4b6aa8
+
4b6aa8
+            ++at;
4b6aa8
+            break;
4b6aa8
+        }
4b6aa8
+
4b6aa8
+        len -= at;
4b6aa8
+
4b6aa8
+        if (hostname != NULL)
4b6aa8
+            *hostname = xstrndup(authority + at, len);
4b6aa8
+
4b6aa8
+        strncpy(ptr, authority + at, len);
4b6aa8
+        ptr += len;
4b6aa8
+    }
4b6aa8
+
4b6aa8
+    /* Append path, query and fragment or "" */
4b6aa8
+    APPEND_MATCH(5, location);
4b6aa8
+
4b6aa8
+    return 0;
4b6aa8
+}
4b6aa8
diff --git a/tests/Makefile.am b/tests/Makefile.am
4b6aa8
index f36ab57..c22958b 100644
4b6aa8
--- a/tests/Makefile.am
4b6aa8
+++ b/tests/Makefile.am
4b6aa8
@@ -45,7 +45,8 @@ TESTSUITE_AT = \
4b6aa8
   ureport.at \
4b6aa8
   dump_dir.at \
4b6aa8
   global_config.at \
4b6aa8
-  iso_date.at
4b6aa8
+  iso_date.at \
4b6aa8
+  uriparser.at
4b6aa8
 
4b6aa8
 EXTRA_DIST += $(TESTSUITE_AT)
4b6aa8
 TESTSUITE = $(srcdir)/testsuite
4b6aa8
diff --git a/tests/testsuite.at b/tests/testsuite.at
4b6aa8
index e5e2f72..72e0715 100644
4b6aa8
--- a/tests/testsuite.at
4b6aa8
+++ b/tests/testsuite.at
4b6aa8
@@ -20,3 +20,4 @@ m4_include([ureport.at])
4b6aa8
 m4_include([dump_dir.at])
4b6aa8
 m4_include([global_config.at])
4b6aa8
 m4_include([iso_date.at])
4b6aa8
+m4_include([uriparser.at])
4b6aa8
diff --git a/tests/uriparser.at b/tests/uriparser.at
4b6aa8
new file mode 100644
4b6aa8
index 0000000..def021f
4b6aa8
--- /dev/null
4b6aa8
+++ b/tests/uriparser.at
4b6aa8
@@ -0,0 +1,144 @@
4b6aa8
+# -*- Autotest -*-
4b6aa8
+
4b6aa8
+AT_BANNER([uriparser])
4b6aa8
+
4b6aa8
+## ------------------- ##
4b6aa8
+## uri_userinfo_remove ##
4b6aa8
+## ------------------- ##
4b6aa8
+
4b6aa8
+AT_TESTFUN([uri_userinfo_remove],
4b6aa8
+[[#include "internal_libreport.h"
4b6aa8
+#include <assert.h>
4b6aa8
+#include <string.h>
4b6aa8
+#include <stdio.h>
4b6aa8
+
4b6aa8
+bool string_cmp(const char *message, const char *orig, const char *other)
4b6aa8
+{
4b6aa8
+    if (orig == NULL && other != NULL)
4b6aa8
+    {
4b6aa8
+        printf("%s: expected NULL got '%s'\n", message, other);
4b6aa8
+        return false;
4b6aa8
+    }
4b6aa8
+
4b6aa8
+    if (orig != NULL && other == NULL)
4b6aa8
+    {
4b6aa8
+        printf("%s: expected '%s' got NULL\n", message, orig);
4b6aa8
+        return false;
4b6aa8
+    }
4b6aa8
+
4b6aa8
+    if (orig == NULL && other == NULL)
4b6aa8
+        return true;
4b6aa8
+
4b6aa8
+    if (strcmp(orig, other) == 0)
4b6aa8
+        return true;
4b6aa8
+
4b6aa8
+    printf("%s: '%s' != '%s'\n", message, orig, other);
4b6aa8
+    return false;
4b6aa8
+}
4b6aa8
+
4b6aa8
+int test(int retval, const char *uri, const char *result, const char *scheme, const char *hostname, const char *username, const char *password, const char *location)
4b6aa8
+{
4b6aa8
+    int e = 0;
4b6aa8
+    const char *names[] = {"result", "scheme", "hostname", "username", "password", "location"} ;
4b6aa8
+    char *outputs[6];
4b6aa8
+    const char *expected[6];
4b6aa8
+
4b6aa8
+    for (size_t i = 0; i < ARRAY_SIZE(outputs); ++i)
4b6aa8
+        outputs[i] = (char *)0xDEADBEEF;
4b6aa8
+
4b6aa8
+    expected[0] = result;
4b6aa8
+    expected[1] = scheme;
4b6aa8
+    expected[2] = hostname;
4b6aa8
+    expected[3] = username;
4b6aa8
+    expected[4] = password;
4b6aa8
+    expected[5] = location;
4b6aa8
+
4b6aa8
+    fprintf(stderr, "==== Testing: '%s'\n", uri);
4b6aa8
+    fprintf(stdout, "==== Testing: '%s'\n", uri);
4b6aa8
+
4b6aa8
+    int r = uri_userinfo_remove(uri, &outputs[0], &outputs[1], &outputs[2], &outputs[3], &outputs[4], &outputs[5]);
4b6aa8
+    if (r != retval)
4b6aa8
+    {
4b6aa8
+        printf("Invalid retval %d != %d\n", retval, r);
4b6aa8
+        ++e;
4b6aa8
+    }
4b6aa8
+
4b6aa8
+    if (r != -EINVAL)
4b6aa8
+    {
4b6aa8
+        for (size_t i = 0; i < ARRAY_SIZE(outputs); ++i)
4b6aa8
+        {
4b6aa8
+            if (outputs[i] == (char *)0xDEADBEEF)
4b6aa8
+            {
4b6aa8
+                printf("Not initialized argument '%s'\n", names[i]);
4b6aa8
+                ++e;
4b6aa8
+            }
4b6aa8
+            else
4b6aa8
+            {
4b6aa8
+                e += !string_cmp(names[i], expected[i], outputs[i]);
4b6aa8
+                free(outputs[i]);
4b6aa8
+                outputs[i] = (char *)0xDEADBEEF;
4b6aa8
+            }
4b6aa8
+        }
4b6aa8
+    }
4b6aa8
+    else
4b6aa8
+    {
4b6aa8
+        for (size_t i = 0; i < ARRAY_SIZE(outputs); ++i)
4b6aa8
+        {
4b6aa8
+            if (outputs[i] != (char *)0xDEADBEEF)
4b6aa8
+            {
4b6aa8
+                printf("Touched argument '%s'\n", names[i]);
4b6aa8
+                ++e;
4b6aa8
+            }
4b6aa8
+        }
4b6aa8
+    }
4b6aa8
+
4b6aa8
+    fprintf(stderr, "== Test without arguments\n");
4b6aa8
+    fprintf(stdout, "== Test without arguments\n");
4b6aa8
+
4b6aa8
+
4b6aa8
+    r = uri_userinfo_remove(uri, &outputs[0], NULL, NULL, NULL, NULL, NULL);
4b6aa8
+    if (r != retval)
4b6aa8
+    {
4b6aa8
+        printf("Invalid retval without arguments: %d != %d\n", retval, r);
4b6aa8
+        ++e;
4b6aa8
+    }
4b6aa8
+
4b6aa8
+    e += !string_cmp(names[0], result, outputs[0]);
4b6aa8
+    free(outputs[0]);
4b6aa8
+
4b6aa8
+    return e;
4b6aa8
+}
4b6aa8
+
4b6aa8
+int main(void)
4b6aa8
+{
4b6aa8
+    g_verbose=3;
4b6aa8
+
4b6aa8
+    int e = 0;
4b6aa8
+    e += test(      0, "ftp://root:password@", "ftp://", "ftp:", "", "root", "password", "");
4b6aa8
+    e += test(      0, "ftp://root:password@/", "ftp:///", "ftp:", "", "root", "password", "/");
4b6aa8
+    e += test(      0, "ftp://root:password@/foo", "ftp:///foo", "ftp:", "", "root", "password", "/foo");
4b6aa8
+    e += test(      0, "ftp://@", "ftp://", "ftp:", "", "", NULL, "");
4b6aa8
+    e += test(      0, "ftp://@/", "ftp:///", "ftp:", "", "", NULL, "/");
4b6aa8
+    e += test(      0, "ftp://@/foo", "ftp:///foo", "ftp:", "", "", NULL, "/foo");
4b6aa8
+    e += test(      0, "ftp://:@", "ftp://", "ftp:", "", "", "", "");
4b6aa8
+    e += test(      0, "ftp://:@/", "ftp:///", "ftp:", "", "", "", "/");
4b6aa8
+    e += test(      0, "ftp://:@/foo", "ftp:///foo", "ftp:", "", "", "", "/foo");
4b6aa8
+    e += test(      0, "root:password", "root:password", "root:", NULL, NULL, NULL, "password");
4b6aa8
+    e += test(      0, "root:password@", "root:password@", "root:", NULL, NULL, NULL, "password@");
4b6aa8
+    e += test(      0, "ftp://root:password", "ftp://root:password", "ftp:", "root:password", NULL, NULL, "");
4b6aa8
+    e += test(      0, "scp:://root:password@localhost", "scp:://root:password@localhost", "scp:", NULL, NULL, NULL, "://root:password@localhost");
4b6aa8
+    e += test(      0, "scp:///root:password@localhost", "scp:///root:password@localhost", "scp:", "", NULL, NULL, "/root:password@localhost");
4b6aa8
+    e += test(      0, "ftp://root:password/", "ftp://root:password/", "ftp:", "root:password", NULL, NULL, "/");
4b6aa8
+    e += test(      0, "scp://B@rt:P@ssw0rd@localhost/t@rget1?query=foo#head", "scp://localhost/t@rget1?query=foo#head", "scp:", "localhost", "B@rt", "P@ssw0rd", "/t@rget1?query=foo#head");
4b6aa8
+    e += test(      0, "scp://B@rt@localhost/t@rget1?query=foo#head", "scp://localhost/t@rget1?query=foo#head", "scp:", "localhost", "B@rt", NULL, "/t@rget1?query=foo#head");
4b6aa8
+    e += test(      0, "scp://B@rt:@localhost/t@rget1?query=foo#head", "scp://localhost/t@rget1?query=foo#head", "scp:", "localhost", "B@rt", "", "/t@rget1?query=foo#head");
4b6aa8
+    e += test(      0, "scp://:P@ssw0rd@localhost/t@rget1?query=foo#head", "scp://localhost/t@rget1?query=foo#head", "scp:", "localhost", "", "P@ssw0rd", "/t@rget1?query=foo#head");
4b6aa8
+    e += test(      0, "scp://@localhost/t@rget1?query=foo#head", "scp://localhost/t@rget1?query=foo#head", "scp:", "localhost", "", NULL, "/t@rget1?query=foo#head");
4b6aa8
+    e += test(      0, "scp://:@localhost/t@rget1?query=foo#head", "scp://localhost/t@rget1?query=foo#head", "scp:", "localhost", "", "", "/t@rget1?query=foo#head");
4b6aa8
+    e += test(      0, "password/root", "password/root", NULL, NULL, NULL, NULL, "password/root");
4b6aa8
+    e += test(      0, "/password/root", "/password/root", NULL, NULL, NULL, NULL, "/password/root");
4b6aa8
+    e += test(      0, "://root:passowrd@localhost", "://root:passowrd@localhost", NULL, NULL, NULL, NULL, "://root:passowrd@localhost");
4b6aa8
+
4b6aa8
+    return e;
4b6aa8
+}
4b6aa8
+]])
4b6aa8
-- 
4b6aa8
1.8.3.1
4b6aa8