|
|
147e83 |
The commit included in this patch only incidentally fixes the problem
|
|
|
147e83 |
reported in bug 1427734: In each of the IBM9xx character sets referenced in
|
|
|
147e83 |
this patch, the removal of the "break" statement means that the subsequent
|
|
|
147e83 |
increment of "inptr" is executed instead of being skipped. This allows
|
|
|
147e83 |
conversion to progress instead of hanging.
|
|
|
147e83 |
|
|
|
147e83 |
commit 692de4b3960dc90bdcfb871513ee4d81d314cf69
|
|
|
147e83 |
Author: Martin Sebor <msebor@redhat.com>
|
|
|
147e83 |
Date: Fri Jan 15 11:25:13 2016 -0700
|
|
|
147e83 |
|
|
|
147e83 |
Have iconv accept redundant escape sequences in IBM930, IBM933, IBM935,
|
|
|
147e83 |
IBM937, and IBM939.
|
|
|
147e83 |
|
|
|
147e83 |
Patch for bug #17197 changes the encoder to avoid generating redundant
|
|
|
147e83 |
shift sequences. However, those sequences may already be present in
|
|
|
147e83 |
data encoded by prior versions of the encoder. This change modifies
|
|
|
147e83 |
the decoder to also avoid rejecting redundant shift sequences.
|
|
|
147e83 |
|
|
|
147e83 |
[BZ #19432]
|
|
|
147e83 |
* iconvdata/Makefile: Add bug-iconv11.
|
|
|
147e83 |
* iconvdata/bug-iconv11.c: New test.
|
|
|
147e83 |
* iconvdata/ibm930.c: Do not reject redundant shift sequences.
|
|
|
147e83 |
* iconvdata/ibm933.c: Same.
|
|
|
147e83 |
* iconvdata/ibm935.c: Same.
|
|
|
147e83 |
* iconvdata/ibm937.c: Same.
|
|
|
147e83 |
* iconvdata/ibm939.c: Same.
|
|
|
147e83 |
|
|
|
147e83 |
# Conflicts:
|
|
|
147e83 |
# iconvdata/Makefile
|
|
|
147e83 |
|
|
|
147e83 |
diff --git a/iconvdata/Makefile b/iconvdata/Makefile
|
|
|
147e83 |
index 0ec67554ca4c29ea..c4e6c510d7abc055 100644
|
|
|
147e83 |
--- a/iconvdata/Makefile
|
|
|
147e83 |
+++ b/iconvdata/Makefile
|
|
|
147e83 |
@@ -68,7 +68,7 @@ include ../Makeconfig
|
|
|
147e83 |
ifeq (yes,$(build-shared))
|
|
|
147e83 |
tests = bug-iconv1 bug-iconv2 tst-loading tst-e2big tst-iconv4 bug-iconv4 \
|
|
|
147e83 |
tst-iconv6 bug-iconv5 bug-iconv6 tst-iconv7 bug-iconv8 bug-iconv9 \
|
|
|
147e83 |
- bug-iconv10 bug-iconv12
|
|
|
147e83 |
+ bug-iconv10 bug-iconv11 bug-iconv12
|
|
|
147e83 |
ifeq ($(have-thread-library),yes)
|
|
|
147e83 |
tests += bug-iconv3
|
|
|
147e83 |
endif
|
|
|
147e83 |
diff --git a/iconvdata/bug-iconv11.c b/iconvdata/bug-iconv11.c
|
|
|
147e83 |
new file mode 100644
|
|
|
147e83 |
index 0000000000000000..6cdc07d79883454d
|
|
|
147e83 |
--- /dev/null
|
|
|
147e83 |
+++ b/iconvdata/bug-iconv11.c
|
|
|
147e83 |
@@ -0,0 +1,114 @@
|
|
|
147e83 |
+/* bug 19432: iconv rejects redundant escape sequences in IBM930,
|
|
|
147e83 |
+ IBM933, IBM935, IBM937, and IBM939
|
|
|
147e83 |
+
|
|
|
147e83 |
+ Copyright (C) 2016 Free Software Foundation, Inc.
|
|
|
147e83 |
+ This file is part of the GNU C Library.
|
|
|
147e83 |
+
|
|
|
147e83 |
+ The GNU C Library is free software; you can redistribute it and/or
|
|
|
147e83 |
+ modify it under the terms of the GNU Lesser General Public
|
|
|
147e83 |
+ License as published by the Free Software Foundation; either
|
|
|
147e83 |
+ version 2.1 of the License, or (at your option) any later version.
|
|
|
147e83 |
+
|
|
|
147e83 |
+ The GNU C Library is distributed in the hope that it will be useful,
|
|
|
147e83 |
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
147e83 |
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
|
147e83 |
+ Lesser General Public License for more details.
|
|
|
147e83 |
+
|
|
|
147e83 |
+ You should have received a copy of the GNU Lesser General Public
|
|
|
147e83 |
+ License along with the GNU C Library; if not, see
|
|
|
147e83 |
+ <http://www.gnu.org/licenses/>. */
|
|
|
147e83 |
+
|
|
|
147e83 |
+#include <iconv.h>
|
|
|
147e83 |
+#include <stdio.h>
|
|
|
147e83 |
+#include <stdlib.h>
|
|
|
147e83 |
+#include <string.h>
|
|
|
147e83 |
+#include <errno.h>
|
|
|
147e83 |
+
|
|
|
147e83 |
+// The longest test input sequence.
|
|
|
147e83 |
+#define MAXINBYTES 8
|
|
|
147e83 |
+#define MAXOUTBYTES (MAXINBYTES * MB_LEN_MAX)
|
|
|
147e83 |
+
|
|
|
147e83 |
+/* Verify that a conversion of the INPUT sequence consisting of
|
|
|
147e83 |
+ INBYTESLEFT bytes in the encoding specified by the codeset
|
|
|
147e83 |
+ named by FROM_SET is successful.
|
|
|
147e83 |
+ Return 0 on success, non-zero on iconv() failure. */
|
|
|
147e83 |
+
|
|
|
147e83 |
+static int
|
|
|
147e83 |
+test_ibm93x (const char *from_set, const char *input, size_t inbytesleft)
|
|
|
147e83 |
+{
|
|
|
147e83 |
+ const char to_set[] = "UTF-8";
|
|
|
147e83 |
+ iconv_t cd = iconv_open (to_set, from_set);
|
|
|
147e83 |
+ if (cd == (iconv_t) -1)
|
|
|
147e83 |
+ {
|
|
|
147e83 |
+ printf ("iconv_open(\"%s\", \"%s\"): %s\n",
|
|
|
147e83 |
+ from_set, to_set, strerror (errno));
|
|
|
147e83 |
+ return 1;
|
|
|
147e83 |
+ }
|
|
|
147e83 |
+
|
|
|
147e83 |
+ char output [MAXOUTBYTES];
|
|
|
147e83 |
+ size_t outbytesleft = sizeof output;
|
|
|
147e83 |
+
|
|
|
147e83 |
+ char *inbuf = (char*)input;
|
|
|
147e83 |
+ char *outbuf = output;
|
|
|
147e83 |
+
|
|
|
147e83 |
+ printf ("iconv(cd, %p, %zu, %p, %zu)\n",
|
|
|
147e83 |
+ inbuf, inbytesleft, outbuf, outbytesleft);
|
|
|
147e83 |
+
|
|
|
147e83 |
+ errno = 0;
|
|
|
147e83 |
+ size_t ret = iconv (cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
|
|
|
147e83 |
+ printf (" ==> %td: %s\n"
|
|
|
147e83 |
+ " inbuf%+td, inbytesleft=%zu, outbuf%+td, outbytesleft=%zu\n",
|
|
|
147e83 |
+ ret, strerror (errno),
|
|
|
147e83 |
+ inbuf - input, inbytesleft, outbuf - output, outbytesleft);
|
|
|
147e83 |
+
|
|
|
147e83 |
+ // Return 0 on success, non-zero on iconv() failure.
|
|
|
147e83 |
+ return ret == (size_t)-1 || errno;
|
|
|
147e83 |
+}
|
|
|
147e83 |
+
|
|
|
147e83 |
+static int
|
|
|
147e83 |
+do_test (void)
|
|
|
147e83 |
+{
|
|
|
147e83 |
+ // State-dependent encodings to exercise.
|
|
|
147e83 |
+ static const char* const to_code[] = {
|
|
|
147e83 |
+ "IBM930", "IBM933", "IBM935", "IBM937", "IBM939"
|
|
|
147e83 |
+ };
|
|
|
147e83 |
+
|
|
|
147e83 |
+ static const size_t ncodesets = sizeof to_code / sizeof *to_code;
|
|
|
147e83 |
+
|
|
|
147e83 |
+ static const struct {
|
|
|
147e83 |
+ char txt[MAXINBYTES];
|
|
|
147e83 |
+ size_t len;
|
|
|
147e83 |
+ } input[] = {
|
|
|
147e83 |
+#define DATA(s) { s, sizeof s - 1 }
|
|
|
147e83 |
+ /* <SI>: denotes the shift-in 1-byte escape sequence, changing
|
|
|
147e83 |
+ the encoder from a single-byte encoding to multibyte
|
|
|
147e83 |
+ <SO>: denotes the shift-out 1-byte escape sequence, switching
|
|
|
147e83 |
+ the encoder from a multibyte to a single-byte state */
|
|
|
147e83 |
+
|
|
|
147e83 |
+ DATA ("\x0e"), // <SI> (not redundant)
|
|
|
147e83 |
+ DATA ("\x0f"), // <SO> (redundant with initial state)
|
|
|
147e83 |
+ DATA ("\x0e\x0e"), // <SI><SI>
|
|
|
147e83 |
+ DATA ("\x0e\x0f\x0f"), // <SI><SO><SO>
|
|
|
147e83 |
+ DATA ("\x0f\x0f"), // <SO><SO>
|
|
|
147e83 |
+ DATA ("\x0f\x0e\x0e"), // <SO><SI><SI>
|
|
|
147e83 |
+ DATA ("\x0e\x0f\xc7\x0f"), // <SI><SO><G><SO>
|
|
|
147e83 |
+ DATA ("\xc7\x0f") // <G><SO> (redundant with initial state)
|
|
|
147e83 |
+ };
|
|
|
147e83 |
+
|
|
|
147e83 |
+ static const size_t ninputs = sizeof input / sizeof *input;
|
|
|
147e83 |
+
|
|
|
147e83 |
+ int ret = 0;
|
|
|
147e83 |
+
|
|
|
147e83 |
+ size_t i, j;
|
|
|
147e83 |
+
|
|
|
147e83 |
+ /* Iterate over the IBM93x codesets above and exercise each with
|
|
|
147e83 |
+ the input sequences above. */
|
|
|
147e83 |
+ for (i = 0; i != ncodesets; ++i)
|
|
|
147e83 |
+ for (j = 0; j != ninputs; ++j)
|
|
|
147e83 |
+ ret += test_ibm93x (to_code [i], input [j].txt, input [j].len);
|
|
|
147e83 |
+
|
|
|
147e83 |
+ return ret;
|
|
|
147e83 |
+}
|
|
|
147e83 |
+
|
|
|
147e83 |
+#define TEST_FUNCTION do_test ()
|
|
|
147e83 |
+#include "../test-skeleton.c"
|
|
|
147e83 |
diff --git a/iconvdata/ibm930.c b/iconvdata/ibm930.c
|
|
|
147e83 |
index 636141114f506985..88413ccfbabfdc35 100644
|
|
|
147e83 |
--- a/iconvdata/ibm930.c
|
|
|
147e83 |
+++ b/iconvdata/ibm930.c
|
|
|
147e83 |
@@ -105,24 +105,14 @@ enum
|
|
|
147e83 |
\
|
|
|
147e83 |
if (__builtin_expect (ch, 0) == SO) \
|
|
|
147e83 |
{ \
|
|
|
147e83 |
- /* Shift OUT, change to DBCS converter. */ \
|
|
|
147e83 |
- if (curcs == db) \
|
|
|
147e83 |
- { \
|
|
|
147e83 |
- result = __GCONV_ILLEGAL_INPUT; \
|
|
|
147e83 |
- break; \
|
|
|
147e83 |
- } \
|
|
|
147e83 |
+ /* Shift OUT, change to DBCS converter (redundant escape okay). */ \
|
|
|
147e83 |
curcs = db; \
|
|
|
147e83 |
++inptr; \
|
|
|
147e83 |
continue; \
|
|
|
147e83 |
} \
|
|
|
147e83 |
else if (__builtin_expect (ch, 0) == SI) \
|
|
|
147e83 |
{ \
|
|
|
147e83 |
- /* Shift IN, change to SBCS converter */ \
|
|
|
147e83 |
- if (curcs == sb) \
|
|
|
147e83 |
- { \
|
|
|
147e83 |
- result = __GCONV_ILLEGAL_INPUT; \
|
|
|
147e83 |
- break; \
|
|
|
147e83 |
- } \
|
|
|
147e83 |
+ /* Shift IN, change to SBCS converter (redundant escape okay). */ \
|
|
|
147e83 |
curcs = sb; \
|
|
|
147e83 |
++inptr; \
|
|
|
147e83 |
continue; \
|
|
|
147e83 |
diff --git a/iconvdata/ibm933.c b/iconvdata/ibm933.c
|
|
|
147e83 |
index 8b9e5780a36a454a..335d385551fee86e 100644
|
|
|
147e83 |
--- a/iconvdata/ibm933.c
|
|
|
147e83 |
+++ b/iconvdata/ibm933.c
|
|
|
147e83 |
@@ -104,24 +104,14 @@ enum
|
|
|
147e83 |
\
|
|
|
147e83 |
if (__builtin_expect (ch, 0) == SO) \
|
|
|
147e83 |
{ \
|
|
|
147e83 |
- /* Shift OUT, change to DBCS converter. */ \
|
|
|
147e83 |
- if (curcs == db) \
|
|
|
147e83 |
- { \
|
|
|
147e83 |
- result = __GCONV_ILLEGAL_INPUT; \
|
|
|
147e83 |
- break; \
|
|
|
147e83 |
- } \
|
|
|
147e83 |
+ /* Shift OUT, change to DBCS converter (redundant escape okay). */ \
|
|
|
147e83 |
curcs = db; \
|
|
|
147e83 |
++inptr; \
|
|
|
147e83 |
continue; \
|
|
|
147e83 |
} \
|
|
|
147e83 |
else if (__builtin_expect (ch, 0) == SI) \
|
|
|
147e83 |
{ \
|
|
|
147e83 |
- /* Shift IN, change to SBCS converter. */ \
|
|
|
147e83 |
- if (curcs == sb) \
|
|
|
147e83 |
- { \
|
|
|
147e83 |
- result = __GCONV_ILLEGAL_INPUT; \
|
|
|
147e83 |
- break; \
|
|
|
147e83 |
- } \
|
|
|
147e83 |
+ /* Shift IN, change to SBCS converter (redundant escape okay). */ \
|
|
|
147e83 |
curcs = sb; \
|
|
|
147e83 |
++inptr; \
|
|
|
147e83 |
continue; \
|
|
|
147e83 |
diff --git a/iconvdata/ibm935.c b/iconvdata/ibm935.c
|
|
|
147e83 |
index 4e2d99ab56d7f0d2..520d28a4e9a690fc 100644
|
|
|
147e83 |
--- a/iconvdata/ibm935.c
|
|
|
147e83 |
+++ b/iconvdata/ibm935.c
|
|
|
147e83 |
@@ -104,24 +104,14 @@ enum
|
|
|
147e83 |
\
|
|
|
147e83 |
if (__builtin_expect(ch, 0) == SO) \
|
|
|
147e83 |
{ \
|
|
|
147e83 |
- /* Shift OUT, change to DBCS converter. */ \
|
|
|
147e83 |
- if (curcs == db) \
|
|
|
147e83 |
- { \
|
|
|
147e83 |
- result = __GCONV_ILLEGAL_INPUT; \
|
|
|
147e83 |
- break; \
|
|
|
147e83 |
- } \
|
|
|
147e83 |
+ /* Shift OUT, change to DBCS converter (redundant escape okay). */ \
|
|
|
147e83 |
curcs = db; \
|
|
|
147e83 |
++inptr; \
|
|
|
147e83 |
continue; \
|
|
|
147e83 |
} \
|
|
|
147e83 |
else if (__builtin_expect (ch, 0) == SI) \
|
|
|
147e83 |
{ \
|
|
|
147e83 |
- /* Shift IN, change to SBCS converter. */ \
|
|
|
147e83 |
- if (curcs == sb) \
|
|
|
147e83 |
- { \
|
|
|
147e83 |
- result = __GCONV_ILLEGAL_INPUT; \
|
|
|
147e83 |
- break; \
|
|
|
147e83 |
- } \
|
|
|
147e83 |
+ /* Shift IN, change to SBCS converter (redundant escape okay). */ \
|
|
|
147e83 |
curcs = sb; \
|
|
|
147e83 |
++inptr; \
|
|
|
147e83 |
continue; \
|
|
|
147e83 |
diff --git a/iconvdata/ibm937.c b/iconvdata/ibm937.c
|
|
|
147e83 |
index 1e468871b783e78d..64563bb8bf0441ff 100644
|
|
|
147e83 |
--- a/iconvdata/ibm937.c
|
|
|
147e83 |
+++ b/iconvdata/ibm937.c
|
|
|
147e83 |
@@ -104,24 +104,14 @@ enum
|
|
|
147e83 |
\
|
|
|
147e83 |
if (__builtin_expect (ch, 0) == SO) \
|
|
|
147e83 |
{ \
|
|
|
147e83 |
- /* Shift OUT, change to DBCS converter. */ \
|
|
|
147e83 |
- if (curcs == db) \
|
|
|
147e83 |
- { \
|
|
|
147e83 |
- result = __GCONV_ILLEGAL_INPUT; \
|
|
|
147e83 |
- break; \
|
|
|
147e83 |
- } \
|
|
|
147e83 |
+ /* Shift OUT, change to DBCS converter (redundant escape okay). */ \
|
|
|
147e83 |
curcs = db; \
|
|
|
147e83 |
++inptr; \
|
|
|
147e83 |
continue; \
|
|
|
147e83 |
} \
|
|
|
147e83 |
else if (__builtin_expect (ch, 0) == SI) \
|
|
|
147e83 |
{ \
|
|
|
147e83 |
- /* Shift IN, change to SBCS converter. */ \
|
|
|
147e83 |
- if (curcs == sb) \
|
|
|
147e83 |
- { \
|
|
|
147e83 |
- result = __GCONV_ILLEGAL_INPUT; \
|
|
|
147e83 |
- break; \
|
|
|
147e83 |
- } \
|
|
|
147e83 |
+ /* Shift IN, change to SBCS converter (redundant escape okay). */ \
|
|
|
147e83 |
curcs = sb; \
|
|
|
147e83 |
++inptr; \
|
|
|
147e83 |
continue; \
|
|
|
147e83 |
diff --git a/iconvdata/ibm939.c b/iconvdata/ibm939.c
|
|
|
147e83 |
index 2060b0c329df0c86..4f73e2e55c94972d 100644
|
|
|
147e83 |
--- a/iconvdata/ibm939.c
|
|
|
147e83 |
+++ b/iconvdata/ibm939.c
|
|
|
147e83 |
@@ -104,24 +104,14 @@ enum
|
|
|
147e83 |
\
|
|
|
147e83 |
if (__builtin_expect (ch, 0) == SO) \
|
|
|
147e83 |
{ \
|
|
|
147e83 |
- /* Shift OUT, change to DBCS converter. */ \
|
|
|
147e83 |
- if (curcs == db) \
|
|
|
147e83 |
- { \
|
|
|
147e83 |
- result = __GCONV_ILLEGAL_INPUT; \
|
|
|
147e83 |
- break; \
|
|
|
147e83 |
- } \
|
|
|
147e83 |
+ /* Shift OUT, change to DBCS converter (redundant escape okay). */ \
|
|
|
147e83 |
curcs = db; \
|
|
|
147e83 |
++inptr; \
|
|
|
147e83 |
continue; \
|
|
|
147e83 |
} \
|
|
|
147e83 |
else if (__builtin_expect (ch, 0) == SI) \
|
|
|
147e83 |
{ \
|
|
|
147e83 |
- /* Shift IN, change to SBCS converter. */ \
|
|
|
147e83 |
- if (curcs == sb) \
|
|
|
147e83 |
- { \
|
|
|
147e83 |
- result = __GCONV_ILLEGAL_INPUT; \
|
|
|
147e83 |
- break; \
|
|
|
147e83 |
- } \
|
|
|
147e83 |
+ /* Shift IN, change to SBCS converter (redundant escape okay). */ \
|
|
|
147e83 |
curcs = sb; \
|
|
|
147e83 |
++inptr; \
|
|
|
147e83 |
continue; \
|