|
|
e5695f |
diff -up diffutils-3.3/src/diff.c.i18n diffutils-3.3/src/diff.c
|
|
|
e5695f |
--- diffutils-3.3/src/diff.c.i18n 2013-02-02 04:39:05.000000000 +0000
|
|
|
e5695f |
+++ diffutils-3.3/src/diff.c 2013-10-23 11:25:18.729708853 +0100
|
|
|
e5695f |
@@ -74,6 +74,8 @@ static void try_help (char const *, char
|
|
|
e5695f |
static void check_stdout (void);
|
|
|
e5695f |
static void usage (void);
|
|
|
e5695f |
|
|
|
e5695f |
+bool (*lines_differ) (char const *, size_t, char const *, size_t);
|
|
|
e5695f |
+
|
|
|
e5695f |
/* If comparing directories, compare their common subdirectories
|
|
|
e5695f |
recursively. */
|
|
|
e5695f |
static bool recursive;
|
|
|
e5695f |
@@ -285,6 +287,13 @@ main (int argc, char **argv)
|
|
|
e5695f |
re_set_syntax (RE_SYNTAX_GREP | RE_NO_POSIX_BACKTRACKING);
|
|
|
e5695f |
excluded = new_exclude ();
|
|
|
e5695f |
|
|
|
e5695f |
+#ifdef HANDLE_MULTIBYTE
|
|
|
e5695f |
+ if (MB_CUR_MAX > 1)
|
|
|
e5695f |
+ lines_differ = lines_differ_multibyte;
|
|
|
e5695f |
+ else
|
|
|
e5695f |
+#endif
|
|
|
e5695f |
+ lines_differ = lines_differ_singlebyte;
|
|
|
e5695f |
+
|
|
|
e5695f |
/* Decode the options. */
|
|
|
e5695f |
|
|
|
e5695f |
while ((c = getopt_long (argc, argv, shortopts, longopts, NULL)) != -1)
|
|
|
e5695f |
diff -up diffutils-3.3/src/diff.h.i18n diffutils-3.3/src/diff.h
|
|
|
e5695f |
--- diffutils-3.3/src/diff.h.i18n 2013-02-02 04:39:05.000000000 +0000
|
|
|
e5695f |
+++ diffutils-3.3/src/diff.h 2013-10-23 11:25:18.729708853 +0100
|
|
|
e5695f |
@@ -23,6 +23,17 @@
|
|
|
e5695f |
#include <stdio.h>
|
|
|
e5695f |
#include <unlocked-io.h>
|
|
|
e5695f |
|
|
|
e5695f |
+/* For platforms which support the ISO C amendment 1 functionality we
|
|
|
e5695f |
+ support user-defined character classes. */
|
|
|
e5695f |
+#if defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H
|
|
|
e5695f |
+/* Solaris 2.5 has a bug: <wchar.h> must be included before <wctype.h>. */
|
|
|
e5695f |
+# include <wchar.h>
|
|
|
e5695f |
+# include <wctype.h>
|
|
|
e5695f |
+# if defined (HAVE_MBRTOWC)
|
|
|
e5695f |
+# define HANDLE_MULTIBYTE 1
|
|
|
e5695f |
+# endif
|
|
|
e5695f |
+#endif
|
|
|
e5695f |
+
|
|
|
e5695f |
/* What kind of changes a hunk contains. */
|
|
|
e5695f |
enum changes
|
|
|
e5695f |
{
|
|
|
e5695f |
@@ -365,7 +376,11 @@ extern void print_sdiff_script (struct c
|
|
|
e5695f |
extern char const change_letter[4];
|
|
|
e5695f |
extern char const pr_program[];
|
|
|
e5695f |
extern char *concat (char const *, char const *, char const *);
|
|
|
e5695f |
-extern bool lines_differ (char const *, char const *) _GL_ATTRIBUTE_PURE;
|
|
|
e5695f |
+extern bool (*lines_differ) (char const *, size_t, char const *, size_t) _GL_ATTRIBUTE_PURE;
|
|
|
e5695f |
+extern bool lines_differ_singlebyte (char const *, size_t, char const *, size_t) _GL_ATTRIBUTE_PURE;
|
|
|
e5695f |
+#ifdef HANDLE_MULTIBYTE
|
|
|
e5695f |
+extern bool lines_differ_multibyte (char const *, size_t, char const *, size_t) _GL_ATTRIBUTE_PURE;
|
|
|
e5695f |
+#endif
|
|
|
e5695f |
extern lin translate_line_number (struct file_data const *, lin);
|
|
|
e5695f |
extern struct change *find_change (struct change *);
|
|
|
e5695f |
extern struct change *find_reverse_change (struct change *);
|
|
|
e5695f |
diff -up diffutils-3.3/src/io.c.i18n diffutils-3.3/src/io.c
|
|
|
e5695f |
--- diffutils-3.3/src/io.c.i18n 2013-02-02 04:39:05.000000000 +0000
|
|
|
e5695f |
+++ diffutils-3.3/src/io.c 2013-10-23 12:03:09.155348827 +0100
|
|
|
e5695f |
@@ -23,6 +23,7 @@
|
|
|
e5695f |
#include <cmpbuf.h>
|
|
|
e5695f |
#include <file-type.h>
|
|
|
e5695f |
#include <xalloc.h>
|
|
|
e5695f |
+#include <assert.h>
|
|
|
e5695f |
|
|
|
e5695f |
/* Rotate an unsigned value to the left. */
|
|
|
e5695f |
#define ROL(v, n) ((v) << (n) | (v) >> (sizeof (v) * CHAR_BIT - (n)))
|
|
|
e5695f |
@@ -208,6 +209,28 @@ slurp (struct file_data *current)
|
|
|
e5695f |
|
|
|
e5695f |
/* Split the file into lines, simultaneously computing the equivalence
|
|
|
e5695f |
class for each line. */
|
|
|
e5695f |
+#ifdef HANDLE_MULTIBYTE
|
|
|
e5695f |
+# define MBC2WC(P, END, MBLENGTH, WC, STATE, CONVFAIL) \
|
|
|
e5695f |
+do \
|
|
|
e5695f |
+ { \
|
|
|
e5695f |
+ mbstate_t state_bak = STATE; \
|
|
|
e5695f |
+ \
|
|
|
e5695f |
+ CONVFAIL = 0; \
|
|
|
e5695f |
+ MBLENGTH = mbrtowc (&WC, P, END - (char const *)P, &STATE); \
|
|
|
e5695f |
+ \
|
|
|
e5695f |
+ switch (MBLENGTH) \
|
|
|
e5695f |
+ { \
|
|
|
e5695f |
+ case (size_t)-2: \
|
|
|
e5695f |
+ case (size_t)-1: \
|
|
|
e5695f |
+ STATE = state_bak; \
|
|
|
e5695f |
+ ++CONVFAIL; \
|
|
|
e5695f |
+ /* Fall through. */ \
|
|
|
e5695f |
+ case 0: \
|
|
|
e5695f |
+ MBLENGTH = 1; \
|
|
|
e5695f |
+ } \
|
|
|
e5695f |
+ } \
|
|
|
e5695f |
+ while (0)
|
|
|
e5695f |
+#endif
|
|
|
e5695f |
|
|
|
e5695f |
static void
|
|
|
e5695f |
find_and_hash_each_line (struct file_data *current)
|
|
|
e5695f |
@@ -234,12 +257,300 @@ find_and_hash_each_line (struct file_dat
|
|
|
e5695f |
bool same_length_diff_contents_compare_anyway =
|
|
|
e5695f |
diff_length_compare_anyway | ig_case;
|
|
|
e5695f |
|
|
|
e5695f |
+#ifdef HANDLE_MULTIBYTE
|
|
|
e5695f |
+ wchar_t wc;
|
|
|
e5695f |
+ size_t mblength;
|
|
|
e5695f |
+ mbstate_t state;
|
|
|
e5695f |
+ int convfail;
|
|
|
e5695f |
+
|
|
|
e5695f |
+ memset (&state, '\0', sizeof (mbstate_t));
|
|
|
e5695f |
+#endif
|
|
|
e5695f |
+
|
|
|
e5695f |
while (p < suffix_begin)
|
|
|
e5695f |
{
|
|
|
e5695f |
char const *ip = p;
|
|
|
e5695f |
hash_value h = 0;
|
|
|
e5695f |
unsigned char c;
|
|
|
e5695f |
|
|
|
e5695f |
+#ifdef HANDLE_MULTIBYTE
|
|
|
e5695f |
+ if (MB_CUR_MAX > 1)
|
|
|
e5695f |
+ {
|
|
|
e5695f |
+ wchar_t lo_wc;
|
|
|
e5695f |
+ char mbc[MB_LEN_MAX];
|
|
|
e5695f |
+ mbstate_t state_wc;
|
|
|
e5695f |
+
|
|
|
e5695f |
+ /* Hash this line until we find a newline. */
|
|
|
e5695f |
+ switch (ig_white_space)
|
|
|
e5695f |
+ {
|
|
|
e5695f |
+ case IGNORE_ALL_SPACE:
|
|
|
e5695f |
+ while (1)
|
|
|
e5695f |
+ {
|
|
|
e5695f |
+ if (*p == '\n')
|
|
|
e5695f |
+ {
|
|
|
e5695f |
+ ++p;
|
|
|
e5695f |
+ break;
|
|
|
e5695f |
+ }
|
|
|
e5695f |
+
|
|
|
e5695f |
+ MBC2WC (p, suffix_begin, mblength, wc, state, convfail);
|
|
|
e5695f |
+
|
|
|
e5695f |
+ if (convfail)
|
|
|
e5695f |
+ mbc[0] = *p++;
|
|
|
e5695f |
+ else if (!iswspace (wc))
|
|
|
e5695f |
+ {
|
|
|
e5695f |
+ bool flag = 0;
|
|
|
e5695f |
+
|
|
|
e5695f |
+ if (ig_case)
|
|
|
e5695f |
+ {
|
|
|
e5695f |
+ lo_wc = towlower (wc);
|
|
|
e5695f |
+ if (lo_wc != wc)
|
|
|
e5695f |
+ {
|
|
|
e5695f |
+ flag = 1;
|
|
|
e5695f |
+
|
|
|
e5695f |
+ p += mblength;
|
|
|
e5695f |
+ memset (&state_wc, '\0', sizeof(mbstate_t));
|
|
|
e5695f |
+ mblength = wcrtomb (mbc, lo_wc, &state_wc);
|
|
|
e5695f |
+
|
|
|
e5695f |
+ assert (mblength != (size_t)-1 &&
|
|
|
e5695f |
+ mblength != (size_t)-2);
|
|
|
e5695f |
+
|
|
|
e5695f |
+ mblength = (mblength < 1) ? 1 : mblength;
|
|
|
e5695f |
+ }
|
|
|
e5695f |
+ }
|
|
|
e5695f |
+
|
|
|
e5695f |
+ if (!flag)
|
|
|
e5695f |
+ {
|
|
|
e5695f |
+ for (i = 0; i < mblength; i++)
|
|
|
e5695f |
+ mbc[i] = *p++;
|
|
|
e5695f |
+ }
|
|
|
e5695f |
+ }
|
|
|
e5695f |
+ else
|
|
|
e5695f |
+ {
|
|
|
e5695f |
+ p += mblength;
|
|
|
e5695f |
+ continue;
|
|
|
e5695f |
+ }
|
|
|
e5695f |
+
|
|
|
e5695f |
+ for (i = 0; i < mblength; i++)
|
|
|
e5695f |
+ h = HASH (h, mbc[i]);
|
|
|
e5695f |
+ }
|
|
|
e5695f |
+ break;
|
|
|
e5695f |
+
|
|
|
e5695f |
+ case IGNORE_SPACE_CHANGE:
|
|
|
e5695f |
+ while (1)
|
|
|
e5695f |
+ {
|
|
|
e5695f |
+ if (*p == '\n')
|
|
|
e5695f |
+ {
|
|
|
e5695f |
+ ++p;
|
|
|
e5695f |
+ break;
|
|
|
e5695f |
+ }
|
|
|
e5695f |
+
|
|
|
e5695f |
+ MBC2WC (p, suffix_begin, mblength, wc, state, convfail);
|
|
|
e5695f |
+
|
|
|
e5695f |
+ if (!convfail && iswspace (wc))
|
|
|
e5695f |
+ {
|
|
|
e5695f |
+ while (1)
|
|
|
e5695f |
+ {
|
|
|
e5695f |
+ if (*p == '\n')
|
|
|
e5695f |
+ {
|
|
|
e5695f |
+ ++p;
|
|
|
e5695f |
+ goto hashing_done;
|
|
|
e5695f |
+ }
|
|
|
e5695f |
+
|
|
|
e5695f |
+ p += mblength;
|
|
|
e5695f |
+ MBC2WC (p, suffix_begin, mblength, wc, state, convfail);
|
|
|
e5695f |
+ if (convfail || !iswspace (wc))
|
|
|
e5695f |
+ break;
|
|
|
e5695f |
+ }
|
|
|
e5695f |
+ h = HASH (h, ' ');
|
|
|
e5695f |
+ }
|
|
|
e5695f |
+
|
|
|
e5695f |
+ /* WC is now the first non-space. */
|
|
|
e5695f |
+ if (convfail)
|
|
|
e5695f |
+ mbc[0] = *p++;
|
|
|
e5695f |
+ else
|
|
|
e5695f |
+ {
|
|
|
e5695f |
+ bool flag = 0;
|
|
|
e5695f |
+
|
|
|
e5695f |
+ if (ignore_case)
|
|
|
e5695f |
+ {
|
|
|
e5695f |
+ lo_wc = towlower (wc);
|
|
|
e5695f |
+ if (lo_wc != wc)
|
|
|
e5695f |
+ {
|
|
|
e5695f |
+ flag = 1;
|
|
|
e5695f |
+
|
|
|
e5695f |
+ p += mblength;
|
|
|
e5695f |
+ memset (&state_wc, '\0', sizeof(mbstate_t));
|
|
|
e5695f |
+ mblength = wcrtomb (mbc, lo_wc, &state_wc);
|
|
|
e5695f |
+
|
|
|
e5695f |
+ assert (mblength != (size_t)-1 &&
|
|
|
e5695f |
+ mblength != (size_t)-2);
|
|
|
e5695f |
+
|
|
|
e5695f |
+ mblength = (mblength < 1) ? 1 : mblength;
|
|
|
e5695f |
+ }
|
|
|
e5695f |
+ }
|
|
|
e5695f |
+
|
|
|
e5695f |
+ if (!flag)
|
|
|
e5695f |
+ {
|
|
|
e5695f |
+ for (i = 0; i < mblength; i++)
|
|
|
e5695f |
+ mbc[i] = *p++;
|
|
|
e5695f |
+ }
|
|
|
e5695f |
+ }
|
|
|
e5695f |
+
|
|
|
e5695f |
+ for (i = 0; i < mblength; i++)
|
|
|
e5695f |
+ h = HASH (h, mbc[i]);
|
|
|
e5695f |
+ }
|
|
|
e5695f |
+ break;
|
|
|
e5695f |
+
|
|
|
e5695f |
+ case IGNORE_TAB_EXPANSION:
|
|
|
e5695f |
+ case IGNORE_TAB_EXPANSION_AND_TRAILING_SPACE:
|
|
|
e5695f |
+ case IGNORE_TRAILING_SPACE:
|
|
|
e5695f |
+ {
|
|
|
e5695f |
+ size_t column = 0;
|
|
|
e5695f |
+ while (1)
|
|
|
e5695f |
+ {
|
|
|
e5695f |
+ if (*p == '\n')
|
|
|
e5695f |
+ {
|
|
|
e5695f |
+ ++p;
|
|
|
e5695f |
+ break;
|
|
|
e5695f |
+ }
|
|
|
e5695f |
+
|
|
|
e5695f |
+ MBC2WC (p, suffix_begin, mblength, wc, state, convfail);
|
|
|
e5695f |
+
|
|
|
e5695f |
+ if (!convfail
|
|
|
e5695f |
+ && ig_white_space & IGNORE_TRAILING_SPACE
|
|
|
e5695f |
+ && iswspace (wc))
|
|
|
e5695f |
+ {
|
|
|
e5695f |
+ char const *p1 = p;
|
|
|
e5695f |
+ while (1)
|
|
|
e5695f |
+ {
|
|
|
e5695f |
+ if (*p1 == '\n')
|
|
|
e5695f |
+ {
|
|
|
e5695f |
+ p = p1 + 1;
|
|
|
e5695f |
+ goto hashing_done;
|
|
|
e5695f |
+ }
|
|
|
e5695f |
+
|
|
|
e5695f |
+ p1 += mblength;
|
|
|
e5695f |
+ MBC2WC (p1, suffix_begin, mblength, wc, state, convfail);
|
|
|
e5695f |
+ if (convfail || !iswspace (wc))
|
|
|
e5695f |
+ break;
|
|
|
e5695f |
+ }
|
|
|
e5695f |
+ }
|
|
|
e5695f |
+
|
|
|
e5695f |
+ size_t repetitions = 1;
|
|
|
e5695f |
+ bool no_convert = 0;
|
|
|
e5695f |
+
|
|
|
e5695f |
+ if (ig_white_space & IGNORE_TAB_EXPANSION)
|
|
|
e5695f |
+ {
|
|
|
e5695f |
+ if (convfail)
|
|
|
e5695f |
+ column++;
|
|
|
e5695f |
+ else
|
|
|
e5695f |
+ switch (wc)
|
|
|
e5695f |
+ {
|
|
|
e5695f |
+ case L'\b':
|
|
|
e5695f |
+ column -= 0 < column;
|
|
|
e5695f |
+ break;
|
|
|
e5695f |
+
|
|
|
e5695f |
+ case L'\t':
|
|
|
e5695f |
+ mbc[0] = ' ';
|
|
|
e5695f |
+ mblength = 1;
|
|
|
e5695f |
+ no_convert = 1;
|
|
|
e5695f |
+ p++;
|
|
|
e5695f |
+ assert(mblength == 1);
|
|
|
e5695f |
+ repetitions = tabsize - column % tabsize;
|
|
|
e5695f |
+ column = (column + repetitions < column
|
|
|
e5695f |
+ ? 0
|
|
|
e5695f |
+ : column + repetitions);
|
|
|
e5695f |
+ break;
|
|
|
e5695f |
+
|
|
|
e5695f |
+ case L'\r':
|
|
|
e5695f |
+ column = 0;
|
|
|
e5695f |
+ break;
|
|
|
e5695f |
+
|
|
|
e5695f |
+ default:
|
|
|
e5695f |
+ column += wcwidth (wc);
|
|
|
e5695f |
+ break;
|
|
|
e5695f |
+ }
|
|
|
e5695f |
+ }
|
|
|
e5695f |
+
|
|
|
e5695f |
+ if (ig_case)
|
|
|
e5695f |
+ {
|
|
|
e5695f |
+ lo_wc = towlower (wc);
|
|
|
e5695f |
+ if (lo_wc != wc)
|
|
|
e5695f |
+ {
|
|
|
e5695f |
+ no_convert = 1;
|
|
|
e5695f |
+ p += mblength;
|
|
|
e5695f |
+ memset (&state_wc, '\0', sizeof(mbstate_t));
|
|
|
e5695f |
+ mblength = wcrtomb (mbc, lo_wc, &state_wc);
|
|
|
e5695f |
+
|
|
|
e5695f |
+ assert (mblength != (size_t)-1 &&
|
|
|
e5695f |
+ mblength != (size_t)-2);
|
|
|
e5695f |
+
|
|
|
e5695f |
+ mblength = (mblength < 1) ? 1 : mblength;
|
|
|
e5695f |
+ }
|
|
|
e5695f |
+ }
|
|
|
e5695f |
+
|
|
|
e5695f |
+ if (!no_convert)
|
|
|
e5695f |
+ for (i = 0; i < mblength; i++)
|
|
|
e5695f |
+ mbc[i] = *p++;
|
|
|
e5695f |
+
|
|
|
e5695f |
+ do
|
|
|
e5695f |
+ {
|
|
|
e5695f |
+ for (i = 0; i < mblength; i++)
|
|
|
e5695f |
+ h = HASH (h, mbc[i]);
|
|
|
e5695f |
+ }
|
|
|
e5695f |
+ while (--repetitions != 0);
|
|
|
e5695f |
+ }
|
|
|
e5695f |
+ }
|
|
|
e5695f |
+ break;
|
|
|
e5695f |
+
|
|
|
e5695f |
+ default:
|
|
|
e5695f |
+ while (1)
|
|
|
e5695f |
+ {
|
|
|
e5695f |
+ if (*p == '\n')
|
|
|
e5695f |
+ {
|
|
|
e5695f |
+ ++p;
|
|
|
e5695f |
+ break;
|
|
|
e5695f |
+ }
|
|
|
e5695f |
+
|
|
|
e5695f |
+ MBC2WC (p, suffix_begin, mblength, wc, state, convfail);
|
|
|
e5695f |
+
|
|
|
e5695f |
+ if (convfail)
|
|
|
e5695f |
+ mbc[0] = *p++;
|
|
|
e5695f |
+ else
|
|
|
e5695f |
+ {
|
|
|
e5695f |
+ int flag = 0;
|
|
|
e5695f |
+
|
|
|
e5695f |
+ if (ig_case)
|
|
|
e5695f |
+ {
|
|
|
e5695f |
+ lo_wc = towlower (wc);
|
|
|
e5695f |
+ if (lo_wc != wc)
|
|
|
e5695f |
+ {
|
|
|
e5695f |
+ flag = 1;
|
|
|
e5695f |
+ p += mblength;
|
|
|
e5695f |
+ memset (&state_wc, '\0', sizeof(mbstate_t));
|
|
|
e5695f |
+ mblength = wcrtomb (mbc, lo_wc, &state_wc);
|
|
|
e5695f |
+
|
|
|
e5695f |
+ assert (mblength != (size_t)-1 &&
|
|
|
e5695f |
+ mblength != (size_t)-2);
|
|
|
e5695f |
+
|
|
|
e5695f |
+ mblength = (mblength < 1) ? 1 : mblength;
|
|
|
e5695f |
+ }
|
|
|
e5695f |
+ }
|
|
|
e5695f |
+
|
|
|
e5695f |
+ if (!flag)
|
|
|
e5695f |
+ {
|
|
|
e5695f |
+ for (i = 0; i < mblength; i++)
|
|
|
e5695f |
+ mbc[i] = *p++;
|
|
|
e5695f |
+ }
|
|
|
e5695f |
+ }
|
|
|
e5695f |
+
|
|
|
e5695f |
+ for (i = 0; i < mblength; i++)
|
|
|
e5695f |
+ h = HASH (h, mbc[i]);
|
|
|
e5695f |
+ }
|
|
|
e5695f |
+ }
|
|
|
e5695f |
+ }
|
|
|
e5695f |
+ else
|
|
|
e5695f |
+#endif
|
|
|
e5695f |
+
|
|
|
e5695f |
/* Hash this line until we find a newline. */
|
|
|
e5695f |
switch (ig_white_space)
|
|
|
e5695f |
{
|
|
|
e5695f |
@@ -390,7 +701,7 @@ find_and_hash_each_line (struct file_dat
|
|
|
e5695f |
else if (!diff_length_compare_anyway)
|
|
|
e5695f |
continue;
|
|
|
e5695f |
|
|
|
e5695f |
- if (! lines_differ (eqline, ip))
|
|
|
e5695f |
+ if (! lines_differ (eqline, eqs[i].length + 1, ip, length + 1))
|
|
|
e5695f |
break;
|
|
|
e5695f |
}
|
|
|
e5695f |
|
|
|
e5695f |
diff -up diffutils-3.3/src/util.c.i18n diffutils-3.3/src/util.c
|
|
|
e5695f |
--- diffutils-3.3/src/util.c.i18n 2013-02-02 04:39:05.000000000 +0000
|
|
|
e5695f |
+++ diffutils-3.3/src/util.c 2013-10-23 11:25:18.730708857 +0100
|
|
|
e5695f |
@@ -408,7 +408,8 @@ finish_output (void)
|
|
|
e5695f |
Return nonzero if the lines differ. */
|
|
|
e5695f |
|
|
|
e5695f |
bool
|
|
|
e5695f |
-lines_differ (char const *s1, char const *s2)
|
|
|
e5695f |
+lines_differ_singlebyte (char const *s1, size_t s1len,
|
|
|
e5695f |
+ char const *s2, size_t s2len)
|
|
|
e5695f |
{
|
|
|
e5695f |
register char const *t1 = s1;
|
|
|
e5695f |
register char const *t2 = s2;
|
|
|
e5695f |
@@ -564,6 +565,354 @@ lines_differ (char const *s1, char const
|
|
|
e5695f |
|
|
|
e5695f |
return true;
|
|
|
e5695f |
}
|
|
|
e5695f |
+
|
|
|
e5695f |
+#ifdef HANDLE_MULTIBYTE
|
|
|
e5695f |
+# define MBC2WC(T, END, MBLENGTH, WC, STATE, CONVFAIL) \
|
|
|
e5695f |
+do \
|
|
|
e5695f |
+ { \
|
|
|
e5695f |
+ mbstate_t bak = STATE; \
|
|
|
e5695f |
+ \
|
|
|
e5695f |
+ CONVFAIL = 0; \
|
|
|
e5695f |
+ MBLENGTH = mbrtowc (&WC, T, END - T, &STATE); \
|
|
|
e5695f |
+ \
|
|
|
e5695f |
+ switch (MBLENGTH) \
|
|
|
e5695f |
+ { \
|
|
|
e5695f |
+ case (size_t)-2: \
|
|
|
e5695f |
+ case (size_t)-1: \
|
|
|
e5695f |
+ STATE = bak; \
|
|
|
e5695f |
+ ++CONVFAIL; \
|
|
|
e5695f |
+ /* Fall through. */ \
|
|
|
e5695f |
+ case 0: \
|
|
|
e5695f |
+ MBLENGTH = 1; \
|
|
|
e5695f |
+ } \
|
|
|
e5695f |
+ } \
|
|
|
e5695f |
+ while (0)
|
|
|
e5695f |
+
|
|
|
e5695f |
+bool
|
|
|
e5695f |
+lines_differ_multibyte (char const *s1, size_t s1len,
|
|
|
e5695f |
+ char const *s2, size_t s2len)
|
|
|
e5695f |
+{
|
|
|
e5695f |
+ char const *end1, *end2;
|
|
|
e5695f |
+ char c1, c2;
|
|
|
e5695f |
+ wchar_t wc1, wc2, wc1_bak, wc2_bak;
|
|
|
e5695f |
+ size_t mblen1, mblen2;
|
|
|
e5695f |
+ mbstate_t state1, state2, state1_bak, state2_bak;
|
|
|
e5695f |
+ int convfail1, convfail2, convfail1_bak, convfail2_bak;
|
|
|
e5695f |
+
|
|
|
e5695f |
+ char const *t1 = s1;
|
|
|
e5695f |
+ char const *t2 = s2;
|
|
|
e5695f |
+ char const *t1_bak, *t2_bak;
|
|
|
e5695f |
+ size_t column = 0;
|
|
|
e5695f |
+
|
|
|
e5695f |
+ if (ignore_white_space == IGNORE_NO_WHITE_SPACE && !ignore_case)
|
|
|
e5695f |
+ {
|
|
|
e5695f |
+ while (*t1 != '\n')
|
|
|
e5695f |
+ if (*t1++ != *t2++)
|
|
|
e5695f |
+ return 1;
|
|
|
e5695f |
+ return 0;
|
|
|
e5695f |
+ }
|
|
|
e5695f |
+
|
|
|
e5695f |
+ end1 = t1 + s1len;
|
|
|
e5695f |
+ end2 = t2 + s2len;
|
|
|
e5695f |
+
|
|
|
e5695f |
+ memset (&state1, '\0', sizeof (mbstate_t));
|
|
|
e5695f |
+ memset (&state2, '\0', sizeof (mbstate_t));
|
|
|
e5695f |
+
|
|
|
e5695f |
+ while (1)
|
|
|
e5695f |
+ {
|
|
|
e5695f |
+ c1 = *t1;
|
|
|
e5695f |
+ c2 = *t2;
|
|
|
e5695f |
+ MBC2WC (t1, end1, mblen1, wc1, state1, convfail1);
|
|
|
e5695f |
+ MBC2WC (t2, end2, mblen2, wc2, state2, convfail2);
|
|
|
e5695f |
+
|
|
|
e5695f |
+ /* Test for exact char equality first, since it's a common case. */
|
|
|
e5695f |
+ if (convfail1 ^ convfail2)
|
|
|
e5695f |
+ break;
|
|
|
e5695f |
+ else if (convfail1 && convfail2 && c1 != c2)
|
|
|
e5695f |
+ break;
|
|
|
e5695f |
+ else if (!convfail1 && !convfail2 && wc1 != wc2)
|
|
|
e5695f |
+ {
|
|
|
e5695f |
+ switch (ignore_white_space)
|
|
|
e5695f |
+ {
|
|
|
e5695f |
+ case IGNORE_ALL_SPACE:
|
|
|
e5695f |
+ /* For -w, just skip past any white space. */
|
|
|
e5695f |
+ while (1)
|
|
|
e5695f |
+ {
|
|
|
e5695f |
+ if (convfail1)
|
|
|
e5695f |
+ break;
|
|
|
e5695f |
+ else if (wc1 == L'\n' || !iswspace (wc1))
|
|
|
e5695f |
+ break;
|
|
|
e5695f |
+
|
|
|
e5695f |
+ t1 += mblen1;
|
|
|
e5695f |
+ c1 = *t1;
|
|
|
e5695f |
+ MBC2WC (t1, end1, mblen1, wc1, state1, convfail1);
|
|
|
e5695f |
+ }
|
|
|
e5695f |
+
|
|
|
e5695f |
+ while (1)
|
|
|
e5695f |
+ {
|
|
|
e5695f |
+ if (convfail2)
|
|
|
e5695f |
+ break;
|
|
|
e5695f |
+ else if (wc2 == L'\n' || !iswspace (wc2))
|
|
|
e5695f |
+ break;
|
|
|
e5695f |
+
|
|
|
e5695f |
+ t2 += mblen2;
|
|
|
e5695f |
+ c2 = *t2;
|
|
|
e5695f |
+ MBC2WC (t2, end2, mblen2, wc2, state2, convfail2);
|
|
|
e5695f |
+ }
|
|
|
e5695f |
+ t1 += mblen1;
|
|
|
e5695f |
+ t2 += mblen2;
|
|
|
e5695f |
+ break;
|
|
|
e5695f |
+
|
|
|
e5695f |
+ case IGNORE_SPACE_CHANGE:
|
|
|
e5695f |
+ /* For -b, advance past any sequence of white space in
|
|
|
e5695f |
+ line 1 and consider it just one space, or nothing at
|
|
|
e5695f |
+ all if it is at the end of the line. */
|
|
|
e5695f |
+ if (wc1 != L'\n' && iswspace (wc1))
|
|
|
e5695f |
+ {
|
|
|
e5695f |
+ size_t mblen_bak;
|
|
|
e5695f |
+ mbstate_t state_bak;
|
|
|
e5695f |
+
|
|
|
e5695f |
+ do
|
|
|
e5695f |
+ {
|
|
|
e5695f |
+ t1 += mblen1;
|
|
|
e5695f |
+ mblen_bak = mblen1;
|
|
|
e5695f |
+ state_bak = state1;
|
|
|
e5695f |
+ MBC2WC (t1, end1, mblen1, wc1, state1, convfail1);
|
|
|
e5695f |
+ }
|
|
|
e5695f |
+ while (!convfail1 && (wc1 != L'\n' && iswspace (wc1)));
|
|
|
e5695f |
+
|
|
|
e5695f |
+ state1 = state_bak;
|
|
|
e5695f |
+ mblen1 = mblen_bak;
|
|
|
e5695f |
+ t1 -= mblen1;
|
|
|
e5695f |
+ convfail1 = 0;
|
|
|
e5695f |
+ wc1 = L' ';
|
|
|
e5695f |
+ }
|
|
|
e5695f |
+
|
|
|
e5695f |
+ /* Likewise for line 2. */
|
|
|
e5695f |
+ if (wc2 != L'\n' && iswspace (wc2))
|
|
|
e5695f |
+ {
|
|
|
e5695f |
+ size_t mblen_bak;
|
|
|
e5695f |
+ mbstate_t state_bak;
|
|
|
e5695f |
+
|
|
|
e5695f |
+ do
|
|
|
e5695f |
+ {
|
|
|
e5695f |
+ t2 += mblen2;
|
|
|
e5695f |
+ mblen_bak = mblen2;
|
|
|
e5695f |
+ state_bak = state2;
|
|
|
e5695f |
+ MBC2WC (t2, end2, mblen2, wc2, state2, convfail2);
|
|
|
e5695f |
+ }
|
|
|
e5695f |
+ while (!convfail2 && (wc2 != L'\n' && iswspace (wc2)));
|
|
|
e5695f |
+
|
|
|
e5695f |
+ state2 = state_bak;
|
|
|
e5695f |
+ mblen2 = mblen_bak;
|
|
|
e5695f |
+ t2 -= mblen2;
|
|
|
e5695f |
+ convfail2 = 0;
|
|
|
e5695f |
+ wc2 = L' ';
|
|
|
e5695f |
+ }
|
|
|
e5695f |
+
|
|
|
e5695f |
+ if (wc1 != wc2)
|
|
|
e5695f |
+ {
|
|
|
e5695f |
+ /* If we went too far when doing the simple test for
|
|
|
e5695f |
+ equality, go back to the first non-whitespace
|
|
|
e5695f |
+ character in both sides and try again. */
|
|
|
e5695f |
+ if (wc2 == L' ' && wc1 != L'\n' &&
|
|
|
e5695f |
+ t1 > s1 &&
|
|
|
e5695f |
+ !convfail1_bak && iswspace (wc1_bak))
|
|
|
e5695f |
+ {
|
|
|
e5695f |
+ t1 = t1_bak;
|
|
|
e5695f |
+ wc1 = wc1_bak;
|
|
|
e5695f |
+ state1 = state1_bak;
|
|
|
e5695f |
+ convfail1 = convfail1_bak;
|
|
|
e5695f |
+ continue;
|
|
|
e5695f |
+ }
|
|
|
e5695f |
+ if (wc1 == L' ' && wc2 != L'\n'
|
|
|
e5695f |
+ && t2 > s2
|
|
|
e5695f |
+ && !convfail2_bak && iswspace (wc2_bak))
|
|
|
e5695f |
+ {
|
|
|
e5695f |
+ t2 = t2_bak;
|
|
|
e5695f |
+ wc2 = wc2_bak;
|
|
|
e5695f |
+ state2 = state2_bak;
|
|
|
e5695f |
+ convfail2 = convfail2_bak;
|
|
|
e5695f |
+ continue;
|
|
|
e5695f |
+ }
|
|
|
e5695f |
+ }
|
|
|
e5695f |
+
|
|
|
e5695f |
+ t1_bak = t1; t2_bak = t2;
|
|
|
e5695f |
+ wc1_bak = wc1; wc2_bak = wc2;
|
|
|
e5695f |
+ state1_bak = state1; state2_bak = state2;
|
|
|
e5695f |
+ convfail1_bak = convfail1; convfail2_bak = convfail2;
|
|
|
e5695f |
+
|
|
|
e5695f |
+ if (wc1 == L'\n')
|
|
|
e5695f |
+ wc1 = L' ';
|
|
|
e5695f |
+ else
|
|
|
e5695f |
+ t1 += mblen1;
|
|
|
e5695f |
+
|
|
|
e5695f |
+ if (wc2 == L'\n')
|
|
|
e5695f |
+ wc2 = L' ';
|
|
|
e5695f |
+ else
|
|
|
e5695f |
+ t2 += mblen2;
|
|
|
e5695f |
+
|
|
|
e5695f |
+ break;
|
|
|
e5695f |
+
|
|
|
e5695f |
+ case IGNORE_TRAILING_SPACE:
|
|
|
e5695f |
+ case IGNORE_TAB_EXPANSION_AND_TRAILING_SPACE:
|
|
|
e5695f |
+ if (iswspace (wc1) && iswspace (wc2))
|
|
|
e5695f |
+ {
|
|
|
e5695f |
+ char const *p;
|
|
|
e5695f |
+ wchar_t wc;
|
|
|
e5695f |
+ size_t mblength;
|
|
|
e5695f |
+ int convfail;
|
|
|
e5695f |
+ mbstate_t state;
|
|
|
e5695f |
+ bool just_whitespace_left = 1;
|
|
|
e5695f |
+ if (wc1 != L'\n')
|
|
|
e5695f |
+ {
|
|
|
e5695f |
+ mblength = mblen1;
|
|
|
e5695f |
+ p = t1;
|
|
|
e5695f |
+ memset (&state, '\0', sizeof(mbstate_t));
|
|
|
e5695f |
+ while (p < end1)
|
|
|
e5695f |
+ {
|
|
|
e5695f |
+ if (*p == '\n')
|
|
|
e5695f |
+ break;
|
|
|
e5695f |
+
|
|
|
e5695f |
+ p += mblength;
|
|
|
e5695f |
+ MBC2WC (p, end1, mblength, wc, state, convfail);
|
|
|
e5695f |
+ if (convfail || !iswspace (wc))
|
|
|
e5695f |
+ {
|
|
|
e5695f |
+ just_whitespace_left = 0;
|
|
|
e5695f |
+ break;
|
|
|
e5695f |
+ }
|
|
|
e5695f |
+ }
|
|
|
e5695f |
+ }
|
|
|
e5695f |
+ if (just_whitespace_left && wc2 != L'\n')
|
|
|
e5695f |
+ {
|
|
|
e5695f |
+ mblength = mblen2;
|
|
|
e5695f |
+ p = t2;
|
|
|
e5695f |
+ memset (&state, '\0', sizeof(mbstate_t));
|
|
|
e5695f |
+ while (p < end2)
|
|
|
e5695f |
+ {
|
|
|
e5695f |
+ if (*p == '\n')
|
|
|
e5695f |
+ break;
|
|
|
e5695f |
+
|
|
|
e5695f |
+ p += mblength;
|
|
|
e5695f |
+ MBC2WC (p, end2, mblength, wc, state, convfail);
|
|
|
e5695f |
+ if (convfail || !iswspace (wc))
|
|
|
e5695f |
+ {
|
|
|
e5695f |
+ just_whitespace_left = 0;
|
|
|
e5695f |
+ break;
|
|
|
e5695f |
+ }
|
|
|
e5695f |
+ }
|
|
|
e5695f |
+ }
|
|
|
e5695f |
+
|
|
|
e5695f |
+ if (just_whitespace_left)
|
|
|
e5695f |
+ /* Both lines have nothing but whitespace left. */
|
|
|
e5695f |
+ return false;
|
|
|
e5695f |
+ }
|
|
|
e5695f |
+
|
|
|
e5695f |
+ if (ignore_white_space == IGNORE_TRAILING_SPACE)
|
|
|
e5695f |
+ break;
|
|
|
e5695f |
+ /* Fall through. */
|
|
|
e5695f |
+ case IGNORE_TAB_EXPANSION:
|
|
|
e5695f |
+ if ((wc1 == L' ' && wc2 == L'\t')
|
|
|
e5695f |
+ || (wc1 == L'\t' && wc2 == L' '))
|
|
|
e5695f |
+ {
|
|
|
e5695f |
+ size_t column2 = column;
|
|
|
e5695f |
+
|
|
|
e5695f |
+ while (1)
|
|
|
e5695f |
+ {
|
|
|
e5695f |
+ if (convfail1)
|
|
|
e5695f |
+ {
|
|
|
e5695f |
+ ++t1;
|
|
|
e5695f |
+ break;
|
|
|
e5695f |
+ }
|
|
|
e5695f |
+ else if (wc1 == L' ')
|
|
|
e5695f |
+ column++;
|
|
|
e5695f |
+ else if (wc1 == L'\t')
|
|
|
e5695f |
+ column += tabsize - column % tabsize;
|
|
|
e5695f |
+ else
|
|
|
e5695f |
+ {
|
|
|
e5695f |
+ t1 += mblen1;
|
|
|
e5695f |
+ break;
|
|
|
e5695f |
+ }
|
|
|
e5695f |
+
|
|
|
e5695f |
+ t1 += mblen1;
|
|
|
e5695f |
+ c1 = *t1;
|
|
|
e5695f |
+ MBC2WC (t1, end1, mblen1, wc1, state1, convfail1);
|
|
|
e5695f |
+ }
|
|
|
e5695f |
+
|
|
|
e5695f |
+ while (1)
|
|
|
e5695f |
+ {
|
|
|
e5695f |
+ if (convfail2)
|
|
|
e5695f |
+ {
|
|
|
e5695f |
+ ++t2;
|
|
|
e5695f |
+ break;
|
|
|
e5695f |
+ }
|
|
|
e5695f |
+ else if (wc2 == L' ')
|
|
|
e5695f |
+ column2++;
|
|
|
e5695f |
+ else if (wc2 == L'\t')
|
|
|
e5695f |
+ column2 += tabsize - column2 % tabsize;
|
|
|
e5695f |
+ else
|
|
|
e5695f |
+ {
|
|
|
e5695f |
+ t2 += mblen2;
|
|
|
e5695f |
+ break;
|
|
|
e5695f |
+ }
|
|
|
e5695f |
+
|
|
|
e5695f |
+ t2 += mblen2;
|
|
|
e5695f |
+ c2 = *t2;
|
|
|
e5695f |
+ MBC2WC (t2, end2, mblen2, wc2, state2, convfail2);
|
|
|
e5695f |
+ }
|
|
|
e5695f |
+
|
|
|
e5695f |
+ if (column != column2)
|
|
|
e5695f |
+ return 1;
|
|
|
e5695f |
+ }
|
|
|
e5695f |
+ else
|
|
|
e5695f |
+ {
|
|
|
e5695f |
+ t1 += mblen1;
|
|
|
e5695f |
+ t2 += mblen2;
|
|
|
e5695f |
+ }
|
|
|
e5695f |
+ break;
|
|
|
e5695f |
+
|
|
|
e5695f |
+ case IGNORE_NO_WHITE_SPACE:
|
|
|
e5695f |
+ t1 += mblen1;
|
|
|
e5695f |
+ t2 += mblen2;
|
|
|
e5695f |
+ break;
|
|
|
e5695f |
+ }
|
|
|
e5695f |
+
|
|
|
e5695f |
+ /* Lowercase all letters if -i is specified. */
|
|
|
e5695f |
+ if (ignore_case)
|
|
|
e5695f |
+ {
|
|
|
e5695f |
+ if (!convfail1)
|
|
|
e5695f |
+ wc1 = towlower (wc1);
|
|
|
e5695f |
+ if (!convfail2)
|
|
|
e5695f |
+ wc2 = towlower (wc2);
|
|
|
e5695f |
+ }
|
|
|
e5695f |
+
|
|
|
e5695f |
+ if (convfail1 ^ convfail2)
|
|
|
e5695f |
+ break;
|
|
|
e5695f |
+ else if (convfail1 && convfail2 && c1 != c2)
|
|
|
e5695f |
+ break;
|
|
|
e5695f |
+ else if (!convfail1 && !convfail2 && wc1 != wc2)
|
|
|
e5695f |
+ break;
|
|
|
e5695f |
+ }
|
|
|
e5695f |
+ else
|
|
|
e5695f |
+ {
|
|
|
e5695f |
+ t1_bak = t1; t2_bak = t2;
|
|
|
e5695f |
+ wc1_bak = wc1; wc2_bak = wc2;
|
|
|
e5695f |
+ state1_bak = state1; state2_bak = state2;
|
|
|
e5695f |
+ convfail1_bak = convfail1; convfail2_bak = convfail2;
|
|
|
e5695f |
+
|
|
|
e5695f |
+ t1 += mblen1; t2 += mblen2;
|
|
|
e5695f |
+ }
|
|
|
e5695f |
+
|
|
|
e5695f |
+ if (!convfail1 && wc1 == L'\n')
|
|
|
e5695f |
+ return 0;
|
|
|
e5695f |
+
|
|
|
e5695f |
+ column += convfail1 ? 1 :
|
|
|
e5695f |
+ (wc1 == L'\t') ? tabsize - column % tabsize : wcwidth (wc1);
|
|
|
e5695f |
+ }
|
|
|
e5695f |
+
|
|
|
e5695f |
+ return 1;
|
|
|
e5695f |
+}
|
|
|
e5695f |
+#endif
|
|
|
e5695f |
|
|
|
e5695f |
/* Find the consecutive changes at the start of the script START.
|
|
|
e5695f |
Return the last link before the first gap. */
|