|
|
22c937 |
To: vim_dev@googlegroups.com
|
|
|
22c937 |
Subject: Patch 7.4.293
|
|
|
22c937 |
Fcc: outbox
|
|
|
22c937 |
From: Bram Moolenaar <Bram@moolenaar.net>
|
|
|
22c937 |
Mime-Version: 1.0
|
|
|
22c937 |
Content-Type: text/plain; charset=UTF-8
|
|
|
22c937 |
Content-Transfer-Encoding: 8bit
|
|
|
22c937 |
------------
|
|
|
22c937 |
|
|
|
22c937 |
Patch 7.4.293
|
|
|
22c937 |
Problem: It is not possible to ignore composing characters at a specific
|
|
|
22c937 |
point in a pattern.
|
|
|
22c937 |
Solution: Add the %C item.
|
|
|
22c937 |
Files: src/regexp.c, src/regexp_nfa.c, src/testdir/test95.in,
|
|
|
22c937 |
src/testdir/test95.ok, runtime/doc/pattern.txt
|
|
|
22c937 |
|
|
|
22c937 |
|
|
|
22c937 |
*** ../vim-7.4.292/src/regexp.c 2014-05-13 18:03:55.729737466 +0200
|
|
|
22c937 |
--- src/regexp.c 2014-05-13 18:27:08.725749659 +0200
|
|
|
22c937 |
***************
|
|
|
22c937 |
*** 244,249 ****
|
|
|
22c937 |
--- 244,250 ----
|
|
|
22c937 |
|
|
|
22c937 |
#define RE_MARK 207 /* mark cmp Match mark position */
|
|
|
22c937 |
#define RE_VISUAL 208 /* Match Visual area */
|
|
|
22c937 |
+ #define RE_COMPOSING 209 /* any composing characters */
|
|
|
22c937 |
|
|
|
22c937 |
/*
|
|
|
22c937 |
* Magic characters have a special meaning, they don't match literally.
|
|
|
22c937 |
***************
|
|
|
22c937 |
*** 2208,2213 ****
|
|
|
22c937 |
--- 2209,2218 ----
|
|
|
22c937 |
ret = regnode(RE_VISUAL);
|
|
|
22c937 |
break;
|
|
|
22c937 |
|
|
|
22c937 |
+ case 'C':
|
|
|
22c937 |
+ ret = regnode(RE_COMPOSING);
|
|
|
22c937 |
+ break;
|
|
|
22c937 |
+
|
|
|
22c937 |
/* \%[abc]: Emit as a list of branches, all ending at the last
|
|
|
22c937 |
* branch which matches nothing. */
|
|
|
22c937 |
case '[':
|
|
|
22c937 |
***************
|
|
|
22c937 |
*** 4710,4720 ****
|
|
|
22c937 |
status = RA_NOMATCH;
|
|
|
22c937 |
}
|
|
|
22c937 |
#ifdef FEAT_MBYTE
|
|
|
22c937 |
! /* Check for following composing character. */
|
|
|
22c937 |
if (status != RA_NOMATCH
|
|
|
22c937 |
&& enc_utf8
|
|
|
22c937 |
&& UTF_COMPOSINGLIKE(reginput, reginput + len)
|
|
|
22c937 |
! && !ireg_icombine)
|
|
|
22c937 |
{
|
|
|
22c937 |
/* raaron: This code makes a composing character get
|
|
|
22c937 |
* ignored, which is the correct behavior (sometimes)
|
|
|
22c937 |
--- 4715,4727 ----
|
|
|
22c937 |
status = RA_NOMATCH;
|
|
|
22c937 |
}
|
|
|
22c937 |
#ifdef FEAT_MBYTE
|
|
|
22c937 |
! /* Check for following composing character, unless %C
|
|
|
22c937 |
! * follows (skips over all composing chars). */
|
|
|
22c937 |
if (status != RA_NOMATCH
|
|
|
22c937 |
&& enc_utf8
|
|
|
22c937 |
&& UTF_COMPOSINGLIKE(reginput, reginput + len)
|
|
|
22c937 |
! && !ireg_icombine
|
|
|
22c937 |
! && OP(next) != RE_COMPOSING)
|
|
|
22c937 |
{
|
|
|
22c937 |
/* raaron: This code makes a composing character get
|
|
|
22c937 |
* ignored, which is the correct behavior (sometimes)
|
|
|
22c937 |
***************
|
|
|
22c937 |
*** 4791,4796 ****
|
|
|
22c937 |
--- 4798,4813 ----
|
|
|
22c937 |
status = RA_NOMATCH;
|
|
|
22c937 |
break;
|
|
|
22c937 |
#endif
|
|
|
22c937 |
+ case RE_COMPOSING:
|
|
|
22c937 |
+ #ifdef FEAT_MBYTE
|
|
|
22c937 |
+ if (enc_utf8)
|
|
|
22c937 |
+ {
|
|
|
22c937 |
+ /* Skip composing characters. */
|
|
|
22c937 |
+ while (utf_iscomposing(utf_ptr2char(reginput)))
|
|
|
22c937 |
+ mb_cptr_adv(reginput);
|
|
|
22c937 |
+ }
|
|
|
22c937 |
+ #endif
|
|
|
22c937 |
+ break;
|
|
|
22c937 |
|
|
|
22c937 |
case NOTHING:
|
|
|
22c937 |
break;
|
|
|
22c937 |
*** ../vim-7.4.292/src/regexp_nfa.c 2014-05-13 16:44:25.633695709 +0200
|
|
|
22c937 |
--- src/regexp_nfa.c 2014-05-13 19:25:58.285780556 +0200
|
|
|
22c937 |
***************
|
|
|
22c937 |
*** 81,86 ****
|
|
|
22c937 |
--- 81,87 ----
|
|
|
22c937 |
NFA_COMPOSING, /* Next nodes in NFA are part of the
|
|
|
22c937 |
composing multibyte char */
|
|
|
22c937 |
NFA_END_COMPOSING, /* End of a composing char in the NFA */
|
|
|
22c937 |
+ NFA_ANY_COMPOSING, /* \%C: Any composing characters. */
|
|
|
22c937 |
NFA_OPT_CHARS, /* \%[abc] */
|
|
|
22c937 |
|
|
|
22c937 |
/* The following are used only in the postfix form, not in the NFA */
|
|
|
22c937 |
***************
|
|
|
22c937 |
*** 1418,1423 ****
|
|
|
22c937 |
--- 1419,1428 ----
|
|
|
22c937 |
EMIT(NFA_VISUAL);
|
|
|
22c937 |
break;
|
|
|
22c937 |
|
|
|
22c937 |
+ case 'C':
|
|
|
22c937 |
+ EMIT(NFA_ANY_COMPOSING);
|
|
|
22c937 |
+ break;
|
|
|
22c937 |
+
|
|
|
22c937 |
case '[':
|
|
|
22c937 |
{
|
|
|
22c937 |
int n;
|
|
|
22c937 |
***************
|
|
|
22c937 |
*** 2429,2434 ****
|
|
|
22c937 |
--- 2434,2440 ----
|
|
|
22c937 |
case NFA_MARK_LT: STRCPY(code, "NFA_MARK_LT "); break;
|
|
|
22c937 |
case NFA_CURSOR: STRCPY(code, "NFA_CURSOR "); break;
|
|
|
22c937 |
case NFA_VISUAL: STRCPY(code, "NFA_VISUAL "); break;
|
|
|
22c937 |
+ case NFA_ANY_COMPOSING: STRCPY(code, "NFA_ANY_COMPOSING "); break;
|
|
|
22c937 |
|
|
|
22c937 |
case NFA_STAR: STRCPY(code, "NFA_STAR "); break;
|
|
|
22c937 |
case NFA_STAR_NONGREEDY: STRCPY(code, "NFA_STAR_NONGREEDY "); break;
|
|
|
22c937 |
***************
|
|
|
22c937 |
*** 2967,2972 ****
|
|
|
22c937 |
--- 2973,2979 ----
|
|
|
22c937 |
case NFA_NLOWER_IC:
|
|
|
22c937 |
case NFA_UPPER_IC:
|
|
|
22c937 |
case NFA_NUPPER_IC:
|
|
|
22c937 |
+ case NFA_ANY_COMPOSING:
|
|
|
22c937 |
/* possibly non-ascii */
|
|
|
22c937 |
#ifdef FEAT_MBYTE
|
|
|
22c937 |
if (has_mbyte)
|
|
|
22c937 |
***************
|
|
|
22c937 |
*** 4152,4157 ****
|
|
|
22c937 |
--- 4159,4165 ----
|
|
|
22c937 |
continue;
|
|
|
22c937 |
|
|
|
22c937 |
case NFA_ANY:
|
|
|
22c937 |
+ case NFA_ANY_COMPOSING:
|
|
|
22c937 |
case NFA_IDENT:
|
|
|
22c937 |
case NFA_SIDENT:
|
|
|
22c937 |
case NFA_KWORD:
|
|
|
22c937 |
***************
|
|
|
22c937 |
*** 4395,4401 ****
|
|
|
22c937 |
switch (state->c)
|
|
|
22c937 |
{
|
|
|
22c937 |
case NFA_MATCH:
|
|
|
22c937 |
! nfa_match = TRUE;
|
|
|
22c937 |
break;
|
|
|
22c937 |
|
|
|
22c937 |
case NFA_SPLIT:
|
|
|
22c937 |
--- 4403,4409 ----
|
|
|
22c937 |
switch (state->c)
|
|
|
22c937 |
{
|
|
|
22c937 |
case NFA_MATCH:
|
|
|
22c937 |
! // nfa_match = TRUE;
|
|
|
22c937 |
break;
|
|
|
22c937 |
|
|
|
22c937 |
case NFA_SPLIT:
|
|
|
22c937 |
***************
|
|
|
22c937 |
*** 5151,5156 ****
|
|
|
22c937 |
--- 5159,5165 ----
|
|
|
22c937 |
|
|
|
22c937 |
case NFA_MATCH:
|
|
|
22c937 |
case NFA_MCLOSE:
|
|
|
22c937 |
+ case NFA_ANY_COMPOSING:
|
|
|
22c937 |
/* empty match works always */
|
|
|
22c937 |
return 0;
|
|
|
22c937 |
|
|
|
22c937 |
***************
|
|
|
22c937 |
*** 5573,5578 ****
|
|
|
22c937 |
--- 5582,5593 ----
|
|
|
22c937 |
{
|
|
|
22c937 |
case NFA_MATCH:
|
|
|
22c937 |
{
|
|
|
22c937 |
+ #ifdef FEAT_MBYTE
|
|
|
22c937 |
+ /* If the match ends before a composing character and
|
|
|
22c937 |
+ * ireg_icombine is not set, that is not really a match. */
|
|
|
22c937 |
+ if (enc_utf8 && !ireg_icombine && utf_iscomposing(curc))
|
|
|
22c937 |
+ break;
|
|
|
22c937 |
+ #endif
|
|
|
22c937 |
nfa_match = TRUE;
|
|
|
22c937 |
copy_sub(&submatch->norm, &t->subs.norm);
|
|
|
22c937 |
#ifdef FEAT_SYN_HL
|
|
|
22c937 |
***************
|
|
|
22c937 |
*** 6120,6125 ****
|
|
|
22c937 |
--- 6135,6157 ----
|
|
|
22c937 |
}
|
|
|
22c937 |
break;
|
|
|
22c937 |
|
|
|
22c937 |
+ case NFA_ANY_COMPOSING:
|
|
|
22c937 |
+ /* On a composing character skip over it. Otherwise do
|
|
|
22c937 |
+ * nothing. Always matches. */
|
|
|
22c937 |
+ #ifdef FEAT_MBYTE
|
|
|
22c937 |
+ if (enc_utf8 && utf_iscomposing(curc))
|
|
|
22c937 |
+ {
|
|
|
22c937 |
+ add_off = clen;
|
|
|
22c937 |
+ }
|
|
|
22c937 |
+ else
|
|
|
22c937 |
+ #endif
|
|
|
22c937 |
+ {
|
|
|
22c937 |
+ add_here = TRUE;
|
|
|
22c937 |
+ add_off = 0;
|
|
|
22c937 |
+ }
|
|
|
22c937 |
+ add_state = t->state->out;
|
|
|
22c937 |
+ break;
|
|
|
22c937 |
+
|
|
|
22c937 |
/*
|
|
|
22c937 |
* Character classes like \a for alpha, \d for digit etc.
|
|
|
22c937 |
*/
|
|
|
22c937 |
***************
|
|
|
22c937 |
*** 6484,6495 ****
|
|
|
22c937 |
if (!result && ireg_ic)
|
|
|
22c937 |
result = MB_TOLOWER(c) == MB_TOLOWER(curc);
|
|
|
22c937 |
#ifdef FEAT_MBYTE
|
|
|
22c937 |
! /* If there is a composing character which is not being
|
|
|
22c937 |
! * ignored there can be no match. Match with composing
|
|
|
22c937 |
! * character uses NFA_COMPOSING above. */
|
|
|
22c937 |
! if (result && enc_utf8 && !ireg_icombine
|
|
|
22c937 |
! && clen != utf_char2len(curc))
|
|
|
22c937 |
! result = FALSE;
|
|
|
22c937 |
#endif
|
|
|
22c937 |
ADD_STATE_IF_MATCH(t->state);
|
|
|
22c937 |
break;
|
|
|
22c937 |
--- 6516,6525 ----
|
|
|
22c937 |
if (!result && ireg_ic)
|
|
|
22c937 |
result = MB_TOLOWER(c) == MB_TOLOWER(curc);
|
|
|
22c937 |
#ifdef FEAT_MBYTE
|
|
|
22c937 |
! /* If ireg_icombine is not set only skip over the character
|
|
|
22c937 |
! * itself. When it is set skip over composing characters. */
|
|
|
22c937 |
! if (result && enc_utf8 && !ireg_icombine)
|
|
|
22c937 |
! clen = utf_char2len(curc);
|
|
|
22c937 |
#endif
|
|
|
22c937 |
ADD_STATE_IF_MATCH(t->state);
|
|
|
22c937 |
break;
|
|
|
22c937 |
diff: ../vim-7.4.292/src/testdir/test95.insrc/testdir/test95.ok,: No such file or directory
|
|
|
22c937 |
diff: src/testdir/test95.insrc/testdir/test95.ok,: No such file or directory
|
|
|
22c937 |
*** ../vim-7.4.292/runtime/doc/pattern.txt 2013-08-10 13:24:59.000000000 +0200
|
|
|
22c937 |
--- runtime/doc/pattern.txt 2014-05-13 18:59:57.621766895 +0200
|
|
|
22c937 |
***************
|
|
|
22c937 |
*** 545,550 ****
|
|
|
22c937 |
--- 545,551 ----
|
|
|
22c937 |
|/\%u| \%u \%u match specified multibyte character (eg \%u20ac)
|
|
|
22c937 |
|/\%U| \%U \%U match specified large multibyte character (eg
|
|
|
22c937 |
\%U12345678)
|
|
|
22c937 |
+ |/\%C| \%C \%C match any composing characters
|
|
|
22c937 |
|
|
|
22c937 |
Example matches ~
|
|
|
22c937 |
\<\I\i* or
|
|
|
22c937 |
***************
|
|
|
22c937 |
*** 1207,1218 ****
|
|
|
22c937 |
8. Composing characters *patterns-composing*
|
|
|
22c937 |
|
|
|
22c937 |
*/\Z*
|
|
|
22c937 |
! When "\Z" appears anywhere in the pattern, composing characters are ignored.
|
|
|
22c937 |
! Thus only the base characters need to match, the composing characters may be
|
|
|
22c937 |
! different and the number of composing characters may differ. Only relevant
|
|
|
22c937 |
! when 'encoding' is "utf-8".
|
|
|
22c937 |
Exception: If the pattern starts with one or more composing characters, these
|
|
|
22c937 |
must match.
|
|
|
22c937 |
|
|
|
22c937 |
When a composing character appears at the start of the pattern of after an
|
|
|
22c937 |
item that doesn't include the composing character, a match is found at any
|
|
|
22c937 |
--- 1208,1225 ----
|
|
|
22c937 |
8. Composing characters *patterns-composing*
|
|
|
22c937 |
|
|
|
22c937 |
*/\Z*
|
|
|
22c937 |
! When "\Z" appears anywhere in the pattern, all composing characters are
|
|
|
22c937 |
! ignored. Thus only the base characters need to match, the composing
|
|
|
22c937 |
! characters may be different and the number of composing characters may differ.
|
|
|
22c937 |
! Only relevant when 'encoding' is "utf-8".
|
|
|
22c937 |
Exception: If the pattern starts with one or more composing characters, these
|
|
|
22c937 |
must match.
|
|
|
22c937 |
+ */\%C*
|
|
|
22c937 |
+ Use "\%C" to skip any composing characters. For example, the pattern "a" does
|
|
|
22c937 |
+ not match in "càt" (where the a has the composing character 0x0300), but
|
|
|
22c937 |
+ "a\%C" does. Note that this does not match "cát" (where the á is character
|
|
|
22c937 |
+ 0xe1, it does not have a composing character). It does match "cat" (where
|
|
|
22c937 |
+ the a is just an a).
|
|
|
22c937 |
|
|
|
22c937 |
When a composing character appears at the start of the pattern of after an
|
|
|
22c937 |
item that doesn't include the composing character, a match is found at any
|
|
|
22c937 |
*** ../vim-7.4.292/src/version.c 2014-05-13 18:03:55.729737466 +0200
|
|
|
22c937 |
--- src/version.c 2014-05-13 18:28:45.885750510 +0200
|
|
|
22c937 |
***************
|
|
|
22c937 |
*** 736,737 ****
|
|
|
22c937 |
--- 736,739 ----
|
|
|
22c937 |
{ /* Add new patch number below this line */
|
|
|
22c937 |
+ /**/
|
|
|
22c937 |
+ 293,
|
|
|
22c937 |
/**/
|
|
|
22c937 |
|
|
|
22c937 |
--
|
|
|
22c937 |
hundred-and-one symptoms of being an internet addict:
|
|
|
22c937 |
155. You forget to eat because you're too busy surfing the net.
|
|
|
22c937 |
|
|
|
22c937 |
/// Bram Moolenaar -- Bram@Moolenaar.net -- http://www.Moolenaar.net \\\
|
|
|
22c937 |
/// sponsor Vim, vote for features -- http://www.Vim.org/sponsor/ \\\
|
|
|
22c937 |
\\\ an exciting new programming language -- http://www.Zimbu.org ///
|
|
|
22c937 |
\\\ help me help AIDS victims -- http://ICCF-Holland.org ///
|