|
|
147e83 |
Upstream commit:
|
|
|
147e83 |
|
|
|
147e83 |
commit 7e2f0d2d77e4bc273fe00f99d970605d8e38d4d6
|
|
|
147e83 |
Author: Andreas Schwab <schwab@suse.de>
|
|
|
147e83 |
Date: Mon Feb 4 10:16:33 2013 +0100
|
|
|
147e83 |
|
|
|
147e83 |
Fix handling of collating symbols in regexps
|
|
|
147e83 |
|
|
|
147e83 |
From c1b97d6d896b1f22fdf5d28471ef7859ec840a57 Mon Sep 17 00:00:00 2001
|
|
|
147e83 |
From: Andreas Schwab <schwab@redhat.com>
|
|
|
147e83 |
Date: Wed, 1 Sep 2010 17:26:15 +0200
|
|
|
147e83 |
Subject: [PATCH] Fix handling of collating symbols in regexps
|
|
|
147e83 |
|
|
|
147e83 |
[BZ #11561]
|
|
|
147e83 |
* posix/regcomp.c (parse_bracket_exp): When looking up collating
|
|
|
147e83 |
elements compare against the byte sequence of it, not its name.
|
|
|
147e83 |
|
|
|
147e83 |
---
|
|
|
147e83 |
ChangeLog | 4 +++
|
|
|
147e83 |
posix/regcomp.c | 72 ++++++++++++++++++++----------------------------------
|
|
|
147e83 |
2 files changed, 31 insertions(+), 45 deletions(-)
|
|
|
147e83 |
|
|
|
147e83 |
--- glibc-2.17-c758a686/posix/regcomp.c
|
|
|
147e83 |
+++ glibc-2.17-c758a686/posix/regcomp.c
|
|
|
147e83 |
@@ -2772,40 +2772,29 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
|
|
|
147e83 |
|
|
|
147e83 |
/* Local function for parse_bracket_exp used in _LIBC environement.
|
|
|
147e83 |
Seek the collating symbol entry correspondings to NAME.
|
|
|
147e83 |
- Return the index of the symbol in the SYMB_TABLE. */
|
|
|
147e83 |
+ Return the index of the symbol in the SYMB_TABLE,
|
|
|
147e83 |
+ or -1 if not found. */
|
|
|
147e83 |
|
|
|
147e83 |
auto inline int32_t
|
|
|
147e83 |
__attribute ((always_inline))
|
|
|
147e83 |
- seek_collating_symbol_entry (name, name_len)
|
|
|
147e83 |
- const unsigned char *name;
|
|
|
147e83 |
- size_t name_len;
|
|
|
147e83 |
+ seek_collating_symbol_entry (const unsigned char *name, size_t name_len)
|
|
|
147e83 |
{
|
|
|
147e83 |
- int32_t hash = elem_hash ((const char *) name, name_len);
|
|
|
147e83 |
- int32_t elem = hash % table_size;
|
|
|
147e83 |
- if (symb_table[2 * elem] != 0)
|
|
|
147e83 |
- {
|
|
|
147e83 |
- int32_t second = hash % (table_size - 2) + 1;
|
|
|
147e83 |
-
|
|
|
147e83 |
- do
|
|
|
147e83 |
- {
|
|
|
147e83 |
- /* First compare the hashing value. */
|
|
|
147e83 |
- if (symb_table[2 * elem] == hash
|
|
|
147e83 |
- /* Compare the length of the name. */
|
|
|
147e83 |
- && name_len == extra[symb_table[2 * elem + 1]]
|
|
|
147e83 |
- /* Compare the name. */
|
|
|
147e83 |
- && memcmp (name, &extra[symb_table[2 * elem + 1] + 1],
|
|
|
147e83 |
- name_len) == 0)
|
|
|
147e83 |
- {
|
|
|
147e83 |
- /* Yep, this is the entry. */
|
|
|
147e83 |
- break;
|
|
|
147e83 |
- }
|
|
|
147e83 |
+ int32_t elem;
|
|
|
147e83 |
|
|
|
147e83 |
- /* Next entry. */
|
|
|
147e83 |
- elem += second;
|
|
|
147e83 |
- }
|
|
|
147e83 |
- while (symb_table[2 * elem] != 0);
|
|
|
147e83 |
- }
|
|
|
147e83 |
- return elem;
|
|
|
147e83 |
+ for (elem = 0; elem < table_size; elem++)
|
|
|
147e83 |
+ if (symb_table[2 * elem] != 0)
|
|
|
147e83 |
+ {
|
|
|
147e83 |
+ int32_t idx = symb_table[2 * elem + 1];
|
|
|
147e83 |
+ /* Skip the name of the collating element. */
|
|
|
147e83 |
+ idx += 1 + extra[idx];
|
|
|
147e83 |
+ if (/* Compare the length of the name. */
|
|
|
147e83 |
+ name_len == extra[idx]
|
|
|
147e83 |
+ /* Compare the name. */
|
|
|
147e83 |
+ && memcmp (name, &extra[idx + 1], name_len) == 0)
|
|
|
147e83 |
+ /* Yep, this is the entry. */
|
|
|
147e83 |
+ return elem;
|
|
|
147e83 |
+ }
|
|
|
147e83 |
+ return -1;
|
|
|
147e83 |
}
|
|
|
147e83 |
|
|
|
147e83 |
/* Local function for parse_bracket_exp used in _LIBC environment.
|
|
|
147e83 |
@@ -2814,8 +2803,7 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
|
|
|
147e83 |
|
|
|
147e83 |
auto inline unsigned int
|
|
|
147e83 |
__attribute ((always_inline))
|
|
|
147e83 |
- lookup_collation_sequence_value (br_elem)
|
|
|
147e83 |
- bracket_elem_t *br_elem;
|
|
|
147e83 |
+ lookup_collation_sequence_value (bracket_elem_t *br_elem)
|
|
|
147e83 |
{
|
|
|
147e83 |
if (br_elem->type == SB_CHAR)
|
|
|
147e83 |
{
|
|
|
147e83 |
@@ -2843,7 +2831,7 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
|
|
|
147e83 |
int32_t elem, idx;
|
|
|
147e83 |
elem = seek_collating_symbol_entry (br_elem->opr.name,
|
|
|
147e83 |
sym_name_len);
|
|
|
147e83 |
- if (symb_table[2 * elem] != 0)
|
|
|
147e83 |
+ if (elem != -1)
|
|
|
147e83 |
{
|
|
|
147e83 |
/* We found the entry. */
|
|
|
147e83 |
idx = symb_table[2 * elem + 1];
|
|
|
147e83 |
@@ -2861,7 +2849,7 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
|
|
|
147e83 |
/* Return the collation sequence value. */
|
|
|
147e83 |
return *(unsigned int *) (extra + idx);
|
|
|
147e83 |
}
|
|
|
147e83 |
- else if (symb_table[2 * elem] == 0 && sym_name_len == 1)
|
|
|
147e83 |
+ else if (sym_name_len == 1)
|
|
|
147e83 |
{
|
|
|
147e83 |
/* No valid character. Match it as a single byte
|
|
|
147e83 |
character. */
|
|
|
147e83 |
@@ -2883,11 +2871,8 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
|
|
|
147e83 |
|
|
|
147e83 |
auto inline reg_errcode_t
|
|
|
147e83 |
__attribute ((always_inline))
|
|
|
147e83 |
- build_range_exp (sbcset, mbcset, range_alloc, start_elem, end_elem)
|
|
|
147e83 |
- re_charset_t *mbcset;
|
|
|
147e83 |
- int *range_alloc;
|
|
|
147e83 |
- bitset_t sbcset;
|
|
|
147e83 |
- bracket_elem_t *start_elem, *end_elem;
|
|
|
147e83 |
+ build_range_exp (bitset_t sbcset, re_charset_t *mbcset, int *range_alloc,
|
|
|
147e83 |
+ bracket_elem_t *start_elem, bracket_elem_t *end_elem)
|
|
|
147e83 |
{
|
|
|
147e83 |
unsigned int ch;
|
|
|
147e83 |
uint32_t start_collseq;
|
|
|
147e83 |
@@ -2966,25 +2951,22 @@ parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,
|
|
|
147e83 |
|
|
|
147e83 |
auto inline reg_errcode_t
|
|
|
147e83 |
__attribute ((always_inline))
|
|
|
147e83 |
- build_collating_symbol (sbcset, mbcset, coll_sym_alloc, name)
|
|
|
147e83 |
- re_charset_t *mbcset;
|
|
|
147e83 |
- int *coll_sym_alloc;
|
|
|
147e83 |
- bitset_t sbcset;
|
|
|
147e83 |
- const unsigned char *name;
|
|
|
147e83 |
+ build_collating_symbol (bitset_t sbcset, re_charset_t *mbcset,
|
|
|
147e83 |
+ int *coll_sym_alloc, const unsigned char *name)
|
|
|
147e83 |
{
|
|
|
147e83 |
int32_t elem, idx;
|
|
|
147e83 |
size_t name_len = strlen ((const char *) name);
|
|
|
147e83 |
if (nrules != 0)
|
|
|
147e83 |
{
|
|
|
147e83 |
elem = seek_collating_symbol_entry (name, name_len);
|
|
|
147e83 |
- if (symb_table[2 * elem] != 0)
|
|
|
147e83 |
+ if (elem != -1)
|
|
|
147e83 |
{
|
|
|
147e83 |
/* We found the entry. */
|
|
|
147e83 |
idx = symb_table[2 * elem + 1];
|
|
|
147e83 |
/* Skip the name of collating element name. */
|
|
|
147e83 |
idx += 1 + extra[idx];
|
|
|
147e83 |
}
|
|
|
147e83 |
- else if (symb_table[2 * elem] == 0 && name_len == 1)
|
|
|
147e83 |
+ else if (name_len == 1)
|
|
|
147e83 |
{
|
|
|
147e83 |
/* No valid character, treat it as a normal
|
|
|
147e83 |
character. */
|