yann@1201: Original patch from: gentoo/src/patchsets/glibc/2.9/0052_all_glibc-2.9-regex-BZ9697.patch yann@1201: yann@1201: -= BEGIN original header =- yann@1201: http://sourceware.org/ml/libc-alpha/2009-01/msg00005.html yann@1201: yann@1201: From ea8ca0dfcbf2721bcf2c08ce3c01d5764b827572 Mon Sep 17 00:00:00 2001 yann@1201: From: Ulrich Drepper yann@1201: Date: Thu, 8 Jan 2009 00:42:28 +0000 yann@1201: Subject: [PATCH] (re_compile_fastmap_iter): Rewrite COMPLEX_BRACKET handling. yann@1201: yann@1201: -= END original header =- yann@1201: yann@1201: diff -durN glibc-2_9.orig/posix/regcomp.c glibc-2_9/posix/regcomp.c yann@1201: --- glibc-2_9.orig/posix/regcomp.c 2008-05-15 05:07:21.000000000 +0200 yann@1201: +++ glibc-2_9/posix/regcomp.c 2009-02-02 22:00:41.000000000 +0100 yann@1201: @@ -350,47 +350,67 @@ yann@1201: #ifdef RE_ENABLE_I18N yann@1201: else if (type == COMPLEX_BRACKET) yann@1201: { yann@1201: - int i; yann@1201: re_charset_t *cset = dfa->nodes[node].opr.mbcset; yann@1201: - if (cset->non_match || cset->ncoll_syms || cset->nequiv_classes yann@1201: - || cset->nranges || cset->nchar_classes) yann@1201: - { yann@1201: + int i; yann@1201: + yann@1201: # ifdef _LIBC yann@1201: - if (_NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES) != 0) yann@1201: + /* See if we have to try all bytes which start multiple collation yann@1201: + elements. yann@1201: + e.g. In da_DK, we want to catch 'a' since "aa" is a valid yann@1201: + collation element, and don't catch 'b' since 'b' is yann@1201: + the only collation element which starts from 'b' (and yann@1201: + it is caught by SIMPLE_BRACKET). */ yann@1201: + if (_NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES) != 0 yann@1201: + && (cset->ncoll_syms || cset->nranges)) yann@1201: { yann@1201: - /* In this case we want to catch the bytes which are yann@1201: - the first byte of any collation elements. yann@1201: - e.g. In da_DK, we want to catch 'a' since "aa" yann@1201: - is a valid collation element, and don't catch yann@1201: - 'b' since 'b' is the only collation element yann@1201: - which starts from 'b'. */ yann@1201: const int32_t *table = (const int32_t *) yann@1201: _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB); yann@1201: for (i = 0; i < SBC_MAX; ++i) yann@1201: if (table[i] < 0) yann@1201: re_set_fastmap (fastmap, icase, i); yann@1201: } yann@1201: -# else yann@1201: - if (dfa->mb_cur_max > 1) yann@1201: - for (i = 0; i < SBC_MAX; ++i) yann@1201: - if (__btowc (i) == WEOF) yann@1201: - re_set_fastmap (fastmap, icase, i); yann@1201: -# endif /* not _LIBC */ yann@1201: - } yann@1201: - for (i = 0; i < cset->nmbchars; ++i) yann@1201: +# endif /* _LIBC */ yann@1201: + yann@1201: + /* See if we have to start the match at all multibyte characters, yann@1201: + i.e. where we would not find an invalid sequence. This only yann@1201: + applies to multibyte character sets; for single byte character yann@1201: + sets, the SIMPLE_BRACKET again suffices. */ yann@1201: + if (dfa->mb_cur_max > 1 yann@1201: + && (cset->nchar_classes || cset->non_match yann@1201: +# ifdef _LIBC yann@1201: + || cset->nequiv_classes yann@1201: +# endif /* _LIBC */ yann@1201: + )) yann@1201: { yann@1201: - char buf[256]; yann@1201: - mbstate_t state; yann@1201: - memset (&state, '\0', sizeof (state)); yann@1201: - if (__wcrtomb (buf, cset->mbchars[i], &state) != (size_t) -1) yann@1201: - re_set_fastmap (fastmap, icase, *(unsigned char *) buf); yann@1201: - if ((bufp->syntax & RE_ICASE) && dfa->mb_cur_max > 1) yann@1201: + unsigned char c = 0; yann@1201: + do yann@1201: { yann@1201: - if (__wcrtomb (buf, towlower (cset->mbchars[i]), &state) yann@1201: - != (size_t) -1) yann@1201: - re_set_fastmap (fastmap, 0, *(unsigned char *) buf); yann@1201: + mbstate_t mbs; yann@1201: + memset (&mbs, 0, sizeof (mbs)); yann@1201: + if (__mbrtowc (NULL, (char *) &c, 1, &mbs) == (size_t) -2) yann@1201: + re_set_fastmap (fastmap, false, (int) c); yann@1201: } yann@1201: + while (++c != 0); yann@1201: } yann@1201: + yann@1201: + else yann@1201: + { yann@1201: + /* ... Else catch all bytes which can start the mbchars. */ yann@1201: + for (i = 0; i < cset->nmbchars; ++i) yann@1201: + { yann@1201: + char buf[256]; yann@1201: + mbstate_t state; yann@1201: + memset (&state, '\0', sizeof (state)); yann@1201: + if (__wcrtomb (buf, cset->mbchars[i], &state) != (size_t) -1) yann@1201: + re_set_fastmap (fastmap, icase, *(unsigned char *) buf); yann@1201: + if ((bufp->syntax & RE_ICASE) && dfa->mb_cur_max > 1) yann@1201: + { yann@1201: + if (__wcrtomb (buf, towlower (cset->mbchars[i]), &state) yann@1201: + != (size_t) -1) yann@1201: + re_set_fastmap (fastmap, false, *(unsigned char *) buf); yann@1201: + } yann@1201: + } yann@1201: + } yann@1201: } yann@1201: #endif /* RE_ENABLE_I18N */ yann@1201: else if (type == OP_PERIOD