patches/glibc/2.9/140-regex-BZ9697.patch
branch newlib
changeset 1365 c4d124ed9f8e
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/patches/glibc/2.9/140-regex-BZ9697.patch	Sun Apr 19 16:17:11 2009 +0000
     1.3 @@ -0,0 +1,111 @@
     1.4 +Original patch from: gentoo/src/patchsets/glibc/2.9/0052_all_glibc-2.9-regex-BZ9697.patch
     1.5 +
     1.6 +-= BEGIN original header =-
     1.7 +http://sourceware.org/ml/libc-alpha/2009-01/msg00005.html
     1.8 +
     1.9 +From ea8ca0dfcbf2721bcf2c08ce3c01d5764b827572 Mon Sep 17 00:00:00 2001
    1.10 +From: Ulrich Drepper <drepper@redhat.com>
    1.11 +Date: Thu, 8 Jan 2009 00:42:28 +0000
    1.12 +Subject: [PATCH] (re_compile_fastmap_iter): Rewrite COMPLEX_BRACKET handling.
    1.13 +
    1.14 +-= END original header =-
    1.15 +
    1.16 +diff -durN glibc-2_9.orig/posix/regcomp.c glibc-2_9/posix/regcomp.c
    1.17 +--- glibc-2_9.orig/posix/regcomp.c	2008-05-15 05:07:21.000000000 +0200
    1.18 ++++ glibc-2_9/posix/regcomp.c	2009-02-02 22:00:41.000000000 +0100
    1.19 +@@ -350,47 +350,67 @@
    1.20 + #ifdef RE_ENABLE_I18N
    1.21 +       else if (type == COMPLEX_BRACKET)
    1.22 + 	{
    1.23 +-	  int i;
    1.24 + 	  re_charset_t *cset = dfa->nodes[node].opr.mbcset;
    1.25 +-	  if (cset->non_match || cset->ncoll_syms || cset->nequiv_classes
    1.26 +-	      || cset->nranges || cset->nchar_classes)
    1.27 +-	    {
    1.28 ++	  int i;
    1.29 ++
    1.30 + # ifdef _LIBC
    1.31 +-	      if (_NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES) != 0)
    1.32 ++	  /* See if we have to try all bytes which start multiple collation
    1.33 ++	     elements.
    1.34 ++	     e.g. In da_DK, we want to catch 'a' since "aa" is a valid
    1.35 ++		  collation element, and don't catch 'b' since 'b' is
    1.36 ++		  the only collation element which starts from 'b' (and
    1.37 ++		  it is caught by SIMPLE_BRACKET).  */
    1.38 ++	      if (_NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES) != 0
    1.39 ++		  && (cset->ncoll_syms || cset->nranges))
    1.40 + 		{
    1.41 +-		  /* In this case we want to catch the bytes which are
    1.42 +-		     the first byte of any collation elements.
    1.43 +-		     e.g. In da_DK, we want to catch 'a' since "aa"
    1.44 +-			  is a valid collation element, and don't catch
    1.45 +-			  'b' since 'b' is the only collation element
    1.46 +-			  which starts from 'b'.  */
    1.47 + 		  const int32_t *table = (const int32_t *)
    1.48 + 		    _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
    1.49 + 		  for (i = 0; i < SBC_MAX; ++i)
    1.50 + 		    if (table[i] < 0)
    1.51 + 		      re_set_fastmap (fastmap, icase, i);
    1.52 + 		}
    1.53 +-# else
    1.54 +-	      if (dfa->mb_cur_max > 1)
    1.55 +-		for (i = 0; i < SBC_MAX; ++i)
    1.56 +-		  if (__btowc (i) == WEOF)
    1.57 +-		    re_set_fastmap (fastmap, icase, i);
    1.58 +-# endif /* not _LIBC */
    1.59 +-	    }
    1.60 +-	  for (i = 0; i < cset->nmbchars; ++i)
    1.61 ++# endif /* _LIBC */
    1.62 ++
    1.63 ++	  /* See if we have to start the match at all multibyte characters,
    1.64 ++	     i.e. where we would not find an invalid sequence.  This only
    1.65 ++	     applies to multibyte character sets; for single byte character
    1.66 ++	     sets, the SIMPLE_BRACKET again suffices.  */
    1.67 ++	  if (dfa->mb_cur_max > 1
    1.68 ++	      && (cset->nchar_classes || cset->non_match
    1.69 ++# ifdef _LIBC
    1.70 ++		  || cset->nequiv_classes
    1.71 ++# endif /* _LIBC */
    1.72 ++		 ))
    1.73 + 	    {
    1.74 +-	      char buf[256];
    1.75 +-	      mbstate_t state;
    1.76 +-	      memset (&state, '\0', sizeof (state));
    1.77 +-	      if (__wcrtomb (buf, cset->mbchars[i], &state) != (size_t) -1)
    1.78 +-		re_set_fastmap (fastmap, icase, *(unsigned char *) buf);
    1.79 +-	      if ((bufp->syntax & RE_ICASE) && dfa->mb_cur_max > 1)
    1.80 ++	      unsigned char c = 0;
    1.81 ++	      do
    1.82 + 		{
    1.83 +-		  if (__wcrtomb (buf, towlower (cset->mbchars[i]), &state)
    1.84 +-		      != (size_t) -1)
    1.85 +-		    re_set_fastmap (fastmap, 0, *(unsigned char *) buf);
    1.86 ++		  mbstate_t mbs;
    1.87 ++		  memset (&mbs, 0, sizeof (mbs));
    1.88 ++		  if (__mbrtowc (NULL, (char *) &c, 1, &mbs) == (size_t) -2)
    1.89 ++		    re_set_fastmap (fastmap, false, (int) c);
    1.90 + 		}
    1.91 ++	      while (++c != 0);
    1.92 + 	    }
    1.93 ++
    1.94 ++	  else
    1.95 ++	    {
    1.96 ++	      /* ... Else catch all bytes which can start the mbchars.  */
    1.97 ++	      for (i = 0; i < cset->nmbchars; ++i)
    1.98 ++		{
    1.99 ++		  char buf[256];
   1.100 ++		  mbstate_t state;
   1.101 ++		  memset (&state, '\0', sizeof (state));
   1.102 ++		  if (__wcrtomb (buf, cset->mbchars[i], &state) != (size_t) -1)
   1.103 ++		    re_set_fastmap (fastmap, icase, *(unsigned char *) buf);
   1.104 ++		  if ((bufp->syntax & RE_ICASE) && dfa->mb_cur_max > 1)
   1.105 ++		    {
   1.106 ++		      if (__wcrtomb (buf, towlower (cset->mbchars[i]), &state)
   1.107 ++			  != (size_t) -1)
   1.108 ++			re_set_fastmap (fastmap, false, *(unsigned char *) buf);
   1.109 ++		    }
   1.110 ++ 		}
   1.111 ++ 	    }
   1.112 + 	}
   1.113 + #endif /* RE_ENABLE_I18N */
   1.114 +       else if (type == OP_PERIOD