patches/glibc/2.9/140-regex-BZ9697.patch
author "Yann E. MORIN" <yann.morin.1998@anciens.enib.fr>
Sun Jan 17 23:06:02 2010 +0100 (2010-01-17)
changeset 1740 c57458bb354d
parent 1201 c9967a6e3b25
permissions -rw-r--r--
configure: do not require hg when configuring in an hg clone

When configuring in an hg clone, we need hg to compute the version string.
It can happen that users do not have Mercurial (e.g. if they got a snapshot
rather than doing a full clone). In this case, we can still run, of
course, so simply fill the version string with a sufficiently explicit
value that does not require hg. The date is a good candidate.
     1 Original patch from: gentoo/src/patchsets/glibc/2.9/0052_all_glibc-2.9-regex-BZ9697.patch
     2 
     3 -= BEGIN original header =-
     4 http://sourceware.org/ml/libc-alpha/2009-01/msg00005.html
     5 
     6 From ea8ca0dfcbf2721bcf2c08ce3c01d5764b827572 Mon Sep 17 00:00:00 2001
     7 From: Ulrich Drepper <drepper@redhat.com>
     8 Date: Thu, 8 Jan 2009 00:42:28 +0000
     9 Subject: [PATCH] (re_compile_fastmap_iter): Rewrite COMPLEX_BRACKET handling.
    10 
    11 -= END original header =-
    12 
    13 diff -durN glibc-2_9.orig/posix/regcomp.c glibc-2_9/posix/regcomp.c
    14 --- glibc-2_9.orig/posix/regcomp.c	2008-05-15 05:07:21.000000000 +0200
    15 +++ glibc-2_9/posix/regcomp.c	2009-02-02 22:00:41.000000000 +0100
    16 @@ -350,47 +350,67 @@
    17  #ifdef RE_ENABLE_I18N
    18        else if (type == COMPLEX_BRACKET)
    19  	{
    20 -	  int i;
    21  	  re_charset_t *cset = dfa->nodes[node].opr.mbcset;
    22 -	  if (cset->non_match || cset->ncoll_syms || cset->nequiv_classes
    23 -	      || cset->nranges || cset->nchar_classes)
    24 -	    {
    25 +	  int i;
    26 +
    27  # ifdef _LIBC
    28 -	      if (_NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES) != 0)
    29 +	  /* See if we have to try all bytes which start multiple collation
    30 +	     elements.
    31 +	     e.g. In da_DK, we want to catch 'a' since "aa" is a valid
    32 +		  collation element, and don't catch 'b' since 'b' is
    33 +		  the only collation element which starts from 'b' (and
    34 +		  it is caught by SIMPLE_BRACKET).  */
    35 +	      if (_NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES) != 0
    36 +		  && (cset->ncoll_syms || cset->nranges))
    37  		{
    38 -		  /* In this case we want to catch the bytes which are
    39 -		     the first byte of any collation elements.
    40 -		     e.g. In da_DK, we want to catch 'a' since "aa"
    41 -			  is a valid collation element, and don't catch
    42 -			  'b' since 'b' is the only collation element
    43 -			  which starts from 'b'.  */
    44  		  const int32_t *table = (const int32_t *)
    45  		    _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
    46  		  for (i = 0; i < SBC_MAX; ++i)
    47  		    if (table[i] < 0)
    48  		      re_set_fastmap (fastmap, icase, i);
    49  		}
    50 -# else
    51 -	      if (dfa->mb_cur_max > 1)
    52 -		for (i = 0; i < SBC_MAX; ++i)
    53 -		  if (__btowc (i) == WEOF)
    54 -		    re_set_fastmap (fastmap, icase, i);
    55 -# endif /* not _LIBC */
    56 -	    }
    57 -	  for (i = 0; i < cset->nmbchars; ++i)
    58 +# endif /* _LIBC */
    59 +
    60 +	  /* See if we have to start the match at all multibyte characters,
    61 +	     i.e. where we would not find an invalid sequence.  This only
    62 +	     applies to multibyte character sets; for single byte character
    63 +	     sets, the SIMPLE_BRACKET again suffices.  */
    64 +	  if (dfa->mb_cur_max > 1
    65 +	      && (cset->nchar_classes || cset->non_match
    66 +# ifdef _LIBC
    67 +		  || cset->nequiv_classes
    68 +# endif /* _LIBC */
    69 +		 ))
    70  	    {
    71 -	      char buf[256];
    72 -	      mbstate_t state;
    73 -	      memset (&state, '\0', sizeof (state));
    74 -	      if (__wcrtomb (buf, cset->mbchars[i], &state) != (size_t) -1)
    75 -		re_set_fastmap (fastmap, icase, *(unsigned char *) buf);
    76 -	      if ((bufp->syntax & RE_ICASE) && dfa->mb_cur_max > 1)
    77 +	      unsigned char c = 0;
    78 +	      do
    79  		{
    80 -		  if (__wcrtomb (buf, towlower (cset->mbchars[i]), &state)
    81 -		      != (size_t) -1)
    82 -		    re_set_fastmap (fastmap, 0, *(unsigned char *) buf);
    83 +		  mbstate_t mbs;
    84 +		  memset (&mbs, 0, sizeof (mbs));
    85 +		  if (__mbrtowc (NULL, (char *) &c, 1, &mbs) == (size_t) -2)
    86 +		    re_set_fastmap (fastmap, false, (int) c);
    87  		}
    88 +	      while (++c != 0);
    89  	    }
    90 +
    91 +	  else
    92 +	    {
    93 +	      /* ... Else catch all bytes which can start the mbchars.  */
    94 +	      for (i = 0; i < cset->nmbchars; ++i)
    95 +		{
    96 +		  char buf[256];
    97 +		  mbstate_t state;
    98 +		  memset (&state, '\0', sizeof (state));
    99 +		  if (__wcrtomb (buf, cset->mbchars[i], &state) != (size_t) -1)
   100 +		    re_set_fastmap (fastmap, icase, *(unsigned char *) buf);
   101 +		  if ((bufp->syntax & RE_ICASE) && dfa->mb_cur_max > 1)
   102 +		    {
   103 +		      if (__wcrtomb (buf, towlower (cset->mbchars[i]), &state)
   104 +			  != (size_t) -1)
   105 +			re_set_fastmap (fastmap, false, *(unsigned char *) buf);
   106 +		    }
   107 + 		}
   108 + 	    }
   109  	}
   110  #endif /* RE_ENABLE_I18N */
   111        else if (type == OP_PERIOD