LCOV - code coverage report
Current view: top level - lib - regex_internal.c (source / functions) Hit Total Coverage
Test: coreutils.info Lines: 397 810 49.0 %
Date: 2018-01-30 Functions: 28 34 82.4 %

          Line data    Source code
       1             : /* -*- buffer-read-only: t -*- vi: set ro: */
       2             : /* DO NOT EDIT! GENERATED AUTOMATICALLY! */
       3             : #line 1
       4             : /* Extended regular expression matching and search library.
       5             :    Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007 Free Software
       6             :    Foundation, Inc.
       7             :    This file is part of the GNU C Library.
       8             :    Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
       9             : 
      10             :    This program is free software; you can redistribute it and/or modify
      11             :    it under the terms of the GNU General Public License as published by
      12             :    the Free Software Foundation; either version 3, or (at your option)
      13             :    any later version.
      14             : 
      15             :    This program is distributed in the hope that it will be useful,
      16             :    but WITHOUT ANY WARRANTY; without even the implied warranty of
      17             :    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      18             :    GNU General Public License for more details.
      19             : 
      20             :    You should have received a copy of the GNU General Public License along
      21             :    with this program; if not, write to the Free Software Foundation,
      22             :    Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
      23             : 
      24             : static void re_string_construct_common (const char *str, Idx len,
      25             :                                         re_string_t *pstr,
      26             :                                         RE_TRANSLATE_TYPE trans, bool icase,
      27             :                                         const re_dfa_t *dfa) internal_function;
      28             : static re_dfastate_t *create_ci_newstate (const re_dfa_t *dfa,
      29             :                                           const re_node_set *nodes,
      30             :                                           re_hashval_t hash) internal_function;
      31             : static re_dfastate_t *create_cd_newstate (const re_dfa_t *dfa,
      32             :                                           const re_node_set *nodes,
      33             :                                           unsigned int context,
      34             :                                           re_hashval_t hash) internal_function;
      35             : 
      36             : /* Functions for string operation.  */
      37             : 
      38             : /* This function allocate the buffers.  It is necessary to call
      39             :    re_string_reconstruct before using the object.  */
      40         356 : 
      41             : static reg_errcode_t
      42             : internal_function
      43             : re_string_allocate (re_string_t *pstr, const char *str, Idx len, Idx init_len,
      44             :                     RE_TRANSLATE_TYPE trans, bool icase, const re_dfa_t *dfa)
      45             : {
      46             :   reg_errcode_t ret;
      47         356 :   Idx init_buf_len;
      48           0 : 
      49         356 :   /* Ensure at least one character fits into the buffers.  */
      50         356 :   if (init_len < dfa->mb_cur_max)
      51             :     init_len = dfa->mb_cur_max;
      52         356 :   init_buf_len = (len + 1 < init_len) ? len + 1: init_len;
      53         356 :   re_string_construct_common (str, len, pstr, trans, icase, dfa);
      54           0 : 
      55             :   ret = re_string_realloc_buffers (pstr, init_buf_len);
      56         356 :   if (BE (ret != REG_NOERROR, 0))
      57         356 :     return ret;
      58         356 : 
      59         356 :   pstr->word_char = dfa->word_char;
      60         356 :   pstr->word_ops_used = dfa->word_ops_used;
      61         356 :   pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (unsigned char *) str;
      62             :   pstr->valid_len = (pstr->mbs_allocated || dfa->mb_cur_max > 1) ? 0 : len;
      63             :   pstr->valid_raw_len = pstr->valid_len;
      64             :   return REG_NOERROR;
      65             : }
      66             : 
      67             : /* This function allocate the buffers, and initialize them.  */
      68         321 : 
      69             : static reg_errcode_t
      70             : internal_function
      71             : re_string_construct (re_string_t *pstr, const char *str, Idx len,
      72         321 :                      RE_TRANSLATE_TYPE trans, bool icase, const re_dfa_t *dfa)
      73         321 : {
      74             :   reg_errcode_t ret;
      75         321 :   memset (pstr, '\0', sizeof (re_string_t));
      76             :   re_string_construct_common (str, len, pstr, trans, icase, dfa);
      77         286 : 
      78         286 :   if (len > 0)
      79           0 :     {
      80             :       ret = re_string_realloc_buffers (pstr, len + 1);
      81         321 :       if (BE (ret != REG_NOERROR, 0))
      82             :         return ret;
      83         321 :     }
      84             :   pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (unsigned char *) str;
      85             : 
      86           0 :   if (icase)
      87             :     {
      88             : #ifdef RE_ENABLE_I18N
      89             :       if (dfa->mb_cur_max > 1)
      90           0 :         {
      91           0 :           while (1)
      92           0 :             {
      93           0 :               ret = build_wcs_upper_buffer (pstr);
      94           0 :               if (BE (ret != REG_NOERROR, 0))
      95           0 :                 return ret;
      96           0 :               if (pstr->valid_raw_len >= len)
      97           0 :                 break;
      98           0 :               if (pstr->bufs_len > pstr->valid_len + dfa->mb_cur_max)
      99           0 :                 break;
     100             :               ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2);
     101             :               if (BE (ret != REG_NOERROR, 0))
     102             :                 return ret;
     103             :             }
     104           0 :         }
     105             :       else
     106             : #endif /* RE_ENABLE_I18N  */
     107             :         build_upper_buffer (pstr);
     108             :     }
     109         321 :   else
     110           0 :     {
     111             : #ifdef RE_ENABLE_I18N
     112             :       if (dfa->mb_cur_max > 1)
     113             :         build_wcs_buffer (pstr);
     114         321 :       else
     115           0 : #endif /* RE_ENABLE_I18N  */
     116             :         {
     117             :           if (trans != NULL)
     118         321 :             re_string_translate_buffer (pstr);
     119         321 :           else
     120             :             {
     121             :               pstr->valid_len = pstr->bufs_len;
     122             :               pstr->valid_raw_len = pstr->bufs_len;
     123             :             }
     124         321 :         }
     125             :     }
     126             : 
     127             :   return REG_NOERROR;
     128             : }
     129             : 
     130             : /* Helper functions for re_string_allocate, and re_string_construct.  */
     131         642 : 
     132             : static reg_errcode_t
     133             : internal_function
     134         642 : re_string_realloc_buffers (re_string_t *pstr, Idx new_buf_len)
     135             : {
     136             : #ifdef RE_ENABLE_I18N
     137             :   if (pstr->mb_cur_max > 1)
     138             :     {
     139           0 :       wint_t *new_wcs;
     140           0 : 
     141           0 :       /* Avoid overflow.  */
     142             :       size_t max_object_size = MAX (sizeof (wint_t), sizeof (Idx));
     143           0 :       if (BE (SIZE_MAX / max_object_size < new_buf_len, 0))
     144           0 :         return REG_ESPACE;
     145           0 : 
     146           0 :       new_wcs = re_realloc (pstr->wcs, wint_t, new_buf_len);
     147           0 :       if (BE (new_wcs == NULL, 0))
     148             :         return REG_ESPACE;
     149           0 :       pstr->wcs = new_wcs;
     150           0 :       if (pstr->offsets != NULL)
     151           0 :         {
     152           0 :           Idx *new_offsets = re_realloc (pstr->offsets, Idx, new_buf_len);
     153             :           if (BE (new_offsets == NULL, 0))
     154             :             return REG_ESPACE;
     155             :           pstr->offsets = new_offsets;
     156         642 :         }
     157             :     }
     158           0 : #endif /* RE_ENABLE_I18N  */
     159             :   if (pstr->mbs_allocated)
     160           0 :     {
     161           0 :       unsigned char *new_mbs = re_realloc (pstr->mbs, unsigned char,
     162           0 :                                            new_buf_len);
     163             :       if (BE (new_mbs == NULL, 0))
     164         642 :         return REG_ESPACE;
     165         642 :       pstr->mbs = new_mbs;
     166             :     }
     167             :   pstr->bufs_len = new_buf_len;
     168             :   return REG_NOERROR;
     169             : }
     170             : 
     171         677 : 
     172             : static void
     173             : internal_function
     174             : re_string_construct_common (const char *str, Idx len, re_string_t *pstr,
     175         677 :                             RE_TRANSLATE_TYPE trans, bool icase,
     176         677 :                             const re_dfa_t *dfa)
     177         677 : {
     178         677 :   pstr->raw_mbs = (const unsigned char *) str;
     179         677 :   pstr->len = len;
     180         677 :   pstr->raw_len = len;
     181         677 :   pstr->trans = trans;
     182         677 :   pstr->icase = icase;
     183         677 :   pstr->mbs_allocated = (trans != NULL || icase);
     184         677 :   pstr->mb_cur_max = dfa->mb_cur_max;
     185         677 :   pstr->is_utf8 = dfa->is_utf8;
     186         677 :   pstr->map_notascii = dfa->map_notascii;
     187             :   pstr->stop = pstr->len;
     188             :   pstr->raw_stop = pstr->stop;
     189             : }
     190             : 
     191             : #ifdef RE_ENABLE_I18N
     192             : 
     193             : /* Build wide character buffer PSTR->WCS.
     194             :    If the byte sequence of the string are:
     195             :      <mb1>(0), <mb1>(1), <mb2>(0), <mb2>(1), <sb3>
     196             :    Then wide character buffer will be:
     197             :      <wc1>   , WEOF    , <wc2>   , WEOF    , <wc3>
     198             :    We use WEOF for padding, they indicate that the position isn't
     199             :    a first byte of a multibyte character.
     200             : 
     201             :    Note that this function assumes PSTR->VALID_LEN elements are already
     202             :    built and starts from PSTR->VALID_LEN.  */
     203           0 : 
     204             : static void
     205             : internal_function
     206             : build_wcs_buffer (re_string_t *pstr)
     207             : {
     208             : #ifdef _LIBC
     209             :   unsigned char buf[MB_LEN_MAX];
     210             :   assert (MB_LEN_MAX >= pstr->mb_cur_max);
     211             : #else
     212             :   unsigned char buf[64];
     213             : #endif
     214             :   mbstate_t prev_st;
     215             :   Idx byte_idx, end_idx, remain_len;
     216             :   size_t mbclen;
     217           0 : 
     218           0 :   /* Build the buffers from pstr->valid_len to either pstr->len or
     219             :      pstr->bufs_len.  */
     220             :   end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
     221             :   for (byte_idx = pstr->valid_len; byte_idx < end_idx;)
     222             :     {
     223           0 :       wchar_t wc;
     224           0 :       const char *p;
     225             : 
     226           0 :       remain_len = end_idx - byte_idx;
     227             :       prev_st = pstr->cur_state;
     228             :       /* Apply the translation if we need.  */
     229             :       if (BE (pstr->trans != NULL, 0))
     230           0 :         {
     231             :           int i, ch;
     232           0 : 
     233           0 :           for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i)
     234             :             {
     235           0 :               ch = pstr->raw_mbs [pstr->raw_mbs_idx + byte_idx + i];
     236             :               buf[i] = pstr->mbs[byte_idx + i] = pstr->trans[ch];
     237             :             }
     238           0 :           p = (const char *) buf;
     239           0 :         }
     240           0 :       else
     241             :         p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx;
     242             :       mbclen = mbrtowc (&wc, p, remain_len, &pstr->cur_state);
     243           0 :       if (BE (mbclen == (size_t) -2, 0))
     244           0 :         {
     245             :           /* The buffer doesn't have enough space, finish to build.  */
     246           0 :           pstr->cur_state = prev_st;
     247             :           break;
     248             :         }
     249           0 :       else if (BE (mbclen == (size_t) -1 || mbclen == 0, 0))
     250           0 :         {
     251           0 :           /* We treat these cases as a singlebyte character.  */
     252           0 :           mbclen = 1;
     253           0 :           wc = (wchar_t) pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
     254             :           if (BE (pstr->trans != NULL, 0))
     255             :             wc = pstr->trans[wc];
     256             :           pstr->cur_state = prev_st;
     257           0 :         }
     258             : 
     259           0 :       /* Write wide character and padding.  */
     260           0 :       pstr->wcs[byte_idx++] = wc;
     261             :       /* Write paddings.  */
     262           0 :       for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
     263           0 :         pstr->wcs[byte_idx++] = WEOF;
     264           0 :     }
     265             :   pstr->valid_len = byte_idx;
     266             :   pstr->valid_raw_len = byte_idx;
     267             : }
     268             : 
     269             : /* Build wide character buffer PSTR->WCS like build_wcs_buffer,
     270             :    but for REG_ICASE.  */
     271           0 : 
     272             : static reg_errcode_t
     273             : internal_function
     274             : build_wcs_upper_buffer (re_string_t *pstr)
     275             : {
     276             :   mbstate_t prev_st;
     277             :   Idx src_idx, byte_idx, end_idx, remain_len;
     278             :   size_t mbclen;
     279             : #ifdef _LIBC
     280             :   char buf[MB_LEN_MAX];
     281             :   assert (MB_LEN_MAX >= pstr->mb_cur_max);
     282             : #else
     283           0 :   char buf[64];
     284           0 : #endif
     285             : 
     286             :   byte_idx = pstr->valid_len;
     287             :   end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
     288           0 : 
     289             :   /* The following optimization assumes that ASCII characters can be
     290           0 :      mapped to wide characters with a simple cast.  */
     291             :   if (! pstr->map_notascii && pstr->trans == NULL && !pstr->offsets_needed)
     292             :     {
     293             :       while (byte_idx < end_idx)
     294           0 :         {
     295           0 :           wchar_t wc;
     296             : 
     297             :           if (isascii (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx])
     298           0 :               && mbsinit (&pstr->cur_state))
     299           0 :             {
     300             :               /* In case of a singlebyte character.  */
     301             :               pstr->mbs[byte_idx]
     302           0 :                 = toupper (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]);
     303           0 :               /* The next step uses the assumption that wchar_t is encoded
     304           0 :                  ASCII-safe: all ASCII values can be converted like this.  */
     305             :               pstr->wcs[byte_idx] = (wchar_t) pstr->mbs[byte_idx];
     306             :               ++byte_idx;
     307           0 :               continue;
     308           0 :             }
     309           0 : 
     310           0 :           remain_len = end_idx - byte_idx;
     311           0 :           prev_st = pstr->cur_state;
     312           0 :           mbclen = mbrtowc (&wc,
     313             :                             ((const char *) pstr->raw_mbs + pstr->raw_mbs_idx
     314           0 :                              + byte_idx), remain_len, &pstr->cur_state);
     315           0 :           if (BE (mbclen < (size_t) -2, 1))
     316             :             {
     317             :               wchar_t wcu = wc;
     318             :               if (iswlower (wc))
     319           0 :                 {
     320           0 :                   size_t mbcdlen;
     321           0 : 
     322           0 :                   wcu = towupper (wc);
     323             :                   mbcdlen = wcrtomb (buf, wcu, &prev_st);
     324             :                   if (BE (mbclen == mbcdlen, 1))
     325           0 :                     memcpy (pstr->mbs + byte_idx, buf, mbclen);
     326           0 :                   else
     327             :                     {
     328             :                       src_idx = byte_idx;
     329             :                       goto offsets_needed;
     330           0 :                     }
     331           0 :                 }
     332           0 :               else
     333             :                 memcpy (pstr->mbs + byte_idx,
     334           0 :                         pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx, mbclen);
     335           0 :               pstr->wcs[byte_idx++] = wcu;
     336             :               /* Write paddings.  */
     337           0 :               for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
     338           0 :                 pstr->wcs[byte_idx++] = WEOF;
     339             :             }
     340           0 :           else if (mbclen == (size_t) -1 || mbclen == 0)
     341           0 :             {
     342             :               /* It is an invalid character or '\0'.  Just use the byte.  */
     343           0 :               int ch = pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
     344           0 :               pstr->mbs[byte_idx] = ch;
     345           0 :               /* And also cast it to wide char.  */
     346             :               pstr->wcs[byte_idx++] = (wchar_t) ch;
     347             :               if (BE (mbclen == (size_t) -1, 0))
     348             :                 pstr->cur_state = prev_st;
     349             :             }
     350           0 :           else
     351           0 :             {
     352             :               /* The buffer doesn't have enough space, finish to build.  */
     353             :               pstr->cur_state = prev_st;
     354           0 :               break;
     355           0 :             }
     356           0 :         }
     357             :       pstr->valid_len = byte_idx;
     358             :       pstr->valid_raw_len = byte_idx;
     359           0 :       return REG_NOERROR;
     360             :     }
     361             :   else
     362             :     for (src_idx = pstr->valid_raw_len; byte_idx < end_idx;)
     363           0 :       {
     364           0 :         wchar_t wc;
     365           0 :         const char *p;
     366           0 :       offsets_needed:
     367             :         remain_len = end_idx - byte_idx;
     368             :         prev_st = pstr->cur_state;
     369             :         if (BE (pstr->trans != NULL, 0))
     370           0 :           {
     371             :             int i, ch;
     372           0 : 
     373           0 :             for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i)
     374             :               {
     375           0 :                 ch = pstr->raw_mbs [pstr->raw_mbs_idx + src_idx + i];
     376             :                 buf[i] = pstr->trans[ch];
     377             :               }
     378           0 :             p = (const char *) buf;
     379           0 :           }
     380           0 :         else
     381             :           p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + src_idx;
     382           0 :         mbclen = mbrtowc (&wc, p, remain_len, &pstr->cur_state);
     383           0 :         if (BE (mbclen < (size_t) -2, 1))
     384             :           {
     385             :             wchar_t wcu = wc;
     386             :             if (iswlower (wc))
     387           0 :               {
     388           0 :                 size_t mbcdlen;
     389           0 : 
     390           0 :                 wcu = towupper (wc);
     391           0 :                 mbcdlen = wcrtomb ((char *) buf, wcu, &prev_st);
     392             :                 if (BE (mbclen == mbcdlen, 1))
     393             :                   memcpy (pstr->mbs + byte_idx, buf, mbclen);
     394             :                 else if (mbcdlen != (size_t) -1)
     395           0 :                   {
     396             :                     size_t i;
     397           0 : 
     398           0 :                     if (byte_idx + mbcdlen > pstr->bufs_len)
     399             :                       {
     400             :                         pstr->cur_state = prev_st;
     401           0 :                         break;
     402             :                       }
     403           0 : 
     404             :                     if (pstr->offsets == NULL)
     405           0 :                       {
     406           0 :                         pstr->offsets = re_malloc (Idx, pstr->bufs_len);
     407             : 
     408           0 :                         if (pstr->offsets == NULL)
     409             :                           return REG_ESPACE;
     410           0 :                       }
     411           0 :                     if (!pstr->offsets_needed)
     412           0 :                       {
     413             :                         for (i = 0; i < (size_t) byte_idx; ++i)
     414             :                           pstr->offsets[i] = i;
     415           0 :                         pstr->offsets_needed = 1;
     416           0 :                       }
     417           0 : 
     418           0 :                     memcpy (pstr->mbs + byte_idx, buf, mbcdlen);
     419             :                     pstr->wcs[byte_idx] = wcu;
     420           0 :                     pstr->offsets[byte_idx] = src_idx;
     421           0 :                     for (i = 1; i < mbcdlen; ++i)
     422           0 :                       {
     423             :                         pstr->offsets[byte_idx + i]
     424           0 :                           = src_idx + (i < mbclen ? i : mbclen - 1);
     425           0 :                         pstr->wcs[byte_idx + i] = WEOF;
     426           0 :                       }
     427           0 :                     pstr->len += mbcdlen - mbclen;
     428             :                     if (pstr->raw_stop > src_idx)
     429           0 :                       pstr->stop += mbcdlen - mbclen;
     430           0 :                     end_idx = (pstr->bufs_len > pstr->len)
     431           0 :                               ? pstr->len : pstr->bufs_len;
     432             :                     byte_idx += mbcdlen;
     433             :                     src_idx += mbclen;
     434           0 :                     continue;
     435             :                   }
     436             :                 else
     437           0 :                   memcpy (pstr->mbs + byte_idx, p, mbclen);
     438             :               }
     439           0 :             else
     440             :               memcpy (pstr->mbs + byte_idx, p, mbclen);
     441             : 
     442           0 :             if (BE (pstr->offsets_needed != 0, 0))
     443           0 :               {
     444             :                 size_t i;
     445           0 :                 for (i = 0; i < mbclen; ++i)
     446             :                   pstr->offsets[byte_idx + i] = src_idx + i;
     447           0 :               }
     448             :             src_idx += mbclen;
     449           0 : 
     450           0 :             pstr->wcs[byte_idx++] = wcu;
     451             :             /* Write paddings.  */
     452           0 :             for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
     453           0 :               pstr->wcs[byte_idx++] = WEOF;
     454             :           }
     455           0 :         else if (mbclen == (size_t) -1 || mbclen == 0)
     456             :           {
     457           0 :             /* It is an invalid character or '\0'.  Just use the byte.  */
     458           0 :             int ch = pstr->raw_mbs[pstr->raw_mbs_idx + src_idx];
     459           0 : 
     460             :             if (BE (pstr->trans != NULL, 0))
     461           0 :               ch = pstr->trans [ch];
     462           0 :             pstr->mbs[byte_idx] = ch;
     463           0 : 
     464             :             if (BE (pstr->offsets_needed != 0, 0))
     465             :               pstr->offsets[byte_idx] = src_idx;
     466           0 :             ++src_idx;
     467           0 : 
     468           0 :             /* And also cast it to wide char.  */
     469             :             pstr->wcs[byte_idx++] = (wchar_t) ch;
     470             :             if (BE (mbclen == (size_t) -1, 0))
     471             :               pstr->cur_state = prev_st;
     472             :           }
     473           0 :         else
     474           0 :           {
     475             :             /* The buffer doesn't have enough space, finish to build.  */
     476             :             pstr->cur_state = prev_st;
     477           0 :             break;
     478           0 :           }
     479           0 :       }
     480             :   pstr->valid_len = byte_idx;
     481             :   pstr->valid_raw_len = src_idx;
     482             :   return REG_NOERROR;
     483             : }
     484             : 
     485             : /* Skip characters until the index becomes greater than NEW_RAW_IDX.
     486             :    Return the index.  */
     487           0 : 
     488             : static Idx
     489             : internal_function
     490             : re_string_skip_chars (re_string_t *pstr, Idx new_raw_idx, wint_t *last_wc)
     491             : {
     492           0 :   mbstate_t prev_st;
     493             :   Idx rawbuf_idx;
     494             :   size_t mbclen;
     495           0 :   wint_t wc = WEOF;
     496             : 
     497             :   /* Skip the characters which are not necessary to check.  */
     498             :   for (rawbuf_idx = pstr->raw_mbs_idx + pstr->valid_raw_len;
     499             :        rawbuf_idx < new_raw_idx;)
     500           0 :     {
     501           0 :       wchar_t wc2;
     502           0 :       Idx remain_len;
     503             :       remain_len = pstr->len - rawbuf_idx;
     504           0 :       prev_st = pstr->cur_state;
     505             :       mbclen = mbrtowc (&wc2, (const char *) pstr->raw_mbs + rawbuf_idx,
     506             :                         remain_len, &pstr->cur_state);
     507           0 :       if (BE (mbclen == (size_t) -2 || mbclen == (size_t) -1 || mbclen == 0, 0))
     508           0 :         {
     509             :           /* We treat these cases as a single byte character.  */
     510           0 :           if (mbclen == 0 || remain_len == 0)
     511           0 :             wc = L'\0';
     512           0 :           else
     513             :             wc = *(unsigned char *) (pstr->raw_mbs + rawbuf_idx);
     514             :           mbclen = 1;
     515           0 :           pstr->cur_state = prev_st;
     516             :         }
     517           0 :       else
     518             :         wc = wc2;
     519           0 :       /* Then proceed the next character.  */
     520           0 :       rawbuf_idx += mbclen;
     521             :     }
     522             :   *last_wc = wc;
     523             :   return rawbuf_idx;
     524             : }
     525             : #endif /* RE_ENABLE_I18N  */
     526             : 
     527             : /* Build the buffer PSTR->MBS, and apply the translation if we need.
     528             :    This function is used in case of REG_ICASE.  */
     529           0 : 
     530             : static void
     531             : internal_function
     532           0 : build_upper_buffer (re_string_t *pstr)
     533             : {
     534           0 :   Idx char_idx, end_idx;
     535             :   end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
     536           0 : 
     537           0 :   for (char_idx = pstr->valid_len; char_idx < end_idx; ++char_idx)
     538           0 :     {
     539           0 :       int ch = pstr->raw_mbs[pstr->raw_mbs_idx + char_idx];
     540           0 :       if (BE (pstr->trans != NULL, 0))
     541             :         ch = pstr->trans[ch];
     542           0 :       if (islower (ch))
     543             :         pstr->mbs[char_idx] = toupper (ch);
     544           0 :       else
     545           0 :         pstr->mbs[char_idx] = ch;
     546           0 :     }
     547             :   pstr->valid_len = char_idx;
     548             :   pstr->valid_raw_len = char_idx;
     549             : }
     550             : 
     551             : /* Apply TRANS to the buffer in PSTR.  */
     552           0 : 
     553             : static void
     554             : internal_function
     555           0 : re_string_translate_buffer (re_string_t *pstr)
     556             : {
     557           0 :   Idx buf_idx, end_idx;
     558             :   end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
     559           0 : 
     560           0 :   for (buf_idx = pstr->valid_len; buf_idx < end_idx; ++buf_idx)
     561             :     {
     562             :       int ch = pstr->raw_mbs[pstr->raw_mbs_idx + buf_idx];
     563           0 :       pstr->mbs[buf_idx] = pstr->trans[ch];
     564           0 :     }
     565           0 : 
     566             :   pstr->valid_len = buf_idx;
     567             :   pstr->valid_raw_len = buf_idx;
     568             : }
     569             : 
     570             : /* This function re-construct the buffers.
     571             :    Concretely, convert to wide character in case of pstr->mb_cur_max > 1,
     572             :    convert to upper case in case of REG_ICASE, apply translation.  */
     573         337 : 
     574             : static reg_errcode_t
     575             : internal_function
     576             : re_string_reconstruct (re_string_t *pstr, Idx idx, int eflags)
     577         337 : {
     578         301 :   Idx offset;
     579             : 
     580             :   if (BE (pstr->raw_mbs_idx <= idx, 0))
     581             :     offset = idx - pstr->raw_mbs_idx;
     582             :   else
     583          36 :     {
     584           0 :       /* Reset buffer.  */
     585             : #ifdef RE_ENABLE_I18N
     586          36 :       if (pstr->mb_cur_max > 1)
     587          36 :         memset (&pstr->cur_state, '\0', sizeof (mbstate_t));
     588          36 : #endif /* RE_ENABLE_I18N */
     589          36 :       pstr->len = pstr->raw_len;
     590          36 :       pstr->stop = pstr->raw_stop;
     591          36 :       pstr->valid_len = 0;
     592          72 :       pstr->raw_mbs_idx = 0;
     593          36 :       pstr->valid_raw_len = 0;
     594          36 :       pstr->offsets_needed = 0;
     595          36 :       pstr->tip_context = ((eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
     596          36 :                            : CONTEXT_NEWLINE | CONTEXT_BEGBUF);
     597             :       if (!pstr->mbs_allocated)
     598             :         pstr->mbs = (unsigned char *) pstr->raw_mbs;
     599         337 :       offset = idx;
     600             :     }
     601             : 
     602         130 :   if (BE (offset != 0, 1))
     603             :     {
     604             :       /* Should the already checked characters be kept?  */
     605             :       if (BE (offset < pstr->valid_raw_len, 1))
     606          90 :         {
     607             :           /* Yes, move them to the front of the buffer.  */
     608           0 : #ifdef RE_ENABLE_I18N
     609             :           if (BE (pstr->offsets_needed, 0))
     610             :             {
     611           0 :               Idx low = 0, high = pstr->valid_len, mid;
     612           0 :               do
     613           0 :                 {
     614           0 :                   mid = (high + low) / 2;
     615           0 :                   if (pstr->offsets[mid] > offset)
     616             :                     high = mid;
     617           0 :                   else if (pstr->offsets[mid] < offset)
     618             :                     low = mid + 1;
     619           0 :                   else
     620           0 :                     break;
     621           0 :                 }
     622           0 :               while (low < high);
     623             :               if (pstr->offsets[mid] < offset)
     624             :                 ++mid;
     625             :               pstr->tip_context = re_string_context_at (pstr, mid - 1,
     626             :                                                         eflags);
     627             :               /* This can be quite complicated, so handle specially
     628           0 :                  only the common and easy case where the character with
     629           0 :                  different length representation of lower and upper
     630             :                  case is present at or after offset.  */
     631           0 :               if (pstr->valid_len > offset
     632           0 :                   && mid == offset && pstr->offsets[mid] == offset)
     633           0 :                 {
     634           0 :                   memmove (pstr->wcs, pstr->wcs + offset,
     635           0 :                            (pstr->valid_len - offset) * sizeof (wint_t));
     636           0 :                   memmove (pstr->mbs, pstr->mbs + offset, pstr->valid_len - offset);
     637           0 :                   pstr->valid_len -= offset;
     638             :                   pstr->valid_raw_len -= offset;
     639             :                   for (low = 0; low < pstr->valid_len; low++)
     640             :                     pstr->offsets[low] = pstr->offsets[low + offset] - offset;
     641             :                 }
     642             :               else
     643           0 :                 {
     644           0 :                   /* Otherwise, just find out how long the partial multibyte
     645           0 :                      character at offset is and fill it with WEOF/255.  */
     646           0 :                   pstr->len = pstr->raw_len - idx + offset;
     647           0 :                   pstr->stop = pstr->raw_stop - idx + offset;
     648           0 :                   pstr->offsets_needed = 0;
     649           0 :                   while (mid > 0 && pstr->offsets[mid - 1] == offset)
     650           0 :                     --mid;
     651             :                   while (mid < pstr->valid_len)
     652           0 :                     if (pstr->wcs[mid] != WEOF)
     653           0 :                       break;
     654           0 :                     else
     655             :                       ++mid;
     656             :                   if (mid == pstr->valid_len)
     657           0 :                     pstr->valid_len = 0;
     658           0 :                   else
     659             :                     {
     660           0 :                       pstr->valid_len = pstr->offsets[mid] - offset;
     661           0 :                       if (pstr->valid_len)
     662           0 :                         {
     663             :                           for (low = 0; low < pstr->valid_len; ++low)
     664             :                             pstr->wcs[low] = WEOF;
     665           0 :                           memset (pstr->mbs, 255, pstr->valid_len);
     666             :                         }
     667             :                     }
     668             :                   pstr->valid_raw_len = pstr->valid_len;
     669             :                 }
     670             :             }
     671          90 :           else
     672             : #endif
     673             :             {
     674          90 :               pstr->tip_context = re_string_context_at (pstr, offset - 1,
     675           0 :                                                         eflags);
     676           0 : #ifdef RE_ENABLE_I18N
     677             :               if (pstr->mb_cur_max > 1)
     678          90 :                 memmove (pstr->wcs, pstr->wcs + offset,
     679           0 :                          (pstr->valid_len - offset) * sizeof (wint_t));
     680           0 : #endif /* RE_ENABLE_I18N */
     681          90 :               if (BE (pstr->mbs_allocated, 0))
     682          90 :                 memmove (pstr->mbs, pstr->mbs + offset,
     683             :                          pstr->valid_len - offset);
     684             :               pstr->valid_len -= offset;
     685             :               pstr->valid_raw_len -= offset;
     686             : #if DEBUG
     687             :               assert (pstr->valid_len > 0);
     688             : #endif
     689             :             }
     690             :         }
     691          40 :       else
     692             :         {
     693             :           /* No, skip all characters until IDX.  */
     694          40 :           Idx prev_valid_len = pstr->valid_len;
     695             : 
     696           0 : #ifdef RE_ENABLE_I18N
     697           0 :           if (BE (pstr->offsets_needed, 0))
     698           0 :             {
     699             :               pstr->len = pstr->raw_len - idx + offset;
     700             :               pstr->stop = pstr->raw_stop - idx + offset;
     701          40 :               pstr->offsets_needed = 0;
     702             :             }
     703          40 : #endif
     704             :           pstr->valid_len = 0;
     705             : #ifdef RE_ENABLE_I18N
     706           0 :           if (pstr->mb_cur_max > 1)
     707             :             {
     708           0 :               Idx wcs_idx;
     709             :               wint_t wc = WEOF;
     710             : 
     711             :               if (pstr->is_utf8)
     712             :                 {
     713             :                   const unsigned char *raw, *p, *end;
     714           0 : 
     715           0 :                   /* Special case UTF-8.  Multi-byte chars start with any
     716           0 :                      byte other than 0x80 - 0xbf.  */
     717           0 :                   raw = pstr->raw_mbs + pstr->raw_mbs_idx;
     718           0 :                   end = raw + (offset - pstr->mb_cur_max);
     719             :                   if (end < pstr->raw_mbs)
     720             :                     end = pstr->raw_mbs;
     721             :                   p = raw + offset - 1;
     722             : #ifdef _LIBC
     723             :                   /* We know the wchar_t encoding is UCS4, so for the simple
     724             :                      case, ASCII characters, skip the conversion step.  */
     725             :                   if (isascii (*p) && BE (pstr->trans == NULL, 1))
     726             :                     {
     727             :                       memset (&pstr->cur_state, '\0', sizeof (mbstate_t));
     728             :                       /* pstr->valid_len = 0; */
     729             :                       wc = (wchar_t) *p;
     730           0 :                     }
     731           0 :                   else
     732             : #endif
     733             :                     for (; p >= end; --p)
     734             :                       if ((*p & 0xc0) != 0x80)
     735           0 :                         {
     736             :                           mbstate_t cur_state;
     737             :                           wchar_t wc2;
     738             :                           Idx mlen = raw + pstr->len - p;
     739           0 :                           unsigned char buf[6];
     740             :                           size_t mbclen;
     741           0 : 
     742           0 :                           if (BE (pstr->trans != NULL, 0))
     743           0 :                             {
     744             :                               int i = mlen < 6 ? mlen : 6;
     745             :                               while (--i >= 0)
     746             :                                 buf[i] = pstr->trans[p[i]];
     747           0 :                             }
     748           0 :                           /* XXX Don't use mbrtowc, we know which conversion
     749             :                              to use (UTF-8 -> UCS4).  */
     750           0 :                           memset (&cur_state, 0, sizeof (cur_state));
     751           0 :                           mbclen = mbrtowc (&wc2, (const char *) p, mlen,
     752             :                                             &cur_state);
     753           0 :                           if (raw + offset - p <= mbclen
     754             :                               && mbclen < (size_t) -2)
     755           0 :                             {
     756           0 :                               memset (&pstr->cur_state, '\0',
     757             :                                       sizeof (mbstate_t));
     758           0 :                               pstr->valid_len = mbclen - (raw + offset - p);
     759             :                               wc = wc2;
     760             :                             }
     761             :                           break;
     762           0 :                         }
     763           0 :                 }
     764           0 : 
     765             :               if (wc == WEOF)
     766           0 :                 pstr->valid_len = re_string_skip_chars (pstr, idx, &wc) - idx;
     767             :               if (wc == WEOF)
     768           0 :                 pstr->tip_context
     769           0 :                   = re_string_context_at (pstr, prev_valid_len - 1, eflags);
     770             :               else
     771           0 :                 pstr->tip_context = ((BE (pstr->word_ops_used != 0, 0)
     772           0 :                                       && IS_WIDE_WORD_CHAR (wc))
     773             :                                      ? CONTEXT_WORD
     774           0 :                                      : ((IS_WIDE_NEWLINE (wc)
     775             :                                          && pstr->newline_anchor)
     776           0 :                                         ? CONTEXT_NEWLINE : 0));
     777           0 :               if (BE (pstr->valid_len, 0))
     778           0 :                 {
     779           0 :                   for (wcs_idx = 0; wcs_idx < pstr->valid_len; ++wcs_idx)
     780             :                     pstr->wcs[wcs_idx] = WEOF;
     781           0 :                   if (pstr->mbs_allocated)
     782             :                     memset (pstr->mbs, 255, pstr->valid_len);
     783             :                 }
     784             :               pstr->valid_raw_len = pstr->valid_len;
     785             :             }
     786          40 :           else
     787          40 : #endif /* RE_ENABLE_I18N */
     788          40 :             {
     789           0 :               int c = pstr->raw_mbs[pstr->raw_mbs_idx + offset - 1];
     790          80 :               pstr->valid_raw_len = 0;
     791             :               if (pstr->trans)
     792          40 :                 c = pstr->trans[c];
     793             :               pstr->tip_context = (bitset_contain (pstr->word_char, c)
     794             :                                    ? CONTEXT_WORD
     795             :                                    : ((IS_NEWLINE (c) && pstr->newline_anchor)
     796         130 :                                       ? CONTEXT_NEWLINE : 0));
     797         130 :             }
     798             :         }
     799         337 :       if (!BE (pstr->mbs_allocated, 0))
     800         337 :         pstr->mbs += offset;
     801         337 :     }
     802             :   pstr->raw_mbs_idx = idx;
     803             :   pstr->len -= offset;
     804             :   pstr->stop -= offset;
     805         337 : 
     806             :   /* Then build the buffers.  */
     807           0 : #ifdef RE_ENABLE_I18N
     808             :   if (pstr->mb_cur_max > 1)
     809           0 :     {
     810           0 :       if (pstr->icase)
     811           0 :         {
     812             :           reg_errcode_t ret = build_wcs_upper_buffer (pstr);
     813             :           if (BE (ret != REG_NOERROR, 0))
     814           0 :             return ret;
     815             :         }
     816             :       else
     817             :         build_wcs_buffer (pstr);
     818         337 :     }
     819             :   else
     820           0 : #endif /* RE_ENABLE_I18N */
     821           0 :     if (BE (pstr->mbs_allocated, 0))
     822           0 :       {
     823           0 :         if (pstr->icase)
     824             :           build_upper_buffer (pstr);
     825             :         else if (pstr->trans != NULL)
     826         337 :           re_string_translate_buffer (pstr);
     827             :       }
     828         337 :     else
     829         337 :       pstr->valid_len = pstr->len;
     830             : 
     831             :   pstr->cur_idx = 0;
     832             :   return REG_NOERROR;
     833             : }
     834         475 : 
     835             : static unsigned char
     836             : internal_function __attribute ((pure))
     837             : re_string_peek_byte_case (const re_string_t *pstr, Idx idx)
     838             : {
     839             :   int ch;
     840         475 :   Idx off;
     841         475 : 
     842             :   /* Handle the common (easiest) cases first.  */
     843             :   if (BE (!pstr->mbs_allocated, 1))
     844           0 :     return re_string_peek_byte (pstr, idx);
     845           0 : 
     846           0 : #ifdef RE_ENABLE_I18N
     847             :   if (pstr->mb_cur_max > 1
     848             :       && ! re_string_is_single_byte_char (pstr, pstr->cur_idx + idx))
     849           0 :     return re_string_peek_byte (pstr, idx);
     850             : #endif
     851           0 : 
     852           0 :   off = pstr->cur_idx + idx;
     853             : #ifdef RE_ENABLE_I18N
     854             :   if (pstr->offsets_needed)
     855           0 :     off = pstr->offsets[off];
     856             : #endif
     857             : 
     858             :   ch = pstr->raw_mbs[pstr->raw_mbs_idx + off];
     859             : 
     860             : #ifdef RE_ENABLE_I18N
     861             :   /* Ensure that e.g. for tr_TR.UTF-8 BACKSLASH DOTLESS SMALL LETTER I
     862           0 :      this function returns CAPITAL LETTER I instead of first byte of
     863           0 :      DOTLESS SMALL LETTER I.  The latter would confuse the parser,
     864             :      since peek_byte_case doesn't advance cur_idx in any way.  */
     865             :   if (pstr->offsets_needed && !isascii (ch))
     866           0 :     return re_string_peek_byte (pstr, idx);
     867             : #endif
     868             : 
     869             :   return ch;
     870             : }
     871           1 : 
     872             : static unsigned char
     873           1 : internal_function __attribute ((pure))
     874           1 : re_string_fetch_byte_case (re_string_t *pstr)
     875             : {
     876             :   if (BE (!pstr->mbs_allocated, 1))
     877           0 :     return re_string_fetch_byte (pstr);
     878             : 
     879             : #ifdef RE_ENABLE_I18N
     880             :   if (pstr->offsets_needed)
     881             :     {
     882             :       Idx off;
     883             :       int ch;
     884             : 
     885             :       /* For tr_TR.UTF-8 [[:islower:]] there is
     886             :          [[: CAPITAL LETTER I WITH DOT lower:]] in mbs.  Skip
     887             :          in that case the whole multi-byte character and return
     888             :          the original letter.  On the other side, with
     889           0 :          [[: DOTLESS SMALL LETTER I return [[:I, as doing
     890           0 :          anything else would complicate things too much.  */
     891             : 
     892           0 :       if (!re_string_first_byte (pstr, pstr->cur_idx))
     893           0 :         return re_string_fetch_byte (pstr);
     894             : 
     895           0 :       off = pstr->offsets[pstr->cur_idx];
     896           0 :       ch = pstr->raw_mbs[pstr->raw_mbs_idx + off];
     897             : 
     898           0 :       if (! isascii (ch))
     899             :         return re_string_fetch_byte (pstr);
     900           0 : 
     901             :       re_string_skip_bytes (pstr,
     902             :                             re_string_char_size_at (pstr, pstr->cur_idx));
     903             :       return ch;
     904           0 :     }
     905             : #endif
     906             : 
     907             :   return pstr->raw_mbs[pstr->raw_mbs_idx + pstr->cur_idx++];
     908             : }
     909         677 : 
     910             : static void
     911             : internal_function
     912         677 : re_string_destruct (re_string_t *pstr)
     913         677 : {
     914             : #ifdef RE_ENABLE_I18N
     915         677 :   re_free (pstr->wcs);
     916           0 :   re_free (pstr->offsets);
     917         677 : #endif /* RE_ENABLE_I18N  */
     918             :   if (pstr->mbs_allocated)
     919             :     re_free (pstr->mbs);
     920             : }
     921             : 
     922             : /* Return the context at IDX in INPUT.  */
     923         337 : 
     924             : static unsigned int
     925             : internal_function
     926         337 : re_string_context_at (const re_string_t *input, Idx idx, int eflags)
     927             : {
     928             :   int c;
     929         110 :   if (BE (! REG_VALID_INDEX (idx), 0))
     930         227 :     /* In this case, we use the value stored in input->tip_context,
     931          53 :        since we can't know the character in input->mbs[-1] here.  */
     932          53 :     return input->tip_context;
     933             :   if (BE (idx == input->len, 0))
     934         174 :     return ((eflags & REG_NOTEOL) ? CONTEXT_ENDBUF
     935             :             : CONTEXT_NEWLINE | CONTEXT_ENDBUF);
     936             : #ifdef RE_ENABLE_I18N
     937           0 :   if (input->mb_cur_max > 1)
     938           0 :     {
     939             :       wint_t wc;
     940             :       Idx wc_idx = idx;
     941             :       while(input->wcs[wc_idx] == WEOF)
     942             :         {
     943             : #ifdef DEBUG
     944           0 :           /* It must not happen.  */
     945           0 :           assert (REG_VALID_INDEX (wc_idx));
     946           0 : #endif
     947             :           --wc_idx;
     948           0 :           if (! REG_VALID_INDEX (wc_idx))
     949           0 :             return input->tip_context;
     950           0 :         }
     951           0 :       wc = input->wcs[wc_idx];
     952           0 :       if (BE (input->word_ops_used != 0, 0) && IS_WIDE_WORD_CHAR (wc))
     953             :         return CONTEXT_WORD;
     954             :       return (IS_WIDE_NEWLINE (wc) && input->newline_anchor
     955             :               ? CONTEXT_NEWLINE : 0);
     956             :     }
     957         174 :   else
     958         174 : #endif
     959          32 :     {
     960         142 :       c = re_string_byte_at (input, idx);
     961             :       if (bitset_contain (input->word_char, c))
     962             :         return CONTEXT_WORD;
     963             :       return IS_NEWLINE (c) && input->newline_anchor ? CONTEXT_NEWLINE : 0;
     964             :     }
     965             : }
     966             : 
     967             : /* Functions for set operation.  */
     968        2314 : 
     969             : static reg_errcode_t
     970        2314 : internal_function
     971        2314 : re_node_set_alloc (re_node_set *set, Idx size)
     972        2314 : {
     973        2314 :   set->alloc = size;
     974           0 :   set->nelem = 0;
     975        2314 :   set->elems = re_malloc (Idx, size);
     976             :   if (BE (set->elems == NULL, 0))
     977             :     return REG_ESPACE;
     978             :   return REG_NOERROR;
     979             : }
     980         523 : 
     981             : static reg_errcode_t
     982         523 : internal_function
     983         523 : re_node_set_init_1 (re_node_set *set, Idx elem)
     984         523 : {
     985         523 :   set->alloc = 1;
     986             :   set->nelem = 1;
     987           0 :   set->elems = re_malloc (Idx, 1);
     988           0 :   if (BE (set->elems == NULL, 0))
     989             :     {
     990         523 :       set->alloc = set->nelem = 0;
     991         523 :       return REG_ESPACE;
     992             :     }
     993             :   set->elems[0] = elem;
     994             :   return REG_NOERROR;
     995             : }
     996         319 : 
     997             : static reg_errcode_t
     998         319 : internal_function
     999         319 : re_node_set_init_2 (re_node_set *set, Idx elem1, Idx elem2)
    1000         319 : {
    1001           0 :   set->alloc = 2;
    1002         319 :   set->elems = re_malloc (Idx, 2);
    1003             :   if (BE (set->elems == NULL, 0))
    1004           4 :     return REG_ESPACE;
    1005           4 :   if (elem1 == elem2)
    1006             :     {
    1007             :       set->nelem = 1;
    1008             :       set->elems[0] = elem1;
    1009         315 :     }
    1010         315 :   else
    1011             :     {
    1012         314 :       set->nelem = 2;
    1013         314 :       if (elem1 < elem2)
    1014             :         {
    1015             :           set->elems[0] = elem1;
    1016             :           set->elems[1] = elem2;
    1017           1 :         }
    1018           1 :       else
    1019             :         {
    1020             :           set->elems[0] = elem2;
    1021         319 :           set->elems[1] = elem1;
    1022             :         }
    1023             :     }
    1024             :   return REG_NOERROR;
    1025             : }
    1026         945 : 
    1027             : static reg_errcode_t
    1028         945 : internal_function
    1029         945 : re_node_set_init_copy (re_node_set *dest, const re_node_set *src)
    1030             : {
    1031         945 :   dest->nelem = src->nelem;
    1032         945 :   if (src->nelem > 0)
    1033         945 :     {
    1034             :       dest->alloc = dest->nelem;
    1035           0 :       dest->elems = re_malloc (Idx, dest->alloc);
    1036           0 :       if (BE (dest->elems == NULL, 0))
    1037             :         {
    1038         945 :           dest->alloc = dest->nelem = 0;
    1039             :           return REG_ESPACE;
    1040             :         }
    1041           0 :       memcpy (dest->elems, src->elems, src->nelem * sizeof (Idx));
    1042         945 :     }
    1043             :   else
    1044             :     re_node_set_init_empty (dest);
    1045             :   return REG_NOERROR;
    1046             : }
    1047             : 
    1048             : /* Calculate the intersection of the sets SRC1 and SRC2. And merge it to
    1049             :    DEST. Return value indicate the error code or REG_NOERROR if succeeded.
    1050             :    Note: We assume dest->elems is NULL, when dest->alloc is 0.  */
    1051          17 : 
    1052             : static reg_errcode_t
    1053             : internal_function
    1054             : re_node_set_add_intersect (re_node_set *dest, const re_node_set *src1,
    1055          17 :                            const re_node_set *src2)
    1056           0 : {
    1057             :   Idx i1, i2, is, id, delta, sbase;
    1058             :   if (src1->nelem == 0 || src2->nelem == 0)
    1059             :     return REG_NOERROR;
    1060          17 : 
    1061             :   /* We need dest->nelem + 2 * elems_in_intersection; this is a
    1062          11 :      conservative estimate.  */
    1063          11 :   if (src1->nelem + src2->nelem + dest->nelem > dest->alloc)
    1064          11 :     {
    1065           0 :       Idx new_alloc = src1->nelem + src2->nelem + dest->alloc;
    1066          11 :       Idx *new_elems = re_realloc (dest->elems, Idx, new_alloc);
    1067          11 :       if (BE (new_elems == NULL, 0))
    1068             :         return REG_ESPACE;
    1069             :       dest->elems = new_elems;
    1070             :       dest->alloc = new_alloc;
    1071             :     }
    1072          17 : 
    1073          17 :   /* Find the items in the intersection of SRC1 and SRC2, and copy
    1074          17 :      into the top of DEST those that are not already in DEST itself.  */
    1075          17 :   sbase = dest->nelem + src1->nelem + src2->nelem;
    1076             :   i1 = src1->nelem - 1;
    1077             :   i2 = src2->nelem - 1;
    1078         157 :   id = dest->nelem - 1;
    1079             :   for (;;)
    1080             :     {
    1081         139 :       if (src1->elems[i1] == src2->elems[i2])
    1082          11 :         {
    1083             :           /* Try to find the item in DEST.  Maybe we could binary search?  */
    1084          64 :           while (REG_VALID_INDEX (id) && dest->elems[id] > src1->elems[i1])
    1085          47 :             --id;
    1086             : 
    1087          64 :           if (! REG_VALID_INDEX (id) || dest->elems[id] != src1->elems[i1])
    1088             :             dest->elems[--sbase] = src1->elems[i1];
    1089             : 
    1090             :           if (! REG_VALID_INDEX (--i1) || ! REG_VALID_INDEX (--i2))
    1091             :             break;
    1092          23 :         }
    1093             : 
    1094           0 :       /* Lower the highest of the two items.  */
    1095           0 :       else if (src1->elems[i1] < src2->elems[i2])
    1096             :         {
    1097             :           if (! REG_VALID_INDEX (--i2))
    1098             :             break;
    1099          23 :         }
    1100           5 :       else
    1101             :         {
    1102             :           if (! REG_VALID_INDEX (--i1))
    1103             :             break;
    1104          17 :         }
    1105          17 :     }
    1106          17 : 
    1107             :   id = dest->nelem - 1;
    1108             :   is = dest->nelem + src1->nelem + src2->nelem - 1;
    1109             :   delta = is - sbase + 1;
    1110             : 
    1111          17 :   /* Now copy.  When DELTA becomes zero, the remaining
    1112          17 :      DEST elements are already in place; this is more or
    1113             :      less the same loop that is in re_node_set_merge.  */
    1114             :   dest->nelem += delta;
    1115          11 :   if (delta > 0 && REG_VALID_INDEX (id))
    1116             :     for (;;)
    1117             :       {
    1118           0 :         if (dest->elems[is] > dest->elems[id])
    1119           0 :           {
    1120           0 :             /* Copy from the top.  */
    1121             :             dest->elems[id + delta--] = dest->elems[is--];
    1122             :             if (delta == 0)
    1123             :               break;
    1124             :           }
    1125          11 :         else
    1126          11 :           {
    1127          11 :             /* Slide from the bottom.  */
    1128             :             dest->elems[id + delta] = dest->elems[id];
    1129             :             if (! REG_VALID_INDEX (--id))
    1130             :               break;
    1131             :           }
    1132          17 :       }
    1133             : 
    1134          17 :   /* Copy remaining SRC elements.  */
    1135             :   memcpy (dest->elems, dest->elems + sbase, delta * sizeof (Idx));
    1136             : 
    1137             :   return REG_NOERROR;
    1138             : }
    1139             : 
    1140             : /* Calculate the union set of the sets SRC1 and SRC2. And store it to
    1141             :    DEST. Return value indicate the error code or REG_NOERROR if succeeded.  */
    1142           0 : 
    1143             : static reg_errcode_t
    1144             : internal_function
    1145             : re_node_set_init_union (re_node_set *dest, const re_node_set *src1,
    1146           0 :                         const re_node_set *src2)
    1147             : {
    1148           0 :   Idx i1, i2, id;
    1149           0 :   if (src1 != NULL && src1->nelem > 0 && src2 != NULL && src2->nelem > 0)
    1150           0 :     {
    1151           0 :       dest->alloc = src1->nelem + src2->nelem;
    1152             :       dest->elems = re_malloc (Idx, dest->alloc);
    1153             :       if (BE (dest->elems == NULL, 0))
    1154             :         return REG_ESPACE;
    1155           0 :     }
    1156           0 :   else
    1157           0 :     {
    1158           0 :       if (src1 != NULL && src1->nelem > 0)
    1159             :         return re_node_set_init_copy (dest, src1);
    1160           0 :       else if (src2 != NULL && src2->nelem > 0)
    1161           0 :         return re_node_set_init_copy (dest, src2);
    1162             :       else
    1163           0 :         re_node_set_init_empty (dest);
    1164             :       return REG_NOERROR;
    1165           0 :     }
    1166             :   for (i1 = i2 = id = 0 ; i1 < src1->nelem && i2 < src2->nelem ;)
    1167           0 :     {
    1168           0 :       if (src1->elems[i1] > src2->elems[i2])
    1169             :         {
    1170           0 :           dest->elems[id++] = src2->elems[i2++];
    1171           0 :           continue;
    1172           0 :         }
    1173             :       if (src1->elems[i1] == src2->elems[i2])
    1174           0 :         ++i2;
    1175             :       dest->elems[id++] = src1->elems[i1++];
    1176           0 :     }
    1177           0 :   if (i1 < src1->nelem)
    1178           0 :     {
    1179             :       memcpy (dest->elems + id, src1->elems + i1,
    1180           0 :              (src1->nelem - i1) * sizeof (Idx));
    1181             :       id += src1->nelem - i1;
    1182           0 :     }
    1183           0 :   else if (i2 < src2->nelem)
    1184           0 :     {
    1185             :       memcpy (dest->elems + id, src2->elems + i2,
    1186           0 :              (src2->nelem - i2) * sizeof (Idx));
    1187           0 :       id += src2->nelem - i2;
    1188             :     }
    1189             :   dest->nelem = id;
    1190             :   return REG_NOERROR;
    1191             : }
    1192             : 
    1193             : /* Calculate the union set of the sets DEST and SRC. And store it to
    1194             :    DEST. Return value indicate the error code or REG_NOERROR if succeeded.  */
    1195        1239 : 
    1196             : static reg_errcode_t
    1197             : internal_function
    1198        1239 : re_node_set_merge (re_node_set *dest, const re_node_set *src)
    1199           0 : {
    1200        1239 :   Idx is, id, sbase, delta;
    1201             :   if (src == NULL || src->nelem == 0)
    1202         566 :     return REG_NOERROR;
    1203         566 :   if (dest->alloc < 2 * src->nelem + dest->nelem)
    1204         566 :     {
    1205           0 :       Idx new_alloc = 2 * (src->nelem + dest->alloc);
    1206         566 :       Idx *new_buffer = re_realloc (dest->elems, Idx, new_alloc);
    1207         566 :       if (BE (new_buffer == NULL, 0))
    1208             :         return REG_ESPACE;
    1209             :       dest->elems = new_buffer;
    1210        1239 :       dest->alloc = new_alloc;
    1211             :     }
    1212         851 : 
    1213         851 :   if (BE (dest->nelem == 0, 0))
    1214         851 :     {
    1215             :       dest->nelem = src->nelem;
    1216             :       memcpy (dest->elems, src->elems, src->nelem * sizeof (Idx));
    1217             :       return REG_NOERROR;
    1218             :     }
    1219        2793 : 
    1220         388 :   /* Copy into the top of DEST the items of SRC that are not
    1221        1629 :      found in DEST.  Maybe we could binary search in DEST?  */
    1222             :   for (sbase = dest->nelem + 2 * src->nelem,
    1223        1629 :        is = src->nelem - 1, id = dest->nelem - 1;
    1224           5 :        REG_VALID_INDEX (is) && REG_VALID_INDEX (id); )
    1225        1624 :     {
    1226        1039 :       if (dest->elems[id] == src->elems[is])
    1227             :         is--, id--;
    1228         585 :       else if (dest->elems[id] < src->elems[is])
    1229             :         dest->elems[--sbase] = src->elems[is--];
    1230             :       else /* if (dest->elems[id] > src->elems[is]) */
    1231         388 :         --id;
    1232             :     }
    1233             : 
    1234           0 :   if (REG_VALID_INDEX (is))
    1235           0 :     {
    1236             :       /* If DEST is exhausted, the remaining items of SRC must be unique.  */
    1237             :       sbase -= is + 1;
    1238         388 :       memcpy (dest->elems + sbase, src->elems, (is + 1) * sizeof (Idx));
    1239         388 :     }
    1240         388 : 
    1241         388 :   id = dest->nelem - 1;
    1242           4 :   is = dest->nelem + 2 * src->nelem - 1;
    1243             :   delta = is - sbase + 1;
    1244             :   if (delta == 0)
    1245             :     return REG_NOERROR;
    1246         384 : 
    1247             :   /* Now copy.  When DELTA becomes zero, the remaining
    1248             :      DEST elements are already in place.  */
    1249        2862 :   dest->nelem += delta;
    1250             :   for (;;)
    1251             :     {
    1252        1039 :       if (dest->elems[is] > dest->elems[id])
    1253        1039 :         {
    1254         384 :           /* Copy from the top.  */
    1255             :           dest->elems[id + delta--] = dest->elems[is--];
    1256             :           if (delta == 0)
    1257             :             break;
    1258             :         }
    1259         584 :       else
    1260         584 :         {
    1261             :           /* Slide from the bottom.  */
    1262             :           dest->elems[id + delta] = dest->elems[id];
    1263           0 :           if (! REG_VALID_INDEX (--id))
    1264             :             {
    1265           0 :               /* Copy remaining SRC elements.  */
    1266             :               memcpy (dest->elems, dest->elems + sbase,
    1267             :                       delta * sizeof (Idx));
    1268             :               break;
    1269             :             }
    1270         384 :         }
    1271             :     }
    1272             : 
    1273             :   return REG_NOERROR;
    1274             : }
    1275             : 
    1276             : /* Insert the new element ELEM to the re_node_set* SET.
    1277             :    SET should not already have ELEM.
    1278             :    Return true if successful.  */
    1279        2137 : 
    1280             : static bool
    1281             : internal_function
    1282             : re_node_set_insert (re_node_set *set, Idx elem)
    1283        2137 : {
    1284         150 :   Idx idx;
    1285             :   /* In case the set is empty.  */
    1286        1987 :   if (set->alloc == 0)
    1287             :     return BE (re_node_set_init_1 (set, elem) == REG_NOERROR, 1);
    1288             : 
    1289        1138 :   if (BE (set->nelem, 0) == 0)
    1290        1138 :     {
    1291        1138 :       /* We already guaranteed above that set->alloc != 0.  */
    1292             :       set->elems[0] = elem;
    1293             :       ++set->nelem;
    1294             :       return true;
    1295         849 :     }
    1296             : 
    1297             :   /* Realloc if we need.  */
    1298          90 :   if (set->alloc == set->nelem)
    1299          90 :     {
    1300          90 :       Idx *new_elems;
    1301           0 :       set->alloc = set->alloc * 2;
    1302          90 :       new_elems = re_realloc (set->elems, Idx, set->alloc);
    1303             :       if (BE (new_elems == NULL, 0))
    1304             :         return false;
    1305             :       set->elems = new_elems;
    1306             :     }
    1307         849 : 
    1308             :   /* Move the elements which follows the new element.  Test the
    1309         424 :      first element separately to skip a check in the inner loop.  */
    1310        1977 :   if (elem < set->elems[0])
    1311        1553 :     {
    1312             :       idx = 0;
    1313             :       for (idx = set->nelem; idx > 0; idx--)
    1314             :         set->elems[idx] = set->elems[idx - 1];
    1315        1843 :     }
    1316        1418 :   else
    1317             :     {
    1318             :       for (idx = set->nelem; set->elems[idx - 1] > elem; idx--)
    1319             :         set->elems[idx] = set->elems[idx - 1];
    1320         849 :     }
    1321         849 : 
    1322         849 :   /* Insert the new element.  */
    1323             :   set->elems[idx] = elem;
    1324             :   ++set->nelem;
    1325             :   return true;
    1326             : }
    1327             : 
    1328             : /* Insert the new element ELEM to the re_node_set* SET.
    1329             :    SET should not already have any element greater than or equal to ELEM.
    1330             :    Return true if successful.  */
    1331        5215 : 
    1332             : static bool
    1333             : internal_function
    1334        5215 : re_node_set_insert_last (re_node_set *set, Idx elem)
    1335             : {
    1336             :   /* Realloc if we need.  */
    1337        2240 :   if (set->alloc == set->nelem)
    1338        2240 :     {
    1339        2240 :       Idx *new_elems;
    1340           0 :       set->alloc = (set->alloc + 1) * 2;
    1341        2240 :       new_elems = re_realloc (set->elems, Idx, set->alloc);
    1342             :       if (BE (new_elems == NULL, 0))
    1343             :         return false;
    1344             :       set->elems = new_elems;
    1345        5215 :     }
    1346        5215 : 
    1347             :   /* Insert the new element.  */
    1348             :   set->elems[set->nelem++] = elem;
    1349             :   return true;
    1350             : }
    1351             : 
    1352             : /* Compare two node sets SET1 and SET2.
    1353             :    Return true if SET1 and SET2 are equivalent.  */
    1354          40 : 
    1355             : static bool
    1356             : internal_function __attribute ((pure))
    1357          40 : re_node_set_compare (const re_node_set *set1, const re_node_set *set2)
    1358           0 : {
    1359         365 :   Idx i;
    1360         285 :   if (set1 == NULL || set2 == NULL || set1->nelem != set2->nelem)
    1361           0 :     return false;
    1362          40 :   for (i = set1->nelem ; REG_VALID_INDEX (--i) ; )
    1363             :     if (set1->elems[i] != set2->elems[i])
    1364             :       return false;
    1365             :   return true;
    1366             : }
    1367             : 
    1368             : /* Return (idx + 1) if SET contains the element ELEM, return 0 otherwise.  */
    1369          87 : 
    1370             : static Idx
    1371             : internal_function __attribute ((pure))
    1372          87 : re_node_set_contains (const re_node_set *set, Idx elem)
    1373           0 : {
    1374             :   __re_size_t idx, right, mid;
    1375             :   if (! REG_VALID_NONZERO_INDEX (set->nelem))
    1376          87 :     return 0;
    1377          87 : 
    1378         405 :   /* Binary search the element.  */
    1379             :   idx = 0;
    1380         231 :   right = set->nelem - 1;
    1381         231 :   while (idx < right)
    1382         113 :     {
    1383             :       mid = (idx + right) / 2;
    1384         118 :       if (set->elems[mid] < elem)
    1385             :         idx = mid + 1;
    1386          87 :       else
    1387             :         right = mid;
    1388             :     }
    1389             :   return set->elems[idx] == elem ? idx + 1 : 0;
    1390             : }
    1391         217 : 
    1392             : static void
    1393         217 : internal_function
    1394           0 : re_node_set_remove_at (re_node_set *set, Idx idx)
    1395         217 : {
    1396         590 :   if (idx < 0 || idx >= set->nelem)
    1397         373 :     return;
    1398             :   --set->nelem;
    1399             :   for (; idx < set->nelem; idx++)
    1400             :     set->elems[idx] = set->elems[idx + 1];
    1401             : }
    1402             : 
    1403             : 
    1404             : /* Add the token TOKEN to dfa->nodes, and return the index of the token.
    1405             :    Or return REG_MISSING if an error occurred.  */
    1406        1725 : 
    1407             : static Idx
    1408        1725 : internal_function
    1409             : re_dfa_add_node (re_dfa_t *dfa, re_token_t token)
    1410          30 : {
    1411             :   if (BE (dfa->nodes_len >= dfa->nodes_alloc, 0))
    1412             :     {
    1413             :       size_t new_nodes_alloc = dfa->nodes_alloc * 2;
    1414          30 :       Idx *new_nexts, *new_indices;
    1415             :       re_node_set *new_edests, *new_eclosures;
    1416             :       re_token_t *new_nodes;
    1417             :       size_t max_object_size =
    1418             :         MAX (sizeof (re_token_t),
    1419             :              MAX (sizeof (re_node_set),
    1420          30 :                   sizeof (Idx)));
    1421           0 : 
    1422             :       /* Avoid overflows.  */
    1423          30 :       if (BE (SIZE_MAX / 2 / max_object_size < dfa->nodes_alloc, 0))
    1424          30 :         return REG_MISSING;
    1425           0 : 
    1426          30 :       new_nodes = re_realloc (dfa->nodes, re_token_t, new_nodes_alloc);
    1427          30 :       if (BE (new_nodes == NULL, 0))
    1428          30 :         return REG_MISSING;
    1429          30 :       dfa->nodes = new_nodes;
    1430          30 :       new_nexts = re_realloc (dfa->nexts, Idx, new_nodes_alloc);
    1431          30 :       new_indices = re_realloc (dfa->org_indices, Idx, new_nodes_alloc);
    1432             :       new_edests = re_realloc (dfa->edests, re_node_set, new_nodes_alloc);
    1433           0 :       new_eclosures = re_realloc (dfa->eclosures, re_node_set, new_nodes_alloc);
    1434          30 :       if (BE (new_nexts == NULL || new_indices == NULL
    1435          30 :               || new_edests == NULL || new_eclosures == NULL, 0))
    1436          30 :         return REG_MISSING;
    1437          30 :       dfa->nexts = new_nexts;
    1438          30 :       dfa->org_indices = new_indices;
    1439             :       dfa->edests = new_edests;
    1440        1725 :       dfa->eclosures = new_eclosures;
    1441        1725 :       dfa->nodes_alloc = new_nodes_alloc;
    1442             :     }
    1443             :   dfa->nodes[dfa->nodes_len] = token;
    1444        1725 :   dfa->nodes[dfa->nodes_len].constraint = 0;
    1445        3450 : #ifdef RE_ENABLE_I18N
    1446        3450 :   {
    1447             :   int type = token.type;
    1448             :   dfa->nodes[dfa->nodes_len].accept_mb =
    1449        1725 :     (type == OP_PERIOD && dfa->mb_cur_max > 1) || type == COMPLEX_BRACKET;
    1450        1725 :   }
    1451        1725 : #endif
    1452        1725 :   dfa->nexts[dfa->nodes_len] = REG_MISSING;
    1453             :   re_node_set_init_empty (dfa->edests + dfa->nodes_len);
    1454             :   re_node_set_init_empty (dfa->eclosures + dfa->nodes_len);
    1455             :   return dfa->nodes_len++;
    1456             : }
    1457         536 : 
    1458             : static inline re_hashval_t
    1459         536 : internal_function
    1460             : calc_state_hash (const re_node_set *nodes, unsigned int context)
    1461        2122 : {
    1462        1586 :   re_hashval_t hash = nodes->nelem + context;
    1463         536 :   Idx i;
    1464             :   for (i = 0 ; i < nodes->nelem ; i++)
    1465             :     hash += nodes->elems[i];
    1466             :   return hash;
    1467             : }
    1468             : 
    1469             : /* Search for the state whose node_set is equivalent to NODES.
    1470             :    Return the pointer to the state, if we found it in the DFA.
    1471             :    Otherwise create the new one and return it.  In case of an error
    1472             :    return NULL and set the error code in ERR.
    1473             :    Note: - We assume NULL as the invalid state, then it is possible that
    1474             :            return value is NULL and ERR is REG_NOERROR.
    1475             :          - We never return non-NULL value in case of any errors, it is for
    1476             :            optimization.  */
    1477          34 : 
    1478             : static re_dfastate_t *
    1479             : internal_function
    1480             : re_acquire_state (reg_errcode_t *err, const re_dfa_t *dfa,
    1481             :                   const re_node_set *nodes)
    1482             : {
    1483             :   re_hashval_t hash;
    1484             :   re_dfastate_t *new_state;
    1485             :   struct re_state_table_entry *spot;
    1486             :   Idx i;
    1487             : #ifdef lint
    1488          34 :   /* Suppress bogus uninitialized-variable warnings.  */
    1489             :   *err = REG_NOERROR;
    1490           0 : #endif
    1491           0 :   if (BE (nodes->nelem == 0, 0))
    1492             :     {
    1493          34 :       *err = REG_NOERROR;
    1494          34 :       return NULL;
    1495             :     }
    1496          39 :   hash = calc_state_hash (nodes, 0);
    1497             :   spot = dfa->state_table + (hash & dfa->state_hash_mask);
    1498          17 : 
    1499          17 :   for (i = 0 ; i < spot->num ; i++)
    1500           5 :     {
    1501          12 :       re_dfastate_t *state = spot->array[i];
    1502          12 :       if (hash != state->hash)
    1503             :         continue;
    1504             :       if (re_node_set_compare (&state->nodes, nodes))
    1505             :         return state;
    1506          22 :     }
    1507          22 : 
    1508           0 :   /* There are no appropriate state in the dfa, create the new one.  */
    1509             :   new_state = create_ci_newstate (dfa, nodes, hash);
    1510          22 :   if (BE (new_state == NULL, 0))
    1511             :     *err = REG_ESPACE;
    1512             : 
    1513             :   return new_state;
    1514             : }
    1515             : 
    1516             : /* Search for the state whose node_set is equivalent to NODES and
    1517             :    whose context is equivalent to CONTEXT.
    1518             :    Return the pointer to the state, if we found it in the DFA.
    1519             :    Otherwise create the new one and return it.  In case of an error
    1520             :    return NULL and set the error code in ERR.
    1521             :    Note: - We assume NULL as the invalid state, then it is possible that
    1522             :            return value is NULL and ERR is REG_NOERROR.
    1523             :          - We never return non-NULL value in case of any errors, it is for
    1524             :            optimization.  */
    1525         502 : 
    1526             : static re_dfastate_t *
    1527             : internal_function
    1528             : re_acquire_state_context (reg_errcode_t *err, const re_dfa_t *dfa,
    1529             :                           const re_node_set *nodes, unsigned int context)
    1530             : {
    1531             :   re_hashval_t hash;
    1532             :   re_dfastate_t *new_state;
    1533             :   struct re_state_table_entry *spot;
    1534             :   Idx i;
    1535             : #ifdef lint
    1536         502 :   /* Suppress bogus uninitialized-variable warnings.  */
    1537             :   *err = REG_NOERROR;
    1538           0 : #endif
    1539           0 :   if (nodes->nelem == 0)
    1540             :     {
    1541         502 :       *err = REG_NOERROR;
    1542         502 :       return NULL;
    1543             :     }
    1544         554 :   hash = calc_state_hash (nodes, context);
    1545             :   spot = dfa->state_table + (hash & dfa->state_hash_mask);
    1546          80 : 
    1547          80 :   for (i = 0 ; i < spot->num ; i++)
    1548          28 :     {
    1549          28 :       re_dfastate_t *state = spot->array[i];
    1550          28 :       if (state->hash == hash
    1551             :           && state->context == context
    1552             :           && re_node_set_compare (state->entrance_nodes, nodes))
    1553         474 :         return state;
    1554         474 :     }
    1555           0 :   /* There are no appropriate state in `dfa', create the new one.  */
    1556             :   new_state = create_cd_newstate (dfa, nodes, context, hash);
    1557         474 :   if (BE (new_state == NULL, 0))
    1558             :     *err = REG_ESPACE;
    1559             : 
    1560             :   return new_state;
    1561             : }
    1562             : 
    1563             : /* Finish initialization of the new state NEWSTATE, and using its hash value
    1564             :    HASH put in the appropriate bucket of DFA's state table.  Return value
    1565         496 :    indicates the error code if failed.  */
    1566             : 
    1567             : static reg_errcode_t
    1568             : register_state (const re_dfa_t *dfa, re_dfastate_t *newstate,
    1569             :                 re_hashval_t hash)
    1570             : {
    1571             :   struct re_state_table_entry *spot;
    1572         496 :   reg_errcode_t err;
    1573         496 :   Idx i;
    1574         496 : 
    1575           0 :   newstate->hash = hash;
    1576        1580 :   err = re_node_set_alloc (&newstate->non_eps_nodes, newstate->nodes.nelem);
    1577             :   if (BE (err != REG_NOERROR, 0))
    1578        1084 :     return REG_ESPACE;
    1579        1084 :   for (i = 0; i < newstate->nodes.nelem; i++)
    1580         595 :     {
    1581           0 :       Idx elem = newstate->nodes.elems[i];
    1582             :       if (!IS_EPSILON_NODE (dfa->nodes[elem].type))
    1583             :         if (BE (! re_node_set_insert_last (&newstate->non_eps_nodes, elem), 0))
    1584         496 :           return REG_ESPACE;
    1585         496 :     }
    1586             : 
    1587         457 :   spot = dfa->state_table + (hash & dfa->state_hash_mask);
    1588         457 :   if (BE (spot->alloc <= spot->num, 0))
    1589             :     {
    1590         457 :       Idx new_alloc = 2 * spot->num + 2;
    1591           0 :       re_dfastate_t **new_array = re_realloc (spot->array, re_dfastate_t *,
    1592         457 :                                               new_alloc);
    1593         457 :       if (BE (new_array == NULL, 0))
    1594             :         return REG_ESPACE;
    1595         496 :       spot->array = new_array;
    1596         496 :       spot->alloc = new_alloc;
    1597             :     }
    1598             :   spot->array[spot->num++] = newstate;
    1599             :   return REG_NOERROR;
    1600         113 : }
    1601             : 
    1602         113 : static void
    1603         113 : free_state (re_dfastate_t *state)
    1604         113 : {
    1605             :   re_node_set_free (&state->non_eps_nodes);
    1606          53 :   re_node_set_free (&state->inveclosure);
    1607          53 :   if (state->entrance_nodes != &state->nodes)
    1608             :     {
    1609         113 :       re_node_set_free (state->entrance_nodes);
    1610         113 :       re_free (state->entrance_nodes);
    1611         113 :     }
    1612         113 :   re_node_set_free (&state->nodes);
    1613         113 :   re_free (state->word_trtable);
    1614             :   re_free (state->trtable);
    1615             :   re_free (state);
    1616             : }
    1617             : 
    1618             : /* Create the new state which is independ of contexts.
    1619             :    Return the new state if succeeded, otherwise return NULL.  */
    1620          22 : 
    1621             : static re_dfastate_t *
    1622             : internal_function
    1623             : create_ci_newstate (const re_dfa_t *dfa, const re_node_set *nodes,
    1624             :                     re_hashval_t hash)
    1625             : {
    1626             :   Idx i;
    1627          22 :   reg_errcode_t err;
    1628          22 :   re_dfastate_t *newstate;
    1629           0 : 
    1630          22 :   newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1);
    1631          22 :   if (BE (newstate == NULL, 0))
    1632             :     return NULL;
    1633           0 :   err = re_node_set_init_copy (&newstate->nodes, nodes);
    1634           0 :   if (BE (err != REG_NOERROR, 0))
    1635             :     {
    1636             :       re_free (newstate);
    1637          22 :       return NULL;
    1638          91 :     }
    1639             : 
    1640          69 :   newstate->entrance_nodes = &newstate->nodes;
    1641          69 :   for (i = 0 ; i < nodes->nelem ; i++)
    1642          69 :     {
    1643           0 :       re_token_t *node = dfa->nodes + nodes->elems[i];
    1644             :       re_token_type_t type = node->type;
    1645          69 :       if (type == CHARACTER && !node->constraint)
    1646             :         continue;
    1647             : #ifdef RE_ENABLE_I18N
    1648             :       newstate->accept_mb |= node->accept_mb;
    1649          69 : #endif /* RE_ENABLE_I18N */
    1650          12 : 
    1651          57 :       /* If the state has the halt node, the state is a halt state.  */
    1652           0 :       if (type == END_OF_RE)
    1653          57 :         newstate->halt = 1;
    1654          28 :       else if (type == OP_BACK_REF)
    1655             :         newstate->has_backref = 1;
    1656          22 :       else if (type == ANCHOR || node->constraint)
    1657          22 :         newstate->has_constraint = 1;
    1658             :     }
    1659           0 :   err = register_state (dfa, newstate, hash);
    1660           0 :   if (BE (err != REG_NOERROR, 0))
    1661             :     {
    1662          22 :       free_state (newstate);
    1663             :       newstate = NULL;
    1664             :     }
    1665             :   return newstate;
    1666             : }
    1667             : 
    1668             : /* Create the new state which is depend on the context CONTEXT.
    1669             :    Return the new state if succeeded, otherwise return NULL.  */
    1670         474 : 
    1671             : static re_dfastate_t *
    1672             : internal_function
    1673         474 : create_cd_newstate (const re_dfa_t *dfa, const re_node_set *nodes,
    1674             :                     unsigned int context, re_hashval_t hash)
    1675             : {
    1676             :   Idx i, nctx_nodes = 0;
    1677         474 :   reg_errcode_t err;
    1678         474 :   re_dfastate_t *newstate;
    1679           0 : 
    1680         474 :   newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1);
    1681         474 :   if (BE (newstate == NULL, 0))
    1682             :     return NULL;
    1683           0 :   err = re_node_set_init_copy (&newstate->nodes, nodes);
    1684           0 :   if (BE (err != REG_NOERROR, 0))
    1685             :     {
    1686             :       re_free (newstate);
    1687         474 :       return NULL;
    1688         474 :     }
    1689             : 
    1690        1706 :   newstate->context = context;
    1691             :   newstate->entrance_nodes = &newstate->nodes;
    1692        1232 : 
    1693        1232 :   for (i = 0 ; i < nodes->nelem ; i++)
    1694        1232 :     {
    1695        1232 :       unsigned int constraint = 0;
    1696         361 :       re_token_t *node = dfa->nodes + nodes->elems[i];
    1697             :       re_token_type_t type = node->type;
    1698        1232 :       if (node->constraint)
    1699         144 :         constraint = node->constraint;
    1700             : 
    1701        1088 :       if (type == CHARACTER && !constraint)
    1702             :         continue;
    1703             : #ifdef RE_ENABLE_I18N
    1704             :       newstate->accept_mb |= node->accept_mb;
    1705        1088 : #endif /* RE_ENABLE_I18N */
    1706         346 : 
    1707         742 :       /* If the state has the halt node, the state is a halt state.  */
    1708           0 :       if (type == END_OF_RE)
    1709         742 :         newstate->halt = 1;
    1710         273 :       else if (type == OP_BACK_REF)
    1711             :         newstate->has_backref = 1;
    1712        1088 :       else if (type == ANCHOR)
    1713             :         constraint = node->opr.ctx_type;
    1714         626 : 
    1715             :       if (constraint)
    1716         205 :         {
    1717         205 :           if (newstate->entrance_nodes == &newstate->nodes)
    1718             :             {
    1719           0 :               newstate->entrance_nodes = re_malloc (re_node_set, 1);
    1720           0 :               if (BE (newstate->entrance_nodes == NULL, 0))
    1721             :                 {
    1722         205 :                   free_state (newstate);
    1723         205 :                   return NULL;
    1724         205 :                 }
    1725             :               re_node_set_init_copy (newstate->entrance_nodes, nodes);
    1726             :               nctx_nodes = 0;
    1727         626 :               newstate->has_constraint = 1;
    1728             :             }
    1729         217 : 
    1730         217 :           if (NOT_SATISFY_PREV_CONSTRAINT (constraint,context))
    1731             :             {
    1732             :               re_node_set_remove_at (&newstate->nodes, i - nctx_nodes);
    1733             :               ++nctx_nodes;
    1734         474 :             }
    1735         474 :         }
    1736             :     }
    1737           0 :   err = register_state (dfa, newstate, hash);
    1738           0 :   if (BE (err != REG_NOERROR, 0))
    1739             :     {
    1740         474 :       free_state (newstate);
    1741             :       newstate = NULL;
    1742             :     }
    1743             :   return  newstate;
    1744             : }

Generated by: LCOV version 1.10