LCOV - code coverage report
Current view: top level - lib - regexec.c (source / functions) Hit Total Coverage
Test: coreutils.info Lines: 548 1800 30.4 %
Date: 2018-01-30 Functions: 25 60 41.7 %

          Line data    Source code
       1             : /* -*- buffer-read-only: t -*- vi: set ro: */
       2             : /* DO NOT EDIT! GENERATED AUTOMATICALLY! */
       3             : #line 1
       4             : /* Extended regular expression matching and search library.
       5             :    Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007 Free Software Foundation,
       6             :    Inc.
       7             :    This file is part of the GNU C Library.
       8             :    Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
       9             : 
      10             :    This program is free software; you can redistribute it and/or modify
      11             :    it under the terms of the GNU General Public License as published by
      12             :    the Free Software Foundation; either version 3, or (at your option)
      13             :    any later version.
      14             : 
      15             :    This program is distributed in the hope that it will be useful,
      16             :    but WITHOUT ANY WARRANTY; without even the implied warranty of
      17             :    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      18             :    GNU General Public License for more details.
      19             : 
      20             :    You should have received a copy of the GNU General Public License along
      21             :    with this program; if not, write to the Free Software Foundation,
      22             :    Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
      23             : 
      24             : static reg_errcode_t match_ctx_init (re_match_context_t *cache, int eflags,
      25             :                                      Idx n) internal_function;
      26             : static void match_ctx_clean (re_match_context_t *mctx) internal_function;
      27             : static void match_ctx_free (re_match_context_t *cache) internal_function;
      28             : static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, Idx node,
      29             :                                           Idx str_idx, Idx from, Idx to)
      30             :      internal_function;
      31             : static Idx search_cur_bkref_entry (const re_match_context_t *mctx, Idx str_idx)
      32             :      internal_function;
      33             : static reg_errcode_t match_ctx_add_subtop (re_match_context_t *mctx, Idx node,
      34             :                                            Idx str_idx) internal_function;
      35             : static re_sub_match_last_t * match_ctx_add_sublast (re_sub_match_top_t *subtop,
      36             :                                                     Idx node, Idx str_idx)
      37             :      internal_function;
      38             : static void sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
      39             :                            re_dfastate_t **limited_sts, Idx last_node,
      40             :                            Idx last_str_idx)
      41             :      internal_function;
      42             : static reg_errcode_t re_search_internal (const regex_t *preg,
      43             :                                          const char *string, Idx length,
      44             :                                          Idx start, Idx last_start, Idx stop,
      45             :                                          size_t nmatch, regmatch_t pmatch[],
      46             :                                          int eflags) internal_function;
      47             : static regoff_t re_search_2_stub (struct re_pattern_buffer *bufp,
      48             :                                   const char *string1, Idx length1,
      49             :                                   const char *string2, Idx length2,
      50             :                                   Idx start, regoff_t range,
      51             :                                   struct re_registers *regs,
      52             :                                   Idx stop, bool ret_len) internal_function;
      53             : static regoff_t re_search_stub (struct re_pattern_buffer *bufp,
      54             :                                 const char *string, Idx length, Idx start,
      55             :                                 regoff_t range, Idx stop,
      56             :                                 struct re_registers *regs,
      57             :                                 bool ret_len) internal_function;
      58             : static unsigned int re_copy_regs (struct re_registers *regs, regmatch_t *pmatch,
      59             :                                   Idx nregs, int regs_allocated)
      60             :      internal_function;
      61             : static reg_errcode_t prune_impossible_nodes (re_match_context_t *mctx)
      62             :      internal_function;
      63             : static Idx check_matching (re_match_context_t *mctx, bool fl_longest_match,
      64             :                            Idx *p_match_first) internal_function;
      65             : static Idx check_halt_state_context (const re_match_context_t *mctx,
      66             :                                      const re_dfastate_t *state, Idx idx)
      67             :      internal_function;
      68             : static void update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,
      69             :                          regmatch_t *prev_idx_match, Idx cur_node,
      70             :                          Idx cur_idx, Idx nmatch) internal_function;
      71             : static reg_errcode_t push_fail_stack (struct re_fail_stack_t *fs,
      72             :                                       Idx str_idx, Idx dest_node, Idx nregs,
      73             :                                       regmatch_t *regs,
      74             :                                       re_node_set *eps_via_nodes)
      75             :      internal_function;
      76             : static reg_errcode_t set_regs (const regex_t *preg,
      77             :                                const re_match_context_t *mctx,
      78             :                                size_t nmatch, regmatch_t *pmatch,
      79             :                                bool fl_backtrack) internal_function;
      80             : static reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs)
      81             :      internal_function;
      82             : 
      83             : #ifdef RE_ENABLE_I18N
      84             : static int sift_states_iter_mb (const re_match_context_t *mctx,
      85             :                                 re_sift_context_t *sctx,
      86             :                                 Idx node_idx, Idx str_idx, Idx max_str_idx)
      87             :      internal_function;
      88             : #endif /* RE_ENABLE_I18N */
      89             : static reg_errcode_t sift_states_backward (const re_match_context_t *mctx,
      90             :                                            re_sift_context_t *sctx)
      91             :      internal_function;
      92             : static reg_errcode_t build_sifted_states (const re_match_context_t *mctx,
      93             :                                           re_sift_context_t *sctx, Idx str_idx,
      94             :                                           re_node_set *cur_dest)
      95             :      internal_function;
      96             : static reg_errcode_t update_cur_sifted_state (const re_match_context_t *mctx,
      97             :                                               re_sift_context_t *sctx,
      98             :                                               Idx str_idx,
      99             :                                               re_node_set *dest_nodes)
     100             :      internal_function;
     101             : static reg_errcode_t add_epsilon_src_nodes (const re_dfa_t *dfa,
     102             :                                             re_node_set *dest_nodes,
     103             :                                             const re_node_set *candidates)
     104             :      internal_function;
     105             : static bool check_dst_limits (const re_match_context_t *mctx,
     106             :                               const re_node_set *limits,
     107             :                               Idx dst_node, Idx dst_idx, Idx src_node,
     108             :                               Idx src_idx) internal_function;
     109             : static int check_dst_limits_calc_pos_1 (const re_match_context_t *mctx,
     110             :                                         int boundaries, Idx subexp_idx,
     111             :                                         Idx from_node, Idx bkref_idx)
     112             :      internal_function;
     113             : static int check_dst_limits_calc_pos (const re_match_context_t *mctx,
     114             :                                       Idx limit, Idx subexp_idx,
     115             :                                       Idx node, Idx str_idx,
     116             :                                       Idx bkref_idx) internal_function;
     117             : static reg_errcode_t check_subexp_limits (const re_dfa_t *dfa,
     118             :                                           re_node_set *dest_nodes,
     119             :                                           const re_node_set *candidates,
     120             :                                           re_node_set *limits,
     121             :                                           struct re_backref_cache_entry *bkref_ents,
     122             :                                           Idx str_idx) internal_function;
     123             : static reg_errcode_t sift_states_bkref (const re_match_context_t *mctx,
     124             :                                         re_sift_context_t *sctx,
     125             :                                         Idx str_idx, const re_node_set *candidates)
     126             :      internal_function;
     127             : static reg_errcode_t merge_state_array (const re_dfa_t *dfa,
     128             :                                         re_dfastate_t **dst,
     129             :                                         re_dfastate_t **src, Idx num)
     130             :      internal_function;
     131             : static re_dfastate_t *find_recover_state (reg_errcode_t *err,
     132             :                                          re_match_context_t *mctx) internal_function;
     133             : static re_dfastate_t *transit_state (reg_errcode_t *err,
     134             :                                      re_match_context_t *mctx,
     135             :                                      re_dfastate_t *state) internal_function;
     136             : static re_dfastate_t *merge_state_with_log (reg_errcode_t *err,
     137             :                                             re_match_context_t *mctx,
     138             :                                             re_dfastate_t *next_state)
     139             :      internal_function;
     140             : static reg_errcode_t check_subexp_matching_top (re_match_context_t *mctx,
     141             :                                                 re_node_set *cur_nodes,
     142             :                                                 Idx str_idx) internal_function;
     143             : #if 0
     144             : static re_dfastate_t *transit_state_sb (reg_errcode_t *err,
     145             :                                         re_match_context_t *mctx,
     146             :                                         re_dfastate_t *pstate)
     147             :      internal_function;
     148             : #endif
     149             : #ifdef RE_ENABLE_I18N
     150             : static reg_errcode_t transit_state_mb (re_match_context_t *mctx,
     151             :                                        re_dfastate_t *pstate)
     152             :      internal_function;
     153             : #endif /* RE_ENABLE_I18N */
     154             : static reg_errcode_t transit_state_bkref (re_match_context_t *mctx,
     155             :                                           const re_node_set *nodes)
     156             :      internal_function;
     157             : static reg_errcode_t get_subexp (re_match_context_t *mctx,
     158             :                                  Idx bkref_node, Idx bkref_str_idx)
     159             :      internal_function;
     160             : static reg_errcode_t get_subexp_sub (re_match_context_t *mctx,
     161             :                                      const re_sub_match_top_t *sub_top,
     162             :                                      re_sub_match_last_t *sub_last,
     163             :                                      Idx bkref_node, Idx bkref_str)
     164             :      internal_function;
     165             : static Idx find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
     166             :                              Idx subexp_idx, int type) internal_function;
     167             : static reg_errcode_t check_arrival (re_match_context_t *mctx,
     168             :                                     state_array_t *path, Idx top_node,
     169             :                                     Idx top_str, Idx last_node, Idx last_str,
     170             :                                     int type) internal_function;
     171             : static reg_errcode_t check_arrival_add_next_nodes (re_match_context_t *mctx,
     172             :                                                    Idx str_idx,
     173             :                                                    re_node_set *cur_nodes,
     174             :                                                    re_node_set *next_nodes)
     175             :      internal_function;
     176             : static reg_errcode_t check_arrival_expand_ecl (const re_dfa_t *dfa,
     177             :                                                re_node_set *cur_nodes,
     178             :                                                Idx ex_subexp, int type)
     179             :      internal_function;
     180             : static reg_errcode_t check_arrival_expand_ecl_sub (const re_dfa_t *dfa,
     181             :                                                    re_node_set *dst_nodes,
     182             :                                                    Idx target, Idx ex_subexp,
     183             :                                                    int type) internal_function;
     184             : static reg_errcode_t expand_bkref_cache (re_match_context_t *mctx,
     185             :                                          re_node_set *cur_nodes, Idx cur_str,
     186             :                                          Idx subexp_num, int type)
     187             :      internal_function;
     188             : static bool build_trtable (const re_dfa_t *dfa,
     189             :                            re_dfastate_t *state) internal_function;
     190             : #ifdef RE_ENABLE_I18N
     191             : static int check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx,
     192             :                                     const re_string_t *input, Idx idx)
     193             :      internal_function;
     194             : # ifdef _LIBC
     195             : static unsigned int find_collation_sequence_value (const unsigned char *mbs,
     196             :                                                    size_t name_len)
     197             :      internal_function;
     198             : # endif /* _LIBC */
     199             : #endif /* RE_ENABLE_I18N */
     200             : static Idx group_nodes_into_DFAstates (const re_dfa_t *dfa,
     201             :                                        const re_dfastate_t *state,
     202             :                                        re_node_set *states_node,
     203             :                                        bitset_t *states_ch) internal_function;
     204             : static bool check_node_accept (const re_match_context_t *mctx,
     205             :                                const re_token_t *node, Idx idx)
     206             :      internal_function;
     207             : static reg_errcode_t extend_buffers (re_match_context_t *mctx)
     208             :      internal_function;
     209             : 
     210             : /* Entry point for POSIX code.  */
     211             : 
     212             : /* regexec searches for a given pattern, specified by PREG, in the
     213             :    string STRING.
     214             : 
     215             :    If NMATCH is zero or REG_NOSUB was set in the cflags argument to
     216             :    `regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at
     217             :    least NMATCH elements, and we set them to the offsets of the
     218             :    corresponding matched substrings.
     219             : 
     220             :    EFLAGS specifies `execution flags' which affect matching: if
     221             :    REG_NOTBOL is set, then ^ does not match at the beginning of the
     222             :    string; if REG_NOTEOL is set, then $ does not match at the end.
     223             : 
     224           0 :    We return 0 if we find a match and REG_NOMATCH if not.  */
     225             : 
     226             : int
     227             : regexec (preg, string, nmatch, pmatch, eflags)
     228             :     const regex_t *_Restrict_ preg;
     229             :     const char *_Restrict_ string;
     230             :     size_t nmatch;
     231             :     regmatch_t pmatch[_Restrict_arr_];
     232             :     int eflags;
     233             : {
     234             :   reg_errcode_t err;
     235             :   Idx start, length;
     236             : #ifdef _LIBC
     237           0 :   re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
     238           0 : #endif
     239             : 
     240           0 :   if (eflags & ~(REG_NOTBOL | REG_NOTEOL | REG_STARTEND))
     241             :     return REG_BADPAT;
     242           0 : 
     243           0 :   if (eflags & REG_STARTEND)
     244             :     {
     245             :       start = pmatch[0].rm_so;
     246             :       length = pmatch[0].rm_eo;
     247           0 :     }
     248           0 :   else
     249             :     {
     250             :       start = 0;
     251             :       length = strlen (string);
     252           0 :     }
     253           0 : 
     254             :   __libc_lock_lock (dfa->lock);
     255             :   if (preg->no_sub)
     256           0 :     err = re_search_internal (preg, string, length, start, length,
     257             :                               length, 0, NULL, eflags);
     258             :   else
     259           0 :     err = re_search_internal (preg, string, length, start, length,
     260             :                               length, nmatch, pmatch, eflags);
     261             :   __libc_lock_unlock (dfa->lock);
     262             :   return err != REG_NOERROR;
     263             : }
     264             : 
     265             : #ifdef _LIBC
     266             : # include <shlib-compat.h>
     267             : versioned_symbol (libc, __regexec, regexec, GLIBC_2_3_4);
     268             : 
     269             : # if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4)
     270             : __typeof__ (__regexec) __compat_regexec;
     271             : 
     272             : int
     273             : attribute_compat_text_section
     274             : __compat_regexec (const regex_t *_Restrict_ preg,
     275             :                   const char *_Restrict_ string, size_t nmatch,
     276             :                   regmatch_t pmatch[], int eflags)
     277             : {
     278             :   return regexec (preg, string, nmatch, pmatch,
     279             :                   eflags & (REG_NOTBOL | REG_NOTEOL));
     280             : }
     281             : compat_symbol (libc, __compat_regexec, regexec, GLIBC_2_0);
     282             : # endif
     283             : #endif
     284             : 
     285             : /* Entry points for GNU code.  */
     286             : 
     287             : /* re_match, re_search, re_match_2, re_search_2
     288             : 
     289             :    The former two functions operate on STRING with length LENGTH,
     290             :    while the later two operate on concatenation of STRING1 and STRING2
     291             :    with lengths LENGTH1 and LENGTH2, respectively.
     292             : 
     293             :    re_match() matches the compiled pattern in BUFP against the string,
     294             :    starting at index START.
     295             : 
     296             :    re_search() first tries matching at index START, then it tries to match
     297             :    starting from index START + 1, and so on.  The last start position tried
     298             :    is START + RANGE.  (Thus RANGE = 0 forces re_search to operate the same
     299             :    way as re_match().)
     300             : 
     301             :    The parameter STOP of re_{match,search}_2 specifies that no match exceeding
     302             :    the first STOP characters of the concatenation of the strings should be
     303             :    concerned.
     304             : 
     305             :    If REGS is not NULL, and BUFP->no_sub is not set, the offsets of the match
     306             :    and all groups is stored in REGS.  (For the "_2" variants, the offsets are
     307             :    computed relative to the concatenation, not relative to the individual
     308             :    strings.)
     309             : 
     310             :    On success, re_match* functions return the length of the match, re_search*
     311             :    return the position of the start of the match.  Return value -1 means no
     312          39 :    match was found and -2 indicates an internal error.  */
     313             : 
     314             : regoff_t
     315             : re_match (bufp, string, length, start, regs)
     316             :     struct re_pattern_buffer *bufp;
     317             :     const char *string;
     318          39 :     Idx length, start;
     319             :     struct re_registers *regs;
     320             : {
     321             :   return re_search_stub (bufp, string, length, start, 0, length, regs, true);
     322             : }
     323             : #ifdef _LIBC
     324             : weak_alias (__re_match, re_match)
     325         317 : #endif
     326             : 
     327             : regoff_t
     328             : re_search (bufp, string, length, start, range, regs)
     329             :     struct re_pattern_buffer *bufp;
     330             :     const char *string;
     331             :     Idx length, start;
     332         317 :     regoff_t range;
     333             :     struct re_registers *regs;
     334             : {
     335             :   return re_search_stub (bufp, string, length, start, range, length, regs,
     336             :                          false);
     337             : }
     338             : #ifdef _LIBC
     339             : weak_alias (__re_search, re_search)
     340           0 : #endif
     341             : 
     342             : regoff_t
     343             : re_match_2 (bufp, string1, length1, string2, length2, start, regs, stop)
     344             :     struct re_pattern_buffer *bufp;
     345             :     const char *string1, *string2;
     346           0 :     Idx length1, length2, start, stop;
     347             :     struct re_registers *regs;
     348             : {
     349             :   return re_search_2_stub (bufp, string1, length1, string2, length2,
     350             :                            start, 0, regs, stop, true);
     351             : }
     352             : #ifdef _LIBC
     353             : weak_alias (__re_match_2, re_match_2)
     354           0 : #endif
     355             : 
     356             : regoff_t
     357             : re_search_2 (bufp, string1, length1, string2, length2, start, range, regs, stop)
     358             :     struct re_pattern_buffer *bufp;
     359             :     const char *string1, *string2;
     360             :     Idx length1, length2, start, stop;
     361           0 :     regoff_t range;
     362             :     struct re_registers *regs;
     363             : {
     364             :   return re_search_2_stub (bufp, string1, length1, string2, length2,
     365             :                            start, range, regs, stop, false);
     366             : }
     367             : #ifdef _LIBC
     368             : weak_alias (__re_search_2, re_search_2)
     369             : #endif
     370           0 : 
     371             : static regoff_t
     372             : internal_function
     373             : re_search_2_stub (struct re_pattern_buffer *bufp,
     374             :                   const char *string1, Idx length1,
     375             :                   const char *string2, Idx length2,
     376             :                   Idx start, regoff_t range, struct re_registers *regs,
     377             :                   Idx stop, bool ret_len)
     378           0 : {
     379           0 :   const char *str;
     380             :   regoff_t rval;
     381           0 :   Idx len = length1 + length2;
     382           0 :   char *s = NULL;
     383             : 
     384             :   if (BE (length1 < 0 || length2 < 0 || stop < 0 || len < length1, 0))
     385           0 :     return -2;
     386           0 : 
     387             :   /* Concatenate the strings.  */
     388           0 :   if (length2 > 0)
     389             :     if (length1 > 0)
     390           0 :       {
     391           0 :         s = re_malloc (char, len);
     392             : 
     393             :         if (BE (s == NULL, 0))
     394             :           return -2;
     395           0 : #ifdef _LIBC
     396           0 :         memcpy (__mempcpy (s, string1, length1), string2, length2);
     397             : #else
     398           0 :         memcpy (s, string1, length1);
     399             :         memcpy (s + length1, string2, length2);
     400             : #endif
     401           0 :         str = s;
     402             :       }
     403           0 :     else
     404             :       str = string2;
     405           0 :   else
     406             :     str = string1;
     407           0 : 
     408           0 :   rval = re_search_stub (bufp, str, len, start, range, stop, regs,
     409             :                          ret_len);
     410             :   re_free (s);
     411             :   return rval;
     412             : }
     413             : 
     414             : /* The parameters have the same meaning as those of re_search.
     415             :    Additional parameters:
     416             :    If RET_LEN is true the length of the match is returned (re_match style);
     417             :    otherwise the position of the match is returned.  */
     418         356 : 
     419             : static regoff_t
     420             : internal_function
     421             : re_search_stub (struct re_pattern_buffer *bufp,
     422             :                 const char *string, Idx length,
     423             :                 Idx start, regoff_t range, Idx stop, struct re_registers *regs,
     424             :                 bool ret_len)
     425             : {
     426             :   reg_errcode_t result;
     427         356 :   regmatch_t *pmatch;
     428             :   Idx nregs;
     429             :   regoff_t rval;
     430             :   int eflags = 0;
     431         356 : #ifdef _LIBC
     432             :   re_dfa_t *dfa = (re_dfa_t *) bufp->buffer;
     433             : #endif
     434         356 :   Idx last_start = start + range;
     435           0 : 
     436         356 :   /* Check for out-of-range.  */
     437           0 :   if (BE (start < 0 || start > length, 0))
     438         356 :     return -1;
     439           0 :   if (BE (length < last_start || (0 <= range && last_start < start), 0))
     440             :     last_start = length;
     441             :   else if (BE (last_start < 0 || (range < 0 && start <= last_start), 0))
     442             :     last_start = 0;
     443         356 : 
     444         356 :   __libc_lock_lock (dfa->lock);
     445             : 
     446             :   eflags |= (bufp->not_bol) ? REG_NOTBOL : 0;
     447         356 :   eflags |= (bufp->not_eol) ? REG_NOTEOL : 0;
     448          30 : 
     449             :   /* Compile fastmap if we haven't yet.  */
     450         356 :   if (start < last_start && bufp->fastmap != NULL && !bufp->fastmap_accurate)
     451           0 :     re_compile_fastmap (bufp);
     452             : 
     453             :   if (BE (bufp->no_sub, 0))
     454         356 :     regs = NULL;
     455         160 : 
     456         196 :   /* We need at least 1 register.  */
     457             :   if (regs == NULL)
     458             :     nregs = 1;
     459           0 :   else if (BE (bufp->regs_allocated == REGS_FIXED
     460           0 :                && regs->num_regs <= bufp->re_nsub, 0))
     461             :     {
     462             :       nregs = regs->num_regs;
     463           0 :       if (BE (nregs < 1, 0))
     464           0 :         {
     465             :           /* Nothing can be copied to regs.  */
     466             :           regs = NULL;
     467             :           nregs = 1;
     468         196 :         }
     469         356 :     }
     470         356 :   else
     471             :     nregs = bufp->re_nsub + 1;
     472           0 :   pmatch = re_malloc (regmatch_t, nregs);
     473           0 :   if (BE (pmatch == NULL, 0))
     474             :     {
     475             :       rval = -2;
     476         356 :       goto out;
     477             :     }
     478             : 
     479         356 :   result = re_search_internal (bufp, string, length, start, last_start, stop,
     480             :                                nregs, pmatch, eflags);
     481             : 
     482         356 :   rval = 0;
     483         157 : 
     484         199 :   /* I hope we needn't fill ther regs with -1's when no match was found.  */
     485             :   if (result != REG_NOERROR)
     486             :     rval = -1;
     487          99 :   else if (regs != NULL)
     488          99 :     {
     489          99 :       /* If caller wants register contents data back, copy them.  */
     490           0 :       bufp->regs_allocated = re_copy_regs (regs, pmatch, nregs,
     491             :                                            bufp->regs_allocated);
     492             :       if (BE (bufp->regs_allocated == REGS_UNALLOCATED, 0))
     493         356 :         rval = -2;
     494             :     }
     495         199 : 
     496             :   if (BE (rval == 0, 1))
     497          14 :     {
     498          14 :       if (ret_len)
     499             :         {
     500             :           assert (pmatch[0].rm_so == start);
     501         185 :           rval = pmatch[0].rm_eo - start;
     502             :         }
     503         356 :       else
     504         356 :         rval = pmatch[0].rm_so;
     505             :     }
     506         356 :   re_free (pmatch);
     507             :  out:
     508             :   __libc_lock_unlock (dfa->lock);
     509             :   return rval;
     510             : }
     511          99 : 
     512             : static unsigned int
     513             : internal_function
     514          99 : re_copy_regs (struct re_registers *regs, regmatch_t *pmatch, Idx nregs,
     515             :               int regs_allocated)
     516          99 : {
     517             :   int rval = REGS_REALLOCATE;
     518             :   Idx i;
     519             :   Idx need_regs = nregs + 1;
     520             :   /* We need one extra element beyond `num_regs' for the `-1' marker GNU code
     521          99 :      uses.  */
     522             : 
     523          32 :   /* Have the register data arrays been allocated?  */
     524          32 :   if (regs_allocated == REGS_UNALLOCATED)
     525           0 :     { /* No.  So allocate them with malloc.  */
     526          32 :       regs->start = re_malloc (regoff_t, need_regs);
     527          32 :       if (BE (regs->start == NULL, 0))
     528             :         return REGS_UNALLOCATED;
     529           0 :       regs->end = re_malloc (regoff_t, need_regs);
     530           0 :       if (BE (regs->end == NULL, 0))
     531             :         {
     532          32 :           re_free (regs->start);
     533             :           return REGS_UNALLOCATED;
     534          67 :         }
     535             :       regs->num_regs = need_regs;
     536             :     }
     537             :   else if (regs_allocated == REGS_REALLOCATE)
     538          67 :     { /* Yes.  If we need more elements than were already
     539             :          allocated, reallocate them.  If we need fewer, just
     540           0 :          leave it alone.  */
     541             :       if (BE (need_regs > regs->num_regs, 0))
     542           0 :         {
     543           0 :           regoff_t *new_start = re_realloc (regs->start, regoff_t, need_regs);
     544           0 :           regoff_t *new_end;
     545           0 :           if (BE (new_start == NULL, 0))
     546             :             return REGS_UNALLOCATED;
     547           0 :           new_end = re_realloc (regs->end, regoff_t, need_regs);
     548           0 :           if (BE (new_end == NULL, 0))
     549             :             {
     550           0 :               re_free (new_start);
     551           0 :               return REGS_UNALLOCATED;
     552           0 :             }
     553             :           regs->start = new_start;
     554             :           regs->end = new_end;
     555             :           regs->num_regs = need_regs;
     556             :         }
     557           0 :     }
     558             :   else
     559           0 :     {
     560           0 :       assert (regs_allocated == REGS_FIXED);
     561             :       /* This function may not be called with REGS_FIXED and nregs too big.  */
     562             :       assert (regs->num_regs >= nregs);
     563             :       rval = REGS_FIXED;
     564         204 :     }
     565             : 
     566         105 :   /* Copy the regs.  */
     567         105 :   for (i = 0; i < nregs; ++i)
     568             :     {
     569         198 :       regs->start[i] = pmatch[i].rm_so;
     570          99 :       regs->end[i] = pmatch[i].rm_eo;
     571             :     }
     572          99 :   for ( ; i < regs->num_regs; ++i)
     573             :     regs->start[i] = regs->end[i] = -1;
     574             : 
     575             :   return rval;
     576             : }
     577             : 
     578             : /* Set REGS to hold NUM_REGS registers, storing them in STARTS and
     579             :    ENDS.  Subsequent matches using PATTERN_BUFFER and REGS will use
     580             :    this memory for recording register information.  STARTS and ENDS
     581             :    must be allocated using the malloc library routine, and must each
     582             :    be at least NUM_REGS * sizeof (regoff_t) bytes long.
     583             : 
     584             :    If NUM_REGS == 0, then subsequent matches should allocate their own
     585             :    register data.
     586             : 
     587             :    Unless this function is called, the first search or match using
     588             :    PATTERN_BUFFER will allocate its own register data, without
     589           0 :    freeing the old data.  */
     590             : 
     591             : void
     592             : re_set_registers (bufp, regs, num_regs, starts, ends)
     593             :     struct re_pattern_buffer *bufp;
     594             :     struct re_registers *regs;
     595           0 :     __re_size_t num_regs;
     596             :     regoff_t *starts, *ends;
     597           0 : {
     598           0 :   if (num_regs)
     599           0 :     {
     600           0 :       bufp->regs_allocated = REGS_REALLOCATE;
     601             :       regs->num_regs = num_regs;
     602             :       regs->start = starts;
     603             :       regs->end = ends;
     604           0 :     }
     605           0 :   else
     606           0 :     {
     607             :       bufp->regs_allocated = REGS_UNALLOCATED;
     608           0 :       regs->num_regs = 0;
     609             :       regs->start = regs->end = NULL;
     610             :     }
     611             : }
     612             : #ifdef _LIBC
     613             : weak_alias (__re_set_registers, re_set_registers)
     614             : #endif
     615             : 
     616             : /* Entry points compatible with 4.2 BSD regex library.  We don't define
     617             :    them unless specifically requested.  */
     618             : 
     619             : #if defined _REGEX_RE_COMP || defined _LIBC
     620             : int
     621             : # ifdef _LIBC
     622             : weak_function
     623             : # endif
     624             : re_exec (s)
     625             :      const char *s;
     626             : {
     627             :   return 0 == regexec (&re_comp_buf, s, 0, NULL, 0);
     628             : }
     629             : #endif /* _REGEX_RE_COMP */
     630             : 
     631             : /* Internal entry point.  */
     632             : 
     633             : /* Searches for a compiled pattern PREG in the string STRING, whose
     634             :    length is LENGTH.  NMATCH, PMATCH, and EFLAGS have the same
     635             :    meaning as with regexec.  LAST_START is START + RANGE, where
     636             :    START and RANGE have the same meaning as with re_search.
     637             :    Return REG_NOERROR if we find a match, and REG_NOMATCH if not,
     638             :    otherwise return the error code.
     639             :    Note: We assume front end functions already check ranges.
     640             :    (0 <= LAST_START && LAST_START <= LENGTH)  */
     641         356 : 
     642             : static reg_errcode_t
     643             : internal_function
     644             : re_search_internal (const regex_t *preg,
     645             :                     const char *string, Idx length,
     646             :                     Idx start, Idx last_start, Idx stop,
     647             :                     size_t nmatch, regmatch_t pmatch[],
     648         356 :                     int eflags)
     649             : {
     650             :   reg_errcode_t err;
     651             :   const re_dfa_t *dfa = (const re_dfa_t *) preg->buffer;
     652             :   Idx left_lim, right_lim;
     653             :   int incr;
     654         356 :   bool fl_longest_match;
     655             :   int match_kind;
     656             :   Idx match_first;
     657             :   Idx match_last = REG_MISSING;
     658             :   Idx extra_nmatch;
     659         356 :   bool sb;
     660             :   int ch;
     661             : #if defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L)
     662             :   re_match_context_t mctx = { .dfa = dfa };
     663        1068 : #else
     664         156 :   re_match_context_t mctx;
     665         445 : #endif
     666         356 :   char *fastmap = ((preg->fastmap != NULL && preg->fastmap_accurate
     667             :                     && start != last_start && !preg->can_be_null)
     668             :                    ? preg->fastmap : NULL);
     669             :   RE_TRANSLATE_TYPE t = preg->translate;
     670             : 
     671             : #if !(defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L))
     672             :   memset (&mctx, '\0', sizeof (re_match_context_t));
     673         356 :   mctx.dfa = dfa;
     674         356 : #endif
     675             : 
     676             :   extra_nmatch = (nmatch > preg->re_nsub) ? nmatch - (preg->re_nsub + 1) : 0;
     677         356 :   nmatch -= extra_nmatch;
     678             : 
     679             :   /* Check if the DFA haven't been compiled.  */
     680           0 :   if (BE (preg->used == 0 || dfa->init_state == NULL
     681             :           || dfa->init_state_word == NULL || dfa->init_state_nl == NULL
     682             :           || dfa->init_state_begbuf == NULL, 0))
     683             :     return REG_NOMATCH;
     684             : 
     685             : #ifdef DEBUG
     686             :   /* We assume front-end functions already check them.  */
     687             :   assert (0 <= last_start && last_start <= length);
     688             : #endif
     689             : 
     690         356 :   /* If initial states with non-begbuf contexts have no elements,
     691          35 :      the regex must be anchored.  If preg->newline_anchor is set,
     692          19 :      we'll never use init_state_nl, so do not check it.  */
     693          10 :   if (dfa->init_state->nodes.nelem == 0
     694             :       && dfa->init_state_word->nodes.nelem == 0
     695          10 :       && (dfa->init_state_nl->nodes.nelem == 0
     696           0 :           || !preg->newline_anchor))
     697          10 :     {
     698             :       if (start != 0 && last_start != 0)
     699             :         return REG_NOMATCH;
     700             :       start = last_start = 0;
     701         356 :     }
     702             : 
     703         356 :   /* We must check the longest matching, if nmatch > 0.  */
     704         356 :   fl_longest_match = (nmatch != 0 || dfa->nbackref);
     705         356 : 
     706           0 :   err = re_string_allocate (&mctx.input, string, length, dfa->nodes_len + 1,
     707         356 :                             preg->translate, preg->syntax & RE_ICASE, dfa);
     708         356 :   if (BE (err != REG_NOERROR, 0))
     709         356 :     goto free_return;
     710             :   mctx.input.stop = stop;
     711         356 :   mctx.input.raw_stop = stop;
     712         356 :   mctx.input.newline_anchor = preg->newline_anchor;
     713           0 : 
     714             :   err = match_ctx_init (&mctx, eflags, dfa->nbackref * 2);
     715             :   if (BE (err != REG_NOERROR, 0))
     716             :     goto free_return;
     717             : 
     718             :   /* We will log all the DFA states through which the dfa pass,
     719         356 :      if nmatch > 1, or this dfa has "multibyte node", which is a
     720             :      back-reference or a node which can accept multibyte character or
     721             :      multi character collating element.  */
     722          70 :   if (nmatch > 1 || dfa->has_mb_node)
     723             :     {
     724           0 :       /* Avoid overflow.  */
     725           0 :       if (BE (SIZE_MAX / sizeof (re_dfastate_t *) <= mctx.input.bufs_len, 0))
     726             :         {
     727             :           err = REG_ESPACE;
     728          70 :           goto free_return;
     729         140 :         }
     730             : 
     731           0 :       mctx.state_log = re_malloc (re_dfastate_t *, mctx.input.bufs_len + 1);
     732           0 :       if (BE (mctx.state_log == NULL, 0))
     733             :         {
     734             :           err = REG_ESPACE;
     735             :           goto free_return;
     736         286 :         }
     737             :     }
     738         356 :   else
     739         712 :     mctx.state_log = NULL;
     740         356 : 
     741             :   match_first = start;
     742             :   mctx.input.tip_context = (eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
     743         356 :                            : CONTEXT_NEWLINE | CONTEXT_BEGBUF;
     744         356 : 
     745         356 :   /* Check incrementally whether of not the input string match.  */
     746         356 :   incr = (last_start < start) ? -1 : 1;
     747         356 :   left_lim = (last_start < start) ? last_start : start;
     748             :   right_lim = (last_start < start) ? start : last_start;
     749          89 :   sb = dfa->mb_cur_max == 1;
     750          89 :   match_kind =
     751          89 :     (fastmap
     752         445 :      ? ((sb || !(preg->syntax & RE_ICASE || t) ? 4 : 0)
     753             :         | (start <= last_start ? 2 : 0)
     754         138 :         | (t != NULL ? 1 : 0))
     755             :      : 8);
     756         632 : 
     757         988 :   for (;; match_first += incr)
     758             :     {
     759             :       err = REG_NOMATCH;
     760             :       if (match_first < left_lim || right_lim < match_first)
     761             :         goto free_return;
     762             : 
     763             :       /* Advance as rapidly as possible through the string, until we
     764             :          find a plausible place to start matching.  This may be done
     765         412 :          with varying efficiency, so there are various possibilities:
     766             :          only the most common of them are specialized, in order to
     767         315 :          save on code size.  We use a switch statement for speed.  */
     768             :       switch (match_kind)
     769         315 :         {
     770             :         case 8:
     771           0 :           /* No fastmap.  */
     772             :           break;
     773           0 : 
     774           0 :         case 7:
     775           0 :           /* Fastmap with single-byte translation, match forward.  */
     776           0 :           while (BE (match_first < right_lim, 1)
     777             :                  && !fastmap[t[(unsigned char) string[match_first]]])
     778          97 :             ++match_first;
     779             :           goto forward_match_found_start_or_reached_end;
     780         745 : 
     781         570 :         case 6:
     782         551 :           /* Fastmap without translation, match forward.  */
     783             :           while (BE (match_first < right_lim, 1)
     784          97 :                  && !fastmap[(unsigned char) string[match_first]])
     785          97 :             ++match_first;
     786             : 
     787         156 :         forward_match_found_start_or_reached_end:
     788          78 :           if (BE (match_first == right_lim, 0))
     789          78 :             {
     790          75 :               ch = match_first >= length
     791             :                        ? 0 : (unsigned char) string[match_first];
     792          22 :               if (!fastmap[t ? t[ch] : ch])
     793             :                 goto free_return;
     794           0 :             }
     795             :           break;
     796             : 
     797           0 :         case 4:
     798             :         case 5:
     799           0 :           /* Fastmap without multi-byte translation, match backwards.  */
     800           0 :           while (match_first >= left_lim)
     801           0 :             {
     802           0 :               ch = match_first >= length
     803           0 :                        ? 0 : (unsigned char) string[match_first];
     804             :               if (fastmap[t ? t[ch] : ch])
     805           0 :                 break;
     806           0 :               --match_first;
     807           0 :             }
     808             :           if (match_first < left_lim)
     809           0 :             goto free_return;
     810             :           break;
     811             : 
     812             :         default:
     813             :           /* In this case, we can't determine easily the current byte,
     814           0 :              since it might be a component byte of a multibyte
     815             :              character.  Then we use the constructed buffer instead.  */
     816             :           for (;;)
     817           0 :             {
     818           0 :               /* If MATCH_FIRST is out of the valid range, reconstruct the
     819             :                  buffers.  */
     820           0 :               __re_size_t offset = match_first - mctx.input.raw_mbs_idx;
     821             :               if (BE (offset >= (__re_size_t) mctx.input.valid_raw_len, 0))
     822           0 :                 {
     823           0 :                   err = re_string_reconstruct (&mctx.input, match_first,
     824             :                                                eflags);
     825           0 :                   if (BE (err != REG_NOERROR, 0))
     826             :                     goto free_return;
     827             : 
     828             :                   offset = match_first - mctx.input.raw_mbs_idx;
     829           0 :                 }
     830           0 :               /* If MATCH_FIRST is out of the buffer, leave it as '\0'.
     831           0 :                  Note that MATCH_FIRST must not be smaller than 0.  */
     832           0 :               ch = (match_first >= length
     833           0 :                     ? 0 : re_string_byte_at (&mctx.input, offset));
     834           0 :               if (fastmap[ch])
     835             :                 break;
     836           0 :               match_first += incr;
     837           0 :               if (match_first < left_lim || match_first > right_lim)
     838             :                 {
     839             :                   err = REG_NOMATCH;
     840           0 :                   goto free_return;
     841             :                 }
     842             :             }
     843             :           break;
     844             :         }
     845         337 : 
     846         337 :       /* Reconstruct the buffers so that the matcher can assume that
     847           0 :          the matching starts from the beginning of the buffer.  */
     848             :       err = re_string_reconstruct (&mctx.input, match_first, eflags);
     849             :       if (BE (err != REG_NOERROR, 0))
     850             :         goto free_return;
     851             : 
     852         337 : #ifdef RE_ENABLE_I18N
     853           0 :      /* Don't consider this char as a possible match start if it part,
     854             :         yet isn't the head, of a multibyte character.  */
     855             :       if (!sb && !re_string_first_byte (&mctx.input, 0))
     856             :         continue;
     857             : #endif
     858         337 : 
     859         337 :       /* It seems to be appropriate one, then use the matcher.  */
     860             :       /* We assume that the matching starts from 0.  */
     861         337 :       mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = 0;
     862             :       match_last = check_matching (&mctx, fl_longest_match,
     863         199 :                                    start <= last_start ? &match_first : NULL);
     864             :       if (match_last != REG_MISSING)
     865           0 :         {
     866           0 :           if (BE (match_last == REG_ERROR, 0))
     867             :             {
     868             :               err = REG_ESPACE;
     869             :               goto free_return;
     870         199 :             }
     871         199 :           else
     872             :             {
     873           6 :               mctx.match_last = match_last;
     874           6 :               if ((!preg->no_sub && nmatch > 1) || dfa->nbackref)
     875             :                 {
     876             :                   re_dfastate_t *pstate = mctx.state_log[match_last];
     877         199 :                   mctx.last_node = check_halt_state_context (&mctx, pstate,
     878         193 :                                                              match_last);
     879             :                 }
     880           6 :               if ((!preg->no_sub && nmatch > 1 && dfa->has_plural_match)
     881           6 :                   || dfa->nbackref)
     882           6 :                 {
     883           0 :                   err = prune_impossible_nodes (&mctx);
     884           0 :                   if (err == REG_NOERROR)
     885           0 :                     break;
     886             :                   if (BE (err != REG_NOMATCH, 0))
     887             :                     goto free_return;
     888             :                   match_last = REG_MISSING;
     889             :                 }
     890             :               else
     891             :                 break; /* We found a match.  */
     892         138 :             }
     893             :         }
     894             : 
     895             :       match_ctx_clean (&mctx);
     896             :     }
     897             : 
     898             : #ifdef DEBUG
     899             :   assert (match_last != REG_MISSING);
     900             :   assert (err == REG_NOERROR);
     901         199 : #endif
     902             : 
     903             :   /* Set pmatch[] if we need.  */
     904             :   if (nmatch > 0)
     905             :     {
     906         205 :       Idx reg_idx;
     907           6 : 
     908             :       /* Initialize registers.  */
     909             :       for (reg_idx = 1; reg_idx < nmatch; ++reg_idx)
     910         199 :         pmatch[reg_idx].rm_so = pmatch[reg_idx].rm_eo = -1;
     911         199 : 
     912             :       /* Set the points where matching start/end.  */
     913             :       pmatch[0].rm_so = 0;
     914             :       pmatch[0].rm_eo = mctx.match_last;
     915             :       /* FIXME: This function should fail if mctx.match_last exceeds
     916         199 :          the maximum possible regoff_t value.  We need a new error
     917             :          code REG_OVERFLOW.  */
     918           6 : 
     919           6 :       if (!preg->no_sub && nmatch > 1)
     920           6 :         {
     921           0 :           err = set_regs (preg, &mctx, nmatch, pmatch,
     922             :                           dfa->has_plural_match && dfa->nbackref > 0);
     923             :           if (BE (err != REG_NOERROR, 0))
     924             :             goto free_return;
     925             :         }
     926             : 
     927         404 :       /* At last, add the offset to the each registers, since we slided
     928         205 :          the buffers so that we could assume that the matching starts
     929             :          from 0.  */
     930             :       for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
     931         205 :         if (pmatch[reg_idx].rm_so != -1)
     932             :           {
     933           0 : #ifdef RE_ENABLE_I18N
     934           0 :             if (BE (mctx.input.offsets_needed != 0, 0))
     935           0 :               {
     936           0 :                 pmatch[reg_idx].rm_so =
     937           0 :                   (pmatch[reg_idx].rm_so == mctx.input.valid_len
     938           0 :                    ? mctx.input.valid_raw_len
     939           0 :                    : mctx.input.offsets[pmatch[reg_idx].rm_so]);
     940           0 :                 pmatch[reg_idx].rm_eo =
     941             :                   (pmatch[reg_idx].rm_eo == mctx.input.valid_len
     942             :                    ? mctx.input.valid_raw_len
     943             :                    : mctx.input.offsets[pmatch[reg_idx].rm_eo]);
     944             :               }
     945         205 : #else
     946         205 :             assert (mctx.input.offsets_needed == 0);
     947             : #endif
     948         199 :             pmatch[reg_idx].rm_so += match_first;
     949             :             pmatch[reg_idx].rm_eo += match_first;
     950           0 :           }
     951           0 :       for (reg_idx = 0; reg_idx < extra_nmatch; ++reg_idx)
     952             :         {
     953             :           pmatch[nmatch + reg_idx].rm_so = -1;
     954         199 :           pmatch[nmatch + reg_idx].rm_eo = -1;
     955           0 :         }
     956           0 : 
     957             :       if (dfa->subexp_map)
     958           0 :         for (reg_idx = 0; reg_idx + 1 < nmatch; reg_idx++)
     959           0 :           if (dfa->subexp_map[reg_idx] != reg_idx)
     960           0 :             {
     961           0 :               pmatch[reg_idx + 1].rm_so
     962             :                 = pmatch[dfa->subexp_map[reg_idx] + 1].rm_so;
     963             :               pmatch[reg_idx + 1].rm_eo
     964             :                 = pmatch[dfa->subexp_map[reg_idx] + 1].rm_eo;
     965         281 :             }
     966         356 :     }
     967         356 : 
     968           0 :  free_return:
     969         356 :   re_free (mctx.state_log);
     970         356 :   if (dfa->nbackref)
     971             :     match_ctx_free (&mctx);
     972             :   re_string_destruct (&mctx.input);
     973             :   return err;
     974             : }
     975           6 : 
     976             : static reg_errcode_t
     977           6 : internal_function
     978             : prune_impossible_nodes (re_match_context_t *mctx)
     979             : {
     980             :   const re_dfa_t *const dfa = mctx->dfa;
     981           6 :   Idx halt_node, match_last;
     982             :   reg_errcode_t ret;
     983             :   re_dfastate_t **sifted_states;
     984             :   re_dfastate_t **lim_states = NULL;
     985             :   re_sift_context_t sctx;
     986           6 : #ifdef DEBUG
     987           6 :   assert (mctx->state_log != NULL);
     988             : #endif
     989             :   match_last = mctx->match_last;
     990           6 :   halt_node = mctx->last_node;
     991           0 : 
     992             :   /* Avoid overflow.  */
     993           6 :   if (BE (SIZE_MAX / sizeof (re_dfastate_t *) <= match_last, 0))
     994           6 :     return REG_ESPACE;
     995             : 
     996           0 :   sifted_states = re_malloc (re_dfastate_t *, match_last + 1);
     997           0 :   if (BE (sifted_states == NULL, 0))
     998             :     {
     999           6 :       ret = REG_ESPACE;
    1000             :       goto free_return;
    1001           0 :     }
    1002           0 :   if (dfa->nbackref)
    1003             :     {
    1004           0 :       lim_states = re_malloc (re_dfastate_t *, match_last + 1);
    1005           0 :       if (BE (lim_states == NULL, 0))
    1006             :         {
    1007             :           ret = REG_ESPACE;
    1008             :           goto free_return;
    1009           0 :         }
    1010             :       while (1)
    1011           0 :         {
    1012             :           memset (lim_states, '\0',
    1013           0 :                   sizeof (re_dfastate_t *) * (match_last + 1));
    1014           0 :           sift_ctx_init (&sctx, sifted_states, lim_states, halt_node,
    1015           0 :                          match_last);
    1016           0 :           ret = sift_states_backward (mctx, &sctx);
    1017           0 :           re_node_set_free (&sctx.limits);
    1018             :           if (BE (ret != REG_NOERROR, 0))
    1019             :               goto free_return;
    1020             :           if (sifted_states[0] != NULL || lim_states[0] != NULL)
    1021           0 :             break;
    1022           0 :           do
    1023             :             {
    1024           0 :               --match_last;
    1025           0 :               if (! REG_VALID_INDEX (match_last))
    1026             :                 {
    1027           0 :                   ret = REG_NOMATCH;
    1028           0 :                   goto free_return;
    1029           0 :                 }
    1030           0 :             } while (mctx->state_log[match_last] == NULL
    1031             :                      || !mctx->state_log[match_last]->halt);
    1032             :           halt_node = check_halt_state_context (mctx,
    1033           0 :                                                 mctx->state_log[match_last],
    1034             :                                                 match_last);
    1035           0 :         }
    1036           0 :       ret = merge_state_array (dfa, sifted_states, lim_states,
    1037           0 :                                match_last + 1);
    1038           0 :       re_free (lim_states);
    1039             :       lim_states = NULL;
    1040             :       if (BE (ret != REG_NOERROR, 0))
    1041             :         goto free_return;
    1042           6 :     }
    1043           6 :   else
    1044           6 :     {
    1045           6 :       sift_ctx_init (&sctx, sifted_states, lim_states, halt_node, match_last);
    1046           0 :       ret = sift_states_backward (mctx, &sctx);
    1047             :       re_node_set_free (&sctx.limits);
    1048           6 :       if (BE (ret != REG_NOERROR, 0))
    1049           6 :         goto free_return;
    1050           6 :     }
    1051           6 :   re_free (mctx->state_log);
    1052           6 :   mctx->state_log = sifted_states;
    1053           6 :   sifted_states = NULL;
    1054           6 :   mctx->last_node = halt_node;
    1055           6 :   mctx->match_last = match_last;
    1056           6 :   ret = REG_NOERROR;
    1057           6 :  free_return:
    1058             :   re_free (sifted_states);
    1059             :   re_free (lim_states);
    1060             :   return ret;
    1061             : }
    1062             : 
    1063             : /* Acquire an initial state and return it.
    1064             :    We must select appropriate initial state depending on the context,
    1065             :    since initial states may have constraints like "\<", "^", etc..  */
    1066             : 
    1067             : static inline re_dfastate_t *
    1068             : __attribute ((always_inline)) internal_function
    1069         337 : acquire_init_state_context (reg_errcode_t *err, const re_match_context_t *mctx,
    1070         337 :                             Idx idx)
    1071             : {
    1072             :   const re_dfa_t *const dfa = mctx->dfa;
    1073         110 :   if (dfa->init_state->has_constraint)
    1074         110 :     {
    1075          16 :       unsigned int context;
    1076          94 :       context = re_string_context_at (&mctx->input, idx - 1, mctx->eflags);
    1077          22 :       if (IS_WORD_CONTEXT (context))
    1078          72 :         return dfa->init_state_word;
    1079          70 :       else if (IS_ORDINARY_CONTEXT (context))
    1080           2 :         return dfa->init_state;
    1081           2 :       else if (IS_BEGBUF_CONTEXT (context) && IS_NEWLINE_CONTEXT (context))
    1082           0 :         return dfa->init_state_begbuf;
    1083             :       else if (IS_NEWLINE_CONTEXT (context))
    1084             :         return dfa->init_state_nl;
    1085           0 :       else if (IS_BEGBUF_CONTEXT (context))
    1086           0 :         {
    1087             :           /* It is relatively rare case, then calculate on demand.  */
    1088             :           return re_acquire_state_context (err, dfa,
    1089             :                                            dfa->init_state->entrance_nodes,
    1090             :                                            context);
    1091           0 :         }
    1092             :       else
    1093             :         /* Must not happen?  */
    1094         227 :         return dfa->init_state;
    1095             :     }
    1096             :   else
    1097             :     return dfa->init_state;
    1098             : }
    1099             : 
    1100             : /* Check whether the regular expression match input string INPUT or not,
    1101             :    and return the index where the matching end.  Return REG_MISSING if
    1102             :    there is no match, and return REG_ERROR in case of an error.
    1103             :    FL_LONGEST_MATCH means we want the POSIX longest matching.
    1104             :    If P_MATCH_FIRST is not NULL, and the match fails, it is set to the
    1105             :    next place where we may want to try matching.
    1106             :    Note that the matcher assume that the maching starts from the current
    1107             :    index of the buffer.  */
    1108         337 : 
    1109             : static Idx
    1110             : internal_function
    1111         337 : check_matching (re_match_context_t *mctx, bool fl_longest_match,
    1112             :                 Idx *p_match_first)
    1113         337 : {
    1114         337 :   const re_dfa_t *const dfa = mctx->dfa;
    1115         337 :   reg_errcode_t err;
    1116             :   Idx match = 0;
    1117         337 :   Idx match_last = REG_MISSING;
    1118         337 :   Idx cur_str_idx = re_string_cur_idx (&mctx->input);
    1119             :   re_dfastate_t *cur_state;
    1120         337 :   bool at_init_state = p_match_first != NULL;
    1121         337 :   Idx next_start_idx = cur_str_idx;
    1122             : 
    1123         337 :   err = REG_NOERROR;
    1124             :   cur_state = acquire_init_state_context (&err, mctx, cur_str_idx);
    1125           0 :   /* An initial state must not be NULL (invalid).  */
    1126           0 :   if (BE (cur_state == NULL, 0))
    1127             :     {
    1128             :       assert (err == REG_ESPACE);
    1129         337 :       return REG_ERROR;
    1130             :     }
    1131           8 : 
    1132             :   if (mctx->state_log != NULL)
    1133             :     {
    1134             :       mctx->state_log[cur_str_idx] = cur_state;
    1135           8 : 
    1136             :       /* Check OP_OPEN_SUBEXP in the initial state in case that we use them
    1137           0 :          later.  E.g. Processing back references.  */
    1138           0 :       if (BE (dfa->nbackref, 0))
    1139           0 :         {
    1140           0 :           at_init_state = false;
    1141             :           err = check_subexp_matching_top (mctx, &cur_state->nodes, 0);
    1142           0 :           if (BE (err != REG_NOERROR, 0))
    1143             :             return err;
    1144           0 : 
    1145           0 :           if (cur_state->has_backref)
    1146           0 :             {
    1147             :               err = transit_state_bkref (mctx, &cur_state->nodes);
    1148             :               if (BE (err != REG_NOERROR, 0))
    1149             :                 return err;
    1150             :             }
    1151             :         }
    1152         337 :     }
    1153             : 
    1154         180 :   /* If the RE accepts NULL string.  */
    1155         110 :   if (BE (cur_state->halt, 0))
    1156             :     {
    1157         126 :       if (!cur_state->has_constraint
    1158           0 :           || check_halt_state_context (mctx, cur_state, cur_str_idx))
    1159             :         {
    1160             :           if (!fl_longest_match)
    1161         126 :             return cur_str_idx;
    1162         126 :           else
    1163             :             {
    1164             :               match_last = cur_str_idx;
    1165             :               match = 1;
    1166             :             }
    1167         768 :         }
    1168             :     }
    1169         250 : 
    1170         250 :   while (!re_string_eoi (&mctx->input))
    1171             :     {
    1172         250 :       re_dfastate_t *old_state = cur_state;
    1173         250 :       Idx next_char_idx = re_string_cur_idx (&mctx->input) + 1;
    1174         141 : 
    1175             :       if (BE (next_char_idx >= mctx->input.bufs_len, 0)
    1176           0 :           || (BE (next_char_idx >= mctx->input.valid_len, 0)
    1177           0 :               && mctx->input.valid_len < mctx->input.len))
    1178             :         {
    1179           0 :           err = extend_buffers (mctx);
    1180           0 :           if (BE (err != REG_NOERROR, 0))
    1181             :             {
    1182             :               assert (err == REG_ESPACE);
    1183             :               return REG_ERROR;
    1184         250 :             }
    1185         250 :         }
    1186          20 : 
    1187             :       cur_state = transit_state (&err, mctx, cur_state);
    1188         250 :       if (mctx->state_log != NULL)
    1189             :         cur_state = merge_state_with_log (&err, mctx, cur_state);
    1190             : 
    1191             :       if (cur_state == NULL)
    1192             :         {
    1193         156 :           /* Reached the invalid state or an error.  Try to recover a valid
    1194           0 :              state using the state log, if available and if we have not
    1195             :              already found a valid (even if not the longest) match.  */
    1196         156 :           if (BE (err != REG_NOERROR, 0))
    1197           7 :             return REG_ERROR;
    1198           7 : 
    1199             :           if (mctx->state_log == NULL
    1200             :               || (match && !fl_longest_match)
    1201             :               || (cur_state = find_recover_state (&err, mctx)) == NULL)
    1202          94 :             break;
    1203             :         }
    1204          36 : 
    1205           1 :       if (BE (at_init_state, 0))
    1206             :         {
    1207          35 :           if (old_state == cur_state)
    1208             :             next_start_idx = next_char_idx;
    1209             :           else
    1210          94 :             at_init_state = false;
    1211             :         }
    1212             : 
    1213             :       if (cur_state->halt)
    1214          83 :         {
    1215          11 :           /* Reached a halt state.
    1216             :              Check the halt state can satisfy the current context.  */
    1217             :           if (!cur_state->has_constraint
    1218             :               || check_halt_state_context (mctx, cur_state,
    1219          79 :                                            re_string_cur_idx (&mctx->input)))
    1220          79 :             {
    1221             :               /* We found an appropriate halt state.  */
    1222             :               match_last = re_string_cur_idx (&mctx->input);
    1223          79 :               match = 1;
    1224          79 : 
    1225           0 :               /* We found a match, do not modify match_first below.  */
    1226             :               p_match_first = NULL;
    1227             :               if (!fl_longest_match)
    1228             :                 break;
    1229             :             }
    1230         337 :         }
    1231         203 :     }
    1232             : 
    1233         337 :   if (p_match_first)
    1234             :     *p_match_first += next_start_idx;
    1235             : 
    1236             :   return match_last;
    1237             : }
    1238             : 
    1239             : /* Check NODE match the current context.  */
    1240         341 : 
    1241             : static bool
    1242         341 : internal_function
    1243         341 : check_halt_node_context (const re_dfa_t *dfa, Idx node, unsigned int context)
    1244         341 : {
    1245         241 :   re_token_type_t type = dfa->nodes[node].type;
    1246         100 :   unsigned int constraint = dfa->nodes[node].constraint;
    1247          10 :   if (type != END_OF_RE)
    1248          90 :     return false;
    1249          31 :   if (!constraint)
    1250          59 :     return true;
    1251             :   if (NOT_SATISFY_NEXT_CONSTRAINT (constraint, context))
    1252             :     return false;
    1253             :   return true;
    1254             : }
    1255             : 
    1256             : /* Check the halt state STATE match the current context.
    1257             :    Return 0 if not match, if the node, STATE has, is a halt node and
    1258             :    match the context, return the node.  */
    1259         127 : 
    1260             : static Idx
    1261             : internal_function
    1262             : check_halt_state_context (const re_match_context_t *mctx,
    1263             :                           const re_dfastate_t *state, Idx idx)
    1264             : {
    1265             :   Idx i;
    1266             :   unsigned int context;
    1267         127 : #ifdef DEBUG
    1268         399 :   assert (state->halt);
    1269         341 : #endif
    1270          69 :   context = re_string_context_at (&mctx->input, idx, mctx->eflags);
    1271          58 :   for (i = 0; i < state->nodes.nelem; ++i)
    1272             :     if (check_halt_node_context (mctx->dfa, state->nodes.elems[i], context))
    1273             :       return state->nodes.elems[i];
    1274             :   return 0;
    1275             : }
    1276             : 
    1277             : /* Compute the next node to which "NFA" transit from NODE("NFA" is a NFA
    1278             :    corresponding to the DFA).
    1279             :    Return the destination node, and update EPS_VIA_NODES;
    1280             :    return REG_MISSING in case of errors.  */
    1281          58 : 
    1282             : static Idx
    1283             : internal_function
    1284             : proceed_next_node (const re_match_context_t *mctx, Idx nregs, regmatch_t *regs,
    1285          58 :                    Idx *pidx, Idx node, re_node_set *eps_via_nodes,
    1286             :                    struct re_fail_stack_t *fs)
    1287             : {
    1288          58 :   const re_dfa_t *const dfa = mctx->dfa;
    1289             :   Idx i;
    1290          47 :   bool ok;
    1291          47 :   if (IS_EPSILON_NODE (dfa->nodes[node].type))
    1292             :     {
    1293          47 :       re_node_set *cur_nodes = &mctx->state_log[*pidx]->nodes;
    1294          47 :       re_node_set *edests = &dfa->edests[node];
    1295           0 :       Idx dest_node;
    1296             :       ok = re_node_set_insert (eps_via_nodes, node);
    1297             :       if (BE (! ok, 0))
    1298         123 :         return REG_ERROR;
    1299             :       /* Pick up a valid destination, or return REG_MISSING if none
    1300          76 :          is found.  */
    1301          76 :       for (dest_node = REG_MISSING, i = 0; i < edests->nelem; ++i)
    1302          29 :         {
    1303          47 :           Idx candidate = edests->elems[i];
    1304          47 :           if (!re_node_set_contains (cur_nodes, candidate))
    1305             :             continue;
    1306             :           if (dest_node == REG_MISSING)
    1307             :             dest_node = candidate;
    1308             : 
    1309             :           else
    1310           0 :             {
    1311           0 :               /* In order to avoid infinite loop like "(a*)*", return the second
    1312             :                  epsilon-transition if the first was already considered.  */
    1313             :               if (re_node_set_contains (eps_via_nodes, dest_node))
    1314           0 :                 return candidate;
    1315           0 : 
    1316             :               /* Otherwise, push the second epsilon-transition on the fail stack.  */
    1317           0 :               else if (fs != NULL
    1318             :                        && push_fail_stack (fs, *pidx, candidate, nregs, regs,
    1319             :                                            eps_via_nodes))
    1320           0 :                 return REG_ERROR;
    1321             : 
    1322             :               /* We know we are going to exit.  */
    1323          47 :               break;
    1324             :             }
    1325             :         }
    1326             :       return dest_node;
    1327          11 :     }
    1328          11 :   else
    1329             :     {
    1330             :       Idx naccepted = 0;
    1331          11 :       re_token_type_t type = dfa->nodes[node].type;
    1332           0 : 
    1333             : #ifdef RE_ENABLE_I18N
    1334             :       if (dfa->nodes[node].accept_mb)
    1335          11 :         naccepted = check_node_accept_bytes (dfa, node, &mctx->input, *pidx);
    1336             :       else
    1337           0 : #endif /* RE_ENABLE_I18N */
    1338           0 :       if (type == OP_BACK_REF)
    1339           0 :         {
    1340             :           Idx subexp_idx = dfa->nodes[node].opr.idx + 1;
    1341           0 :           naccepted = regs[subexp_idx].rm_eo - regs[subexp_idx].rm_so;
    1342           0 :           if (fs != NULL)
    1343           0 :             {
    1344             :               if (regs[subexp_idx].rm_so == -1 || regs[subexp_idx].rm_eo == -1)
    1345           0 :                 return REG_MISSING;
    1346           0 :               else if (naccepted)
    1347             :                 {
    1348           0 :                   char *buf = (char *) re_string_get_buffer (&mctx->input);
    1349             :                   if (memcmp (buf + regs[subexp_idx].rm_so, buf + *pidx,
    1350             :                               naccepted) != 0)
    1351             :                     return REG_MISSING;
    1352           0 :                 }
    1353             :             }
    1354             : 
    1355           0 :           if (naccepted == 0)
    1356           0 :             {
    1357           0 :               Idx dest_node;
    1358           0 :               ok = re_node_set_insert (eps_via_nodes, node);
    1359           0 :               if (BE (! ok, 0))
    1360             :                 return REG_ERROR;
    1361           0 :               dest_node = dfa->edests[node].elems[0];
    1362             :               if (re_node_set_contains (&mctx->state_log[*pidx]->nodes,
    1363             :                                         dest_node))
    1364             :                 return dest_node;
    1365          11 :             }
    1366          11 :         }
    1367             : 
    1368          11 :       if (naccepted != 0
    1369          11 :           || check_node_accept (mctx, dfa->nodes + node, *pidx))
    1370          11 :         {
    1371           0 :           Idx dest_node = dfa->nexts[node];
    1372             :           *pidx = (naccepted == 0) ? *pidx + 1 : *pidx + naccepted;
    1373           0 :           if (fs && (*pidx > mctx->match_last || mctx->state_log[*pidx] == NULL
    1374          11 :                      || !re_node_set_contains (&mctx->state_log[*pidx]->nodes,
    1375          11 :                                                dest_node)))
    1376             :             return REG_MISSING;
    1377             :           re_node_set_empty (eps_via_nodes);
    1378           0 :           return dest_node;
    1379             :         }
    1380             :     }
    1381             :   return REG_MISSING;
    1382             : }
    1383           0 : 
    1384             : static reg_errcode_t
    1385             : internal_function
    1386             : push_fail_stack (struct re_fail_stack_t *fs, Idx str_idx, Idx dest_node,
    1387           0 :                  Idx nregs, regmatch_t *regs, re_node_set *eps_via_nodes)
    1388           0 : {
    1389             :   reg_errcode_t err;
    1390             :   Idx num = fs->num++;
    1391           0 :   if (fs->num == fs->alloc)
    1392           0 :     {
    1393           0 :       struct re_fail_stack_ent_t *new_array;
    1394           0 :       new_array = realloc (fs->stack, (sizeof (struct re_fail_stack_ent_t)
    1395           0 :                                        * fs->alloc * 2));
    1396           0 :       if (new_array == NULL)
    1397             :         return REG_ESPACE;
    1398           0 :       fs->alloc *= 2;
    1399           0 :       fs->stack = new_array;
    1400           0 :     }
    1401           0 :   fs->stack[num].idx = str_idx;
    1402           0 :   fs->stack[num].node = dest_node;
    1403           0 :   fs->stack[num].regs = re_malloc (regmatch_t, nregs);
    1404           0 :   if (fs->stack[num].regs == NULL)
    1405           0 :     return REG_ESPACE;
    1406             :   memcpy (fs->stack[num].regs, regs, sizeof (regmatch_t) * nregs);
    1407             :   err = re_node_set_init_copy (&fs->stack[num].eps_via_nodes, eps_via_nodes);
    1408             :   return err;
    1409             : }
    1410           0 : 
    1411             : static Idx
    1412             : internal_function
    1413           0 : pop_fail_stack (struct re_fail_stack_t *fs, Idx *pidx, Idx nregs,
    1414           0 :                 regmatch_t *regs, re_node_set *eps_via_nodes)
    1415           0 : {
    1416           0 :   Idx num = --fs->num;
    1417           0 :   assert (REG_VALID_INDEX (num));
    1418           0 :   *pidx = fs->stack[num].idx;
    1419           0 :   memcpy (regs, fs->stack[num].regs, sizeof (regmatch_t) * nregs);
    1420           0 :   re_node_set_free (eps_via_nodes);
    1421             :   re_free (fs->stack[num].regs);
    1422             :   *eps_via_nodes = fs->stack[num].eps_via_nodes;
    1423             :   return fs->stack[num].node;
    1424             : }
    1425             : 
    1426             : /* Set the positions where the subexpressions are starts/ends to registers
    1427             :    PMATCH.
    1428             :    Note: We assume that pmatch[0] is already set, and
    1429             :    pmatch[i].rm_so == pmatch[i].rm_eo == -1 for 0 < i < nmatch.  */
    1430           6 : 
    1431             : static reg_errcode_t
    1432             : internal_function
    1433           6 : set_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch,
    1434             :           regmatch_t *pmatch, bool fl_backtrack)
    1435             : {
    1436             :   const re_dfa_t *dfa = (const re_dfa_t *) preg->buffer;
    1437           6 :   Idx idx, cur_node;
    1438             :   re_node_set eps_via_nodes;
    1439           6 :   struct re_fail_stack_t *fs;
    1440             :   struct re_fail_stack_t fs_body = { 0, 2, NULL };
    1441             :   regmatch_t *prev_idx_match;
    1442             :   bool prev_idx_match_malloced = false;
    1443             : 
    1444             : #ifdef DEBUG
    1445           6 :   assert (nmatch > 1);
    1446             :   assert (mctx->state_log != NULL);
    1447           0 : #endif
    1448           0 :   if (fl_backtrack)
    1449           0 :     {
    1450           0 :       fs = &fs_body;
    1451             :       fs->stack = re_malloc (struct re_fail_stack_ent_t, fs->alloc);
    1452             :       if (fs->stack == NULL)
    1453           6 :         return REG_ESPACE;
    1454             :     }
    1455           6 :   else
    1456           6 :     fs = NULL;
    1457             : 
    1458           6 :   cur_node = dfa->init_node;
    1459           6 :   re_node_set_init_empty (&eps_via_nodes);
    1460             : 
    1461             :   if (__libc_use_alloca (nmatch * sizeof (regmatch_t)))
    1462           0 :     prev_idx_match = (regmatch_t *) alloca (nmatch * sizeof (regmatch_t));
    1463           0 :   else
    1464             :     {
    1465           0 :       prev_idx_match = re_malloc (regmatch_t, nmatch);
    1466           0 :       if (prev_idx_match == NULL)
    1467             :         {
    1468           0 :           free_fail_stack_return (fs);
    1469             :           return REG_ESPACE;
    1470           6 :         }
    1471             :       prev_idx_match_malloced = true;
    1472          70 :     }
    1473             :   memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
    1474          64 : 
    1475             :   for (idx = pmatch[0].rm_so; idx <= pmatch[0].rm_eo ;)
    1476          64 :     {
    1477             :       update_regs (dfa, pmatch, prev_idx_match, cur_node, idx, nmatch);
    1478             : 
    1479           6 :       if (idx == pmatch[0].rm_eo && cur_node == mctx->last_node)
    1480             :         {
    1481           0 :           Idx reg_idx;
    1482           0 :           if (fs)
    1483           0 :             {
    1484           0 :               for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
    1485             :                 if (pmatch[reg_idx].rm_so > -1 && pmatch[reg_idx].rm_eo == -1)
    1486           0 :                   break;
    1487           0 :               if (reg_idx == nmatch)
    1488           0 :                 {
    1489           0 :                   re_node_set_free (&eps_via_nodes);
    1490             :                   if (prev_idx_match_malloced)
    1491           0 :                     re_free (prev_idx_match);
    1492             :                   return free_fail_stack_return (fs);
    1493             :                 }
    1494             :               cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
    1495             :                                          &eps_via_nodes);
    1496           6 :             }
    1497           6 :           else
    1498           0 :             {
    1499           6 :               re_node_set_free (&eps_via_nodes);
    1500             :               if (prev_idx_match_malloced)
    1501             :                 re_free (prev_idx_match);
    1502             :               return REG_NOERROR;
    1503             :             }
    1504          58 :         }
    1505             : 
    1506             :       /* Proceed to next node.  */
    1507          58 :       cur_node = proceed_next_node (mctx, nmatch, pmatch, &idx, cur_node,
    1508             :                                     &eps_via_nodes, fs);
    1509           0 : 
    1510             :       if (BE (! REG_VALID_INDEX (cur_node), 0))
    1511           0 :         {
    1512           0 :           if (BE (cur_node == REG_ERROR, 0))
    1513           0 :             {
    1514           0 :               re_node_set_free (&eps_via_nodes);
    1515           0 :               if (prev_idx_match_malloced)
    1516             :                 re_free (prev_idx_match);
    1517           0 :               free_fail_stack_return (fs);
    1518           0 :               return REG_ESPACE;
    1519             :             }
    1520             :           if (fs)
    1521             :             cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
    1522           0 :                                        &eps_via_nodes);
    1523           0 :           else
    1524           0 :             {
    1525           0 :               re_node_set_free (&eps_via_nodes);
    1526             :               if (prev_idx_match_malloced)
    1527             :                 re_free (prev_idx_match);
    1528             :               return REG_NOMATCH;
    1529           0 :             }
    1530           0 :         }
    1531           0 :     }
    1532           0 :   re_node_set_free (&eps_via_nodes);
    1533             :   if (prev_idx_match_malloced)
    1534             :     re_free (prev_idx_match);
    1535             :   return free_fail_stack_return (fs);
    1536             : }
    1537           0 : 
    1538             : static reg_errcode_t
    1539           0 : internal_function
    1540             : free_fail_stack_return (struct re_fail_stack_t *fs)
    1541             : {
    1542           0 :   if (fs)
    1543             :     {
    1544           0 :       Idx fs_idx;
    1545           0 :       for (fs_idx = 0; fs_idx < fs->num; ++fs_idx)
    1546             :         {
    1547           0 :           re_node_set_free (&fs->stack[fs_idx].eps_via_nodes);
    1548             :           re_free (fs->stack[fs_idx].regs);
    1549           0 :         }
    1550             :       re_free (fs->stack);
    1551             :     }
    1552             :   return REG_NOERROR;
    1553             : }
    1554          64 : 
    1555             : static void
    1556             : internal_function
    1557          64 : update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,
    1558          64 :              regmatch_t *prev_idx_match, Idx cur_node, Idx cur_idx, Idx nmatch)
    1559             : {
    1560           6 :   int type = dfa->nodes[cur_node].type;
    1561             :   if (type == OP_OPEN_SUBEXP)
    1562             :     {
    1563           6 :       Idx reg_num = dfa->nodes[cur_node].opr.idx + 1;
    1564             : 
    1565           6 :       /* We are at the first node of this sub expression.  */
    1566           6 :       if (reg_num < nmatch)
    1567             :         {
    1568             :           pmatch[reg_num].rm_so = cur_idx;
    1569          58 :           pmatch[reg_num].rm_eo = -1;
    1570             :         }
    1571           6 :     }
    1572           6 :   else if (type == OP_CLOSE_SUBEXP)
    1573             :     {
    1574             :       Idx reg_num = dfa->nodes[cur_node].opr.idx + 1;
    1575           6 :       if (reg_num < nmatch)
    1576             :         {
    1577           0 :           /* We are at the last node of this sub expression.  */
    1578             :           if (pmatch[reg_num].rm_so < cur_idx)
    1579             :             {
    1580           0 :               pmatch[reg_num].rm_eo = cur_idx;
    1581             :               /* This is a non-empty match or we are not inside an optional
    1582             :                  subexpression.  Accept this right away.  */
    1583             :               memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
    1584           6 :             }
    1585           0 :           else
    1586             :             {
    1587             :               if (dfa->nodes[cur_node].opt_subexp
    1588             :                   && prev_idx_match[reg_num].rm_so != -1)
    1589             :                 /* We transited through an empty match for an optional
    1590             :                    subexpression, like (a?)*, and this is not the subexp's
    1591           0 :                    first match.  Copy back the old content of the registers
    1592             :                    so that matches of an inner subexpression are undone as
    1593             :                    well, like in ((a?))*.  */
    1594             :                 memcpy (pmatch, prev_idx_match, sizeof (regmatch_t) * nmatch);
    1595           6 :               else
    1596             :                 /* We completed a subexpression, but it may be part of
    1597             :                    an optional one, so do not update PREV_IDX_MATCH.  */
    1598             :                 pmatch[reg_num].rm_eo = cur_idx;
    1599          64 :             }
    1600             :         }
    1601             :     }
    1602             : }
    1603             : 
    1604             : /* This function checks the STATE_LOG from the SCTX->last_str_idx to 0
    1605             :    and sift the nodes in each states according to the following rules.
    1606             :    Updated state_log will be wrote to STATE_LOG.
    1607             : 
    1608             :    Rules: We throw away the Node `a' in the STATE_LOG[STR_IDX] if...
    1609             :      1. When STR_IDX == MATCH_LAST(the last index in the state_log):
    1610             :         If `a' isn't the LAST_NODE and `a' can't epsilon transit to
    1611             :         the LAST_NODE, we throw away the node `a'.
    1612             :      2. When 0 <= STR_IDX < MATCH_LAST and `a' accepts
    1613             :         string `s' and transit to `b':
    1614             :         i. If 'b' isn't in the STATE_LOG[STR_IDX+strlen('s')], we throw
    1615             :            away the node `a'.
    1616             :         ii. If 'b' is in the STATE_LOG[STR_IDX+strlen('s')] but 'b' is
    1617             :             thrown away, we throw away the node `a'.
    1618             :      3. When 0 <= STR_IDX < MATCH_LAST and 'a' epsilon transit to 'b':
    1619             :         i. If 'b' isn't in the STATE_LOG[STR_IDX], we throw away the
    1620             :            node `a'.
    1621             :         ii. If 'b' is in the STATE_LOG[STR_IDX] but 'b' is thrown away,
    1622             :             we throw away the node `a'.  */
    1623             : 
    1624             : #define STATE_NODE_CONTAINS(state,node) \
    1625             :   ((state) != NULL && re_node_set_contains (&(state)->nodes, node))
    1626           6 : 
    1627             : static reg_errcode_t
    1628             : internal_function
    1629           6 : sift_states_backward (const re_match_context_t *mctx, re_sift_context_t *sctx)
    1630           6 : {
    1631             :   reg_errcode_t err;
    1632             :   int null_cnt = 0;
    1633             :   Idx str_idx = sctx->last_str_idx;
    1634             :   re_node_set cur_dest;
    1635             : 
    1636             : #ifdef DEBUG
    1637             :   assert (mctx->state_log != NULL && mctx->state_log[str_idx] != NULL);
    1638             : #endif
    1639           6 : 
    1640           6 :   /* Build sifted state_log[str_idx].  It has the nodes which can epsilon
    1641           0 :      transit to the last_node and the last_node itself.  */
    1642           6 :   err = re_node_set_init_1 (&cur_dest, sctx->last_node);
    1643           6 :   if (BE (err != REG_NOERROR, 0))
    1644           0 :     return err;
    1645             :   err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
    1646             :   if (BE (err != REG_NOERROR, 0))
    1647          23 :     goto free_return;
    1648             : 
    1649             :   /* Then check each states in the state_log.  */
    1650          11 :   while (str_idx > 0)
    1651          11 :     {
    1652             :       /* Update counters.  */
    1653           0 :       null_cnt = (sctx->sifted_states[str_idx] == NULL) ? null_cnt + 1 : 0;
    1654             :       if (null_cnt > mctx->max_mb_elem_len)
    1655           0 :         {
    1656           0 :           memset (sctx->sifted_states, '\0',
    1657             :                   sizeof (re_dfastate_t *) * str_idx);
    1658          11 :           re_node_set_free (&cur_dest);
    1659          11 :           return REG_NOERROR;
    1660             :         }
    1661          11 :       re_node_set_empty (&cur_dest);
    1662             :       --str_idx;
    1663          11 : 
    1664          11 :       if (mctx->state_log[str_idx])
    1665           0 :         {
    1666             :           err = build_sifted_states (mctx, sctx, str_idx, &cur_dest);
    1667             :           if (BE (err != REG_NOERROR, 0))
    1668             :             goto free_return;
    1669             :         }
    1670             : 
    1671             :       /* Add all the nodes which satisfy the following conditions:
    1672          11 :          - It can epsilon transit to a node in CUR_DEST.
    1673          11 :          - It is in CUR_SRC.
    1674           0 :          And update state_log.  */
    1675             :       err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
    1676           6 :       if (BE (err != REG_NOERROR, 0))
    1677           6 :         goto free_return;
    1678           6 :     }
    1679           6 :   err = REG_NOERROR;
    1680             :  free_return:
    1681             :   re_node_set_free (&cur_dest);
    1682             :   return err;
    1683             : }
    1684          11 : 
    1685             : static reg_errcode_t
    1686             : internal_function
    1687          11 : build_sifted_states (const re_match_context_t *mctx, re_sift_context_t *sctx,
    1688          11 :                      Idx str_idx, re_node_set *cur_dest)
    1689             : {
    1690             :   const re_dfa_t *const dfa = mctx->dfa;
    1691             :   const re_node_set *cur_src = &mctx->state_log[str_idx]->non_eps_nodes;
    1692             :   Idx i;
    1693             : 
    1694             :   /* Then build the next sifted state.
    1695             :      We build the next sifted state on `cur_dest', and update
    1696             :      `sifted_states[str_idx]' with `cur_dest'.
    1697             :      Note:
    1698          42 :      `cur_dest' is the sifted state from `state_log[str_idx + 1]'.
    1699             :      `cur_src' points the node_set of the old `state_log[str_idx]'
    1700          31 :      (with the epsilon nodes pre-filtered out).  */
    1701          31 :   for (i = 0; i < cur_src->nelem; i++)
    1702             :     {
    1703             :       Idx prev_node = cur_src->elems[i];
    1704             :       int naccepted = 0;
    1705             :       bool ok;
    1706             : 
    1707             : #ifdef DEBUG
    1708             :       re_token_type_t type = dfa->nodes[prev_node].type;
    1709             :       assert (!IS_EPSILON_NODE (type));
    1710          31 : #endif
    1711           0 : #ifdef RE_ENABLE_I18N
    1712             :       /* If the node may accept `multi byte'.  */
    1713             :       if (dfa->nodes[prev_node].accept_mb)
    1714             :         naccepted = sift_states_iter_mb (mctx, sctx, prev_node,
    1715             :                                          str_idx, sctx->last_str_idx);
    1716             : #endif /* RE_ENABLE_I18N */
    1717          31 : 
    1718          31 :       /* We don't check backreferences here.
    1719          11 :          See update_cur_sifted_state().  */
    1720             :       if (!naccepted
    1721          11 :           && check_node_accept (mctx, dfa->nodes + prev_node, str_idx)
    1722             :           && STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + 1],
    1723          31 :                                   dfa->nexts[prev_node]))
    1724          20 :         naccepted = 1;
    1725             : 
    1726          11 :       if (naccepted == 0)
    1727             :         continue;
    1728           0 : 
    1729           0 :       if (sctx->limits.nelem)
    1730           0 :         {
    1731             :           Idx to_idx = str_idx + naccepted;
    1732           0 :           if (check_dst_limits (mctx, &sctx->limits,
    1733             :                                 dfa->nexts[prev_node], to_idx,
    1734          11 :                                 prev_node, str_idx))
    1735          11 :             continue;
    1736           0 :         }
    1737             :       ok = re_node_set_insert (cur_dest, prev_node);
    1738             :       if (BE (! ok, 0))
    1739          11 :         return REG_ESPACE;
    1740             :     }
    1741             : 
    1742             :   return REG_NOERROR;
    1743             : }
    1744             : 
    1745             : /* Helper functions.  */
    1746           0 : 
    1747             : static reg_errcode_t
    1748           0 : internal_function
    1749             : clean_state_log_if_needed (re_match_context_t *mctx, Idx next_state_log_idx)
    1750           0 : {
    1751           0 :   Idx top = mctx->state_log_top;
    1752           0 : 
    1753             :   if (next_state_log_idx >= mctx->input.bufs_len
    1754             :       || (next_state_log_idx >= mctx->input.valid_len
    1755           0 :           && mctx->input.valid_len < mctx->input.len))
    1756           0 :     {
    1757           0 :       reg_errcode_t err;
    1758             :       err = extend_buffers (mctx);
    1759             :       if (BE (err != REG_NOERROR, 0))
    1760           0 :         return err;
    1761             :     }
    1762           0 : 
    1763           0 :   if (top < next_state_log_idx)
    1764           0 :     {
    1765             :       memset (mctx->state_log + top + 1, '\0',
    1766           0 :               sizeof (re_dfastate_t *) * (next_state_log_idx - top));
    1767             :       mctx->state_log_top = next_state_log_idx;
    1768             :     }
    1769             :   return REG_NOERROR;
    1770             : }
    1771           0 : 
    1772             : static reg_errcode_t
    1773             : internal_function
    1774             : merge_state_array (const re_dfa_t *dfa, re_dfastate_t **dst,
    1775             :                    re_dfastate_t **src, Idx num)
    1776           0 : {
    1777             :   Idx st_idx;
    1778           0 :   reg_errcode_t err;
    1779           0 :   for (st_idx = 0; st_idx < num; ++st_idx)
    1780           0 :     {
    1781             :       if (dst[st_idx] == NULL)
    1782             :         dst[st_idx] = src[st_idx];
    1783           0 :       else if (src[st_idx] != NULL)
    1784           0 :         {
    1785           0 :           re_node_set merged_set;
    1786           0 :           err = re_node_set_init_union (&merged_set, &dst[st_idx]->nodes,
    1787           0 :                                         &src[st_idx]->nodes);
    1788           0 :           if (BE (err != REG_NOERROR, 0))
    1789           0 :             return err;
    1790           0 :           dst[st_idx] = re_acquire_state (&err, dfa, &merged_set);
    1791             :           re_node_set_free (&merged_set);
    1792             :           if (BE (err != REG_NOERROR, 0))
    1793           0 :             return err;
    1794             :         }
    1795             :     }
    1796             :   return REG_NOERROR;
    1797             : }
    1798          17 : 
    1799             : static reg_errcode_t
    1800             : internal_function
    1801             : update_cur_sifted_state (const re_match_context_t *mctx,
    1802          17 :                          re_sift_context_t *sctx, Idx str_idx,
    1803          17 :                          re_node_set *dest_nodes)
    1804             : {
    1805          34 :   const re_dfa_t *const dfa = mctx->dfa;
    1806          17 :   reg_errcode_t err = REG_NOERROR;
    1807             :   const re_node_set *candidates;
    1808          17 :   candidates = ((mctx->state_log[str_idx] == NULL) ? NULL
    1809           0 :                 : &mctx->state_log[str_idx]->nodes);
    1810             : 
    1811             :   if (dest_nodes->nelem == 0)
    1812          17 :     sctx->sifted_states[str_idx] = NULL;
    1813             :   else
    1814             :     {
    1815             :       if (candidates)
    1816          17 :         {
    1817          17 :           /* At first, add the nodes which can epsilon transit to a node in
    1818           0 :              DEST_NODE.  */
    1819             :           err = add_epsilon_src_nodes (dfa, dest_nodes, candidates);
    1820             :           if (BE (err != REG_NOERROR, 0))
    1821          17 :             return err;
    1822             : 
    1823           0 :           /* Then, check the limitations in the current sift_context.  */
    1824             :           if (sctx->limits.nelem)
    1825           0 :             {
    1826           0 :               err = check_subexp_limits (dfa, dest_nodes, candidates, &sctx->limits,
    1827             :                                          mctx->bkref_ents, str_idx);
    1828             :               if (BE (err != REG_NOERROR, 0))
    1829             :                 return err;
    1830          17 :             }
    1831          17 :         }
    1832           0 : 
    1833             :       sctx->sifted_states[str_idx] = re_acquire_state (&err, dfa, dest_nodes);
    1834             :       if (BE (err != REG_NOERROR, 0))
    1835          17 :         return err;
    1836             :     }
    1837           0 : 
    1838           0 :   if (candidates && mctx->state_log[str_idx]->has_backref)
    1839           0 :     {
    1840             :       err = sift_states_bkref (mctx, sctx, str_idx, candidates);
    1841          17 :       if (BE (err != REG_NOERROR, 0))
    1842             :         return err;
    1843             :     }
    1844             :   return REG_NOERROR;
    1845             : }
    1846          17 : 
    1847             : static reg_errcode_t
    1848             : internal_function
    1849          17 : add_epsilon_src_nodes (const re_dfa_t *dfa, re_node_set *dest_nodes,
    1850             :                        const re_node_set *candidates)
    1851             : {
    1852          17 :   reg_errcode_t err = REG_NOERROR;
    1853          17 :   Idx i;
    1854           0 : 
    1855             :   re_dfastate_t *state = re_acquire_state (&err, dfa, dest_nodes);
    1856          17 :   if (BE (err != REG_NOERROR, 0))
    1857             :     return err;
    1858          17 : 
    1859          17 :   if (!state->inveclosure.alloc)
    1860           0 :     {
    1861          34 :       err = re_node_set_alloc (&state->inveclosure, dest_nodes->nelem);
    1862          17 :       if (BE (err != REG_NOERROR, 0))
    1863          17 :         return REG_ESPACE;
    1864             :       for (i = 0; i < dest_nodes->nelem; i++)
    1865          17 :         re_node_set_merge (&state->inveclosure,
    1866          17 :                            dfa->inveclosures + dest_nodes->elems[i]);
    1867             :     }
    1868             :   return re_node_set_add_intersect (dest_nodes, candidates,
    1869             :                                     &state->inveclosure);
    1870             : }
    1871           0 : 
    1872             : static reg_errcode_t
    1873             : internal_function
    1874             : sub_epsilon_src_nodes (const re_dfa_t *dfa, Idx node, re_node_set *dest_nodes,
    1875             :                        const re_node_set *candidates)
    1876           0 : {
    1877             :     Idx ecl_idx;
    1878           0 :     reg_errcode_t err;
    1879           0 :     re_node_set *inv_eclosure = dfa->inveclosures + node;
    1880             :     re_node_set except_nodes;
    1881           0 :     re_node_set_init_empty (&except_nodes);
    1882           0 :     for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
    1883           0 :       {
    1884           0 :         Idx cur_node = inv_eclosure->elems[ecl_idx];
    1885             :         if (cur_node == node)
    1886           0 :           continue;
    1887           0 :         if (IS_EPSILON_NODE (dfa->nodes[cur_node].type))
    1888           0 :           {
    1889           0 :             Idx edst1 = dfa->edests[cur_node].elems[0];
    1890           0 :             Idx edst2 = ((dfa->edests[cur_node].nelem > 1)
    1891           0 :                          ? dfa->edests[cur_node].elems[1] : REG_MISSING);
    1892           0 :             if ((!re_node_set_contains (inv_eclosure, edst1)
    1893           0 :                  && re_node_set_contains (dest_nodes, edst1))
    1894             :                 || (REG_VALID_NONZERO_INDEX (edst2)
    1895           0 :                     && !re_node_set_contains (inv_eclosure, edst2)
    1896           0 :                     && re_node_set_contains (dest_nodes, edst2)))
    1897           0 :               {
    1898             :                 err = re_node_set_add_intersect (&except_nodes, candidates,
    1899           0 :                                                  dfa->inveclosures + cur_node);
    1900           0 :                 if (BE (err != REG_NOERROR, 0))
    1901             :                   {
    1902             :                     re_node_set_free (&except_nodes);
    1903             :                     return err;
    1904             :                   }
    1905           0 :               }
    1906             :           }
    1907           0 :       }
    1908           0 :     for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
    1909             :       {
    1910           0 :         Idx cur_node = inv_eclosure->elems[ecl_idx];
    1911           0 :         if (!re_node_set_contains (&except_nodes, cur_node))
    1912             :           {
    1913             :             Idx idx = re_node_set_contains (dest_nodes, cur_node) - 1;
    1914           0 :             re_node_set_remove_at (dest_nodes, idx);
    1915           0 :           }
    1916             :       }
    1917             :     re_node_set_free (&except_nodes);
    1918             :     return REG_NOERROR;
    1919             : }
    1920           0 : 
    1921             : static bool
    1922             : internal_function
    1923           0 : check_dst_limits (const re_match_context_t *mctx, const re_node_set *limits,
    1924             :                   Idx dst_node, Idx dst_idx, Idx src_node, Idx src_idx)
    1925             : {
    1926           0 :   const re_dfa_t *const dfa = mctx->dfa;
    1927           0 :   Idx lim_idx, src_pos, dst_pos;
    1928           0 : 
    1929             :   Idx dst_bkref_idx = search_cur_bkref_entry (mctx, dst_idx);
    1930             :   Idx src_bkref_idx = search_cur_bkref_entry (mctx, src_idx);
    1931             :   for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
    1932           0 :     {
    1933           0 :       Idx subexp_idx;
    1934             :       struct re_backref_cache_entry *ent;
    1935           0 :       ent = mctx->bkref_ents + limits->elems[lim_idx];
    1936             :       subexp_idx = dfa->nodes[ent->node].opr.idx;
    1937             : 
    1938           0 :       dst_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
    1939             :                                            subexp_idx, dst_node, dst_idx,
    1940             :                                            dst_bkref_idx);
    1941             :       src_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
    1942             :                                            subexp_idx, src_node, src_idx,
    1943             :                                            src_bkref_idx);
    1944             : 
    1945             :       /* In case of:
    1946           0 :          <src> <dst> ( <subexp> )
    1947           0 :          ( <subexp> ) <src> <dst>
    1948             :          ( <subexp1> <src> <subexp2> <dst> <subexp3> )  */
    1949           0 :       if (src_pos == dst_pos)
    1950             :         continue; /* This is unrelated limitation.  */
    1951           0 :       else
    1952             :         return true;
    1953             :     }
    1954             :   return false;
    1955             : }
    1956           0 : 
    1957             : static int
    1958             : internal_function
    1959           0 : check_dst_limits_calc_pos_1 (const re_match_context_t *mctx, int boundaries,
    1960           0 :                              Idx subexp_idx, Idx from_node, Idx bkref_idx)
    1961             : {
    1962             :   const re_dfa_t *const dfa = mctx->dfa;
    1963             :   const re_node_set *eclosures = dfa->eclosures + from_node;
    1964             :   Idx node_idx;
    1965           0 : 
    1966             :   /* Else, we are on the boundary: examine the nodes on the epsilon
    1967           0 :      closure.  */
    1968           0 :   for (node_idx = 0; node_idx < eclosures->nelem; ++node_idx)
    1969             :     {
    1970           0 :       Idx node = eclosures->elems[node_idx];
    1971           0 :       switch (dfa->nodes[node].type)
    1972             :         {
    1973           0 :         case OP_BACK_REF:
    1974             :           if (bkref_idx != REG_MISSING)
    1975             :             {
    1976             :               struct re_backref_cache_entry *ent = mctx->bkref_ents + bkref_idx;
    1977             :               do
    1978             :                 {
    1979           0 :                   Idx dst;
    1980           0 :                   int cpos;
    1981             : 
    1982           0 :                   if (ent->node != node)
    1983           0 :                     continue;
    1984             : 
    1985           0 :                   if (subexp_idx < BITSET_WORD_BITS
    1986             :                       && !(ent->eps_reachable_subexps_map
    1987             :                            & ((bitset_word_t) 1 << subexp_idx)))
    1988             :                     continue;
    1989             : 
    1990             :                   /* Recurse trying to reach the OP_OPEN_SUBEXP and
    1991             :                      OP_CLOSE_SUBEXP cases below.  But, if the
    1992             :                      destination node is the same node as the source
    1993           0 :                      node, don't recurse because it would cause an
    1994           0 :                      infinite loop: a regex that exhibits this behavior
    1995             :                      is ()\1*\1*  */
    1996           0 :                   dst = dfa->edests[node].elems[0];
    1997           0 :                   if (dst == from_node)
    1998             :                     {
    1999           0 :                       if (boundaries & 1)
    2000             :                         return -1;
    2001             :                       else /* if (boundaries & 2) */
    2002           0 :                         return 0;
    2003             :                     }
    2004             : 
    2005           0 :                   cpos =
    2006           0 :                     check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
    2007           0 :                                                  dst, bkref_idx);
    2008           0 :                   if (cpos == -1 /* && (boundaries & 1) */)
    2009             :                     return -1;
    2010           0 :                   if (cpos == 0 && (boundaries & 2))
    2011             :                     return 0;
    2012           0 : 
    2013             :                   if (subexp_idx < BITSET_WORD_BITS)
    2014           0 :                     ent->eps_reachable_subexps_map
    2015             :                       &= ~((bitset_word_t) 1 << subexp_idx);
    2016           0 :                 }
    2017             :               while (ent++->more);
    2018           0 :             }
    2019           0 :           break;
    2020           0 : 
    2021           0 :         case OP_OPEN_SUBEXP:
    2022             :           if ((boundaries & 1) && subexp_idx == dfa->nodes[node].opr.idx)
    2023           0 :             return -1;
    2024           0 :           break;
    2025           0 : 
    2026           0 :         case OP_CLOSE_SUBEXP:
    2027             :           if ((boundaries & 2) && subexp_idx == dfa->nodes[node].opr.idx)
    2028           0 :             return 0;
    2029           0 :           break;
    2030             : 
    2031             :         default:
    2032             :             break;
    2033           0 :         }
    2034             :     }
    2035             : 
    2036             :   return (boundaries & 2) ? 1 : 0;
    2037             : }
    2038           0 : 
    2039             : static int
    2040             : internal_function
    2041             : check_dst_limits_calc_pos (const re_match_context_t *mctx, Idx limit,
    2042           0 :                            Idx subexp_idx, Idx from_node, Idx str_idx,
    2043             :                            Idx bkref_idx)
    2044             : {
    2045             :   struct re_backref_cache_entry *lim = mctx->bkref_ents + limit;
    2046           0 :   int boundaries;
    2047           0 : 
    2048             :   /* If we are outside the range of the subexpression, return -1 or 1.  */
    2049           0 :   if (str_idx < lim->subexp_from)
    2050           0 :     return -1;
    2051             : 
    2052             :   if (lim->subexp_to < str_idx)
    2053           0 :     return 1;
    2054           0 : 
    2055           0 :   /* If we are within the subexpression, return 0.  */
    2056           0 :   boundaries = (str_idx == lim->subexp_from);
    2057             :   boundaries |= (str_idx == lim->subexp_to) << 1;
    2058             :   if (boundaries == 0)
    2059           0 :     return 0;
    2060             : 
    2061             :   /* Else, examine epsilon closure.  */
    2062             :   return check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
    2063             :                                       from_node, bkref_idx);
    2064             : }
    2065             : 
    2066             : /* Check the limitations of sub expressions LIMITS, and remove the nodes
    2067             :    which are against limitations from DEST_NODES. */
    2068           0 : 
    2069             : static reg_errcode_t
    2070             : internal_function
    2071             : check_subexp_limits (const re_dfa_t *dfa, re_node_set *dest_nodes,
    2072             :                      const re_node_set *candidates, re_node_set *limits,
    2073             :                      struct re_backref_cache_entry *bkref_ents, Idx str_idx)
    2074             : {
    2075           0 :   reg_errcode_t err;
    2076             :   Idx node_idx, lim_idx;
    2077             : 
    2078             :   for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
    2079           0 :     {
    2080             :       Idx subexp_idx;
    2081           0 :       struct re_backref_cache_entry *ent;
    2082           0 :       ent = bkref_ents + limits->elems[lim_idx];
    2083             : 
    2084           0 :       if (str_idx <= ent->subexp_from || ent->str_idx < str_idx)
    2085           0 :         continue; /* This is unrelated limitation.  */
    2086             : 
    2087           0 :       subexp_idx = dfa->nodes[ent->node].opr.idx;
    2088           0 :       if (ent->subexp_to == str_idx)
    2089           0 :         {
    2090             :           Idx ops_node = REG_MISSING;
    2091           0 :           Idx cls_node = REG_MISSING;
    2092           0 :           for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
    2093           0 :             {
    2094           0 :               Idx node = dest_nodes->elems[node_idx];
    2095           0 :               re_token_type_t type = dfa->nodes[node].type;
    2096           0 :               if (type == OP_OPEN_SUBEXP
    2097           0 :                   && subexp_idx == dfa->nodes[node].opr.idx)
    2098           0 :                 ops_node = node;
    2099             :               else if (type == OP_CLOSE_SUBEXP
    2100             :                        && subexp_idx == dfa->nodes[node].opr.idx)
    2101             :                 cls_node = node;
    2102             :             }
    2103           0 : 
    2104             :           /* Check the limitation of the open subexpression.  */
    2105           0 :           /* Note that (ent->subexp_to = str_idx != ent->subexp_from).  */
    2106             :           if (REG_VALID_INDEX (ops_node))
    2107           0 :             {
    2108           0 :               err = sub_epsilon_src_nodes (dfa, ops_node, dest_nodes,
    2109             :                                            candidates);
    2110             :               if (BE (err != REG_NOERROR, 0))
    2111             :                 return err;
    2112           0 :             }
    2113           0 : 
    2114             :           /* Check the limitation of the close subexpression.  */
    2115           0 :           if (REG_VALID_INDEX (cls_node))
    2116           0 :             for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
    2117             :               {
    2118           0 :                 Idx node = dest_nodes->elems[node_idx];
    2119             :                 if (!re_node_set_contains (dfa->inveclosures + node,
    2120             :                                            cls_node)
    2121             :                     && !re_node_set_contains (dfa->eclosures + node,
    2122             :                                               cls_node))
    2123           0 :                   {
    2124             :                     /* It is against this limitation.
    2125           0 :                        Remove it form the current sifted state.  */
    2126           0 :                     err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
    2127           0 :                                                  candidates);
    2128             :                     if (BE (err != REG_NOERROR, 0))
    2129             :                       return err;
    2130             :                     --node_idx;
    2131             :                   }
    2132             :               }
    2133           0 :         }
    2134             :       else /* (ent->subexp_to != str_idx)  */
    2135           0 :         {
    2136           0 :           for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
    2137           0 :             {
    2138             :               Idx node = dest_nodes->elems[node_idx];
    2139           0 :               re_token_type_t type = dfa->nodes[node].type;
    2140           0 :               if (type == OP_CLOSE_SUBEXP || type == OP_OPEN_SUBEXP)
    2141             :                 {
    2142             :                   if (subexp_idx != dfa->nodes[node].opr.idx)
    2143           0 :                     continue;
    2144             :                   /* It is against this limitation.
    2145           0 :                      Remove it form the current sifted state.  */
    2146           0 :                   err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
    2147             :                                                candidates);
    2148             :                   if (BE (err != REG_NOERROR, 0))
    2149             :                     return err;
    2150             :                 }
    2151           0 :             }
    2152             :         }
    2153             :     }
    2154             :   return REG_NOERROR;
    2155             : }
    2156           0 : 
    2157             : static reg_errcode_t
    2158             : internal_function
    2159           0 : sift_states_bkref (const re_match_context_t *mctx, re_sift_context_t *sctx,
    2160             :                    Idx str_idx, const re_node_set *candidates)
    2161             : {
    2162             :   const re_dfa_t *const dfa = mctx->dfa;
    2163           0 :   reg_errcode_t err;
    2164             :   Idx node_idx, node;
    2165           0 :   re_sift_context_t local_sctx;
    2166           0 :   Idx first_idx = search_cur_bkref_entry (mctx, str_idx);
    2167             : 
    2168           0 :   if (first_idx == REG_MISSING)
    2169             :     return REG_NOERROR;
    2170           0 : 
    2171             :   local_sctx.sifted_states = NULL; /* Mark that it hasn't been initialized.  */
    2172             : 
    2173             :   for (node_idx = 0; node_idx < candidates->nelem; ++node_idx)
    2174             :     {
    2175           0 :       Idx enabled_idx;
    2176           0 :       re_token_type_t type;
    2177             :       struct re_backref_cache_entry *entry;
    2178           0 :       node = candidates->elems[node_idx];
    2179           0 :       type = dfa->nodes[node].type;
    2180           0 :       /* Avoid infinite loop for the REs like "()\1+".  */
    2181           0 :       if (node == sctx->last_node && str_idx == sctx->last_str_idx)
    2182             :         continue;
    2183           0 :       if (type != OP_BACK_REF)
    2184           0 :         continue;
    2185             : 
    2186             :       entry = mctx->bkref_ents + first_idx;
    2187             :       enabled_idx = first_idx;
    2188             :       do
    2189             :         {
    2190             :           Idx subexp_len;
    2191             :           Idx to_idx;
    2192             :           Idx dst_node;
    2193           0 :           bool ok;
    2194           0 :           re_dfastate_t *cur_state;
    2195           0 : 
    2196           0 :           if (entry->node != node)
    2197           0 :             continue;
    2198           0 :           subexp_len = entry->subexp_to - entry->subexp_from;
    2199             :           to_idx = str_idx + subexp_len;
    2200           0 :           dst_node = (subexp_len ? dfa->nexts[node]
    2201           0 :                       : dfa->edests[node].elems[0]);
    2202           0 : 
    2203           0 :           if (to_idx > sctx->last_str_idx
    2204             :               || sctx->sifted_states[to_idx] == NULL
    2205           0 :               || !STATE_NODE_CONTAINS (sctx->sifted_states[to_idx], dst_node)
    2206             :               || check_dst_limits (mctx, &sctx->limits, node,
    2207           0 :                                    str_idx, dst_node, to_idx))
    2208             :             continue;
    2209           0 : 
    2210           0 :           if (local_sctx.sifted_states == NULL)
    2211           0 :             {
    2212           0 :               local_sctx = *sctx;
    2213             :               err = re_node_set_init_copy (&local_sctx.limits, &sctx->limits);
    2214           0 :               if (BE (err != REG_NOERROR, 0))
    2215           0 :                 goto free_return;
    2216           0 :             }
    2217           0 :           local_sctx.last_node = node;
    2218             :           local_sctx.last_str_idx = str_idx;
    2219           0 :           ok = re_node_set_insert (&local_sctx.limits, enabled_idx);
    2220           0 :           if (BE (! ok, 0))
    2221             :             {
    2222           0 :               err = REG_ESPACE;
    2223           0 :               goto free_return;
    2224           0 :             }
    2225           0 :           cur_state = local_sctx.sifted_states[str_idx];
    2226           0 :           err = sift_states_backward (mctx, &local_sctx);
    2227             :           if (BE (err != REG_NOERROR, 0))
    2228           0 :             goto free_return;
    2229             :           if (sctx->limited_states != NULL)
    2230             :             {
    2231           0 :               err = merge_state_array (dfa, sctx->limited_states,
    2232           0 :                                        local_sctx.sifted_states,
    2233             :                                        str_idx + 1);
    2234           0 :               if (BE (err != REG_NOERROR, 0))
    2235           0 :                 goto free_return;
    2236             :             }
    2237             :           local_sctx.sifted_states[str_idx] = cur_state;
    2238           0 :           re_node_set_remove (&local_sctx.limits, enabled_idx);
    2239             : 
    2240           0 :           /* mctx->bkref_ents may have changed, reload the pointer.  */
    2241             :           entry = mctx->bkref_ents + enabled_idx;
    2242           0 :         }
    2243           0 :       while (enabled_idx++, entry++->more);
    2244           0 :     }
    2245             :   err = REG_NOERROR;
    2246           0 :  free_return:
    2247             :   if (local_sctx.sifted_states != NULL)
    2248             :     {
    2249           0 :       re_node_set_free (&local_sctx.limits);
    2250             :     }
    2251             : 
    2252             :   return err;
    2253             : }
    2254             : 
    2255             : 
    2256           0 : #ifdef RE_ENABLE_I18N
    2257             : static int
    2258             : internal_function
    2259           0 : sift_states_iter_mb (const re_match_context_t *mctx, re_sift_context_t *sctx,
    2260             :                      Idx node_idx, Idx str_idx, Idx max_str_idx)
    2261             : {
    2262           0 :   const re_dfa_t *const dfa = mctx->dfa;
    2263           0 :   int naccepted;
    2264           0 :   /* Check the node can accept `multi byte'.  */
    2265             :   naccepted = check_node_accept_bytes (dfa, node_idx, &mctx->input, str_idx);
    2266             :   if (naccepted > 0 && str_idx + naccepted <= max_str_idx &&
    2267             :       !STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + naccepted],
    2268             :                             dfa->nexts[node_idx]))
    2269           0 :     /* The node can't accept the `multi byte', or the
    2270             :        destination was already thrown away, then the node
    2271             :        could't accept the current input `multi byte'.   */
    2272           0 :     naccepted = 0;
    2273             :   /* Otherwise, it is sure that the node could accept
    2274             :      `naccepted' bytes input.  */
    2275             :   return naccepted;
    2276             : }
    2277             : #endif /* RE_ENABLE_I18N */
    2278             : 
    2279             : 
    2280             : /* Functions for state transition.  */
    2281             : 
    2282             : /* Return the next state to which the current state STATE will transit by
    2283             :    accepting the current input byte, and update STATE_LOG if necessary.
    2284             :    If STATE can accept a multibyte char/collating element/back reference
    2285             :    update the destination of STATE_LOG.  */
    2286         250 : 
    2287             : static re_dfastate_t *
    2288             : internal_function
    2289             : transit_state (reg_errcode_t *err, re_match_context_t *mctx,
    2290             :                re_dfastate_t *state)
    2291             : {
    2292             :   re_dfastate_t **trtable;
    2293             :   unsigned char ch;
    2294         250 : 
    2295             : #ifdef RE_ENABLE_I18N
    2296           0 :   /* If the current state can accept multibyte.  */
    2297           0 :   if (BE (state->accept_mb, 0))
    2298           0 :     {
    2299             :       *err = transit_state_mb (mctx, state);
    2300             :       if (BE (*err != REG_NOERROR, 0))
    2301             :         return NULL;
    2302             :     }
    2303             : #endif /* RE_ENABLE_I18N */
    2304             : 
    2305             :   /* Then decide the next state with the single byte.  */
    2306             : #if 0
    2307             :   if (0)
    2308             :     /* don't use transition table  */
    2309             :     return transit_state_sb (err, mctx, state);
    2310         250 : #endif
    2311             : 
    2312             :   /* Use transition table  */
    2313         520 :   ch = re_string_fetch_byte (&mctx->input);
    2314         385 :   for (;;)
    2315         250 :     {
    2316             :       trtable = state->trtable;
    2317         135 :       if (BE (trtable != NULL, 1))
    2318         135 :         return trtable[ch];
    2319             : 
    2320             :       trtable = state->word_trtable;
    2321             :       if (BE (trtable != NULL, 1))
    2322           0 :         {
    2323           0 :           unsigned int context;
    2324             :           context
    2325           0 :             = re_string_context_at (&mctx->input,
    2326           0 :                                     re_string_cur_idx (&mctx->input) - 1,
    2327             :                                     mctx->eflags);
    2328           0 :           if (IS_WORD_CONTEXT (context))
    2329             :             return trtable[ch + SBC_MAX];
    2330             :           else
    2331         135 :             return trtable[ch];
    2332             :         }
    2333           0 : 
    2334           0 :       if (!build_trtable (mctx->dfa, state))
    2335             :         {
    2336             :           *err = REG_ESPACE;
    2337             :           return NULL;
    2338             :         }
    2339             : 
    2340             :       /* Retry, we now have a transition table.  */
    2341             :     }
    2342             : }
    2343             : 
    2344          20 : /* Update the state_log if we need */
    2345             : static re_dfastate_t *
    2346             : internal_function
    2347          20 : merge_state_with_log (reg_errcode_t *err, re_match_context_t *mctx,
    2348          20 :                       re_dfastate_t *next_state)
    2349             : {
    2350          20 :   const re_dfa_t *const dfa = mctx->dfa;
    2351             :   Idx cur_idx = re_string_cur_idx (&mctx->input);
    2352          20 : 
    2353          20 :   if (cur_idx > mctx->state_log_top)
    2354             :     {
    2355           0 :       mctx->state_log[cur_idx] = next_state;
    2356             :       mctx->state_log_top = cur_idx;
    2357           0 :     }
    2358             :   else if (mctx->state_log[cur_idx] == 0)
    2359             :     {
    2360             :       mctx->state_log[cur_idx] = next_state;
    2361             :     }
    2362             :   else
    2363           0 :     {
    2364             :       re_dfastate_t *pstate;
    2365             :       unsigned int context;
    2366             :       re_node_set next_nodes, *log_nodes, *table_nodes = NULL;
    2367             :       /* If (state_log[cur_idx] != 0), it implies that cur_idx is
    2368           0 :          the destination of a multibyte char/collating element/
    2369           0 :          back reference.  Then the next state is the union set of
    2370           0 :          these destinations and the results of the transition table.  */
    2371             :       pstate = mctx->state_log[cur_idx];
    2372           0 :       log_nodes = pstate->entrance_nodes;
    2373           0 :       if (next_state != NULL)
    2374             :         {
    2375           0 :           table_nodes = next_state->entrance_nodes;
    2376           0 :           *err = re_node_set_init_union (&next_nodes, table_nodes,
    2377             :                                              log_nodes);
    2378             :           if (BE (*err != REG_NOERROR, 0))
    2379           0 :             return NULL;
    2380             :         }
    2381             :       else
    2382             :         next_nodes = *log_nodes;
    2383           0 :       /* Note: We already add the nodes of the initial state,
    2384           0 :          then we don't need to add them here.  */
    2385             : 
    2386           0 :       context = re_string_context_at (&mctx->input,
    2387           0 :                                       re_string_cur_idx (&mctx->input) - 1,
    2388             :                                       mctx->eflags);
    2389             :       next_state = mctx->state_log[cur_idx]
    2390             :         = re_acquire_state_context (err, dfa, &next_nodes, context);
    2391           0 :       /* We don't need to check errors here, since the return value of
    2392           0 :          this function is next_state and ERR is already set.  */
    2393             : 
    2394             :       if (table_nodes != NULL)
    2395          20 :         re_node_set_free (&next_nodes);
    2396             :     }
    2397             : 
    2398             :   if (BE (dfa->nbackref, 0) && next_state != NULL)
    2399             :     {
    2400           0 :       /* Check OP_OPEN_SUBEXP in the current state in case that we use them
    2401             :          later.  We must check them here, since the back references in the
    2402           0 :          next state might use them.  */
    2403           0 :       *err = check_subexp_matching_top (mctx, &next_state->nodes,
    2404             :                                         cur_idx);
    2405             :       if (BE (*err != REG_NOERROR, 0))
    2406           0 :         return NULL;
    2407             : 
    2408           0 :       /* If the next state has back references.  */
    2409           0 :       if (next_state->has_backref)
    2410           0 :         {
    2411           0 :           *err = transit_state_bkref (mctx, &next_state->nodes);
    2412             :           if (BE (*err != REG_NOERROR, 0))
    2413             :             return NULL;
    2414             :           next_state = mctx->state_log[cur_idx];
    2415          20 :         }
    2416             :     }
    2417             : 
    2418             :   return next_state;
    2419             : }
    2420             : 
    2421             : /* Skip bytes in the input that correspond to part of a
    2422             :    multi-byte match, then look in the log for a state
    2423           7 :    from which to restart matching.  */
    2424             : static re_dfastate_t *
    2425             : internal_function
    2426             : find_recover_state (reg_errcode_t *err, re_match_context_t *mctx)
    2427             : {
    2428           7 :   re_dfastate_t *cur_state;
    2429           7 :   do
    2430             :     {
    2431             :       Idx max = mctx->state_log_top;
    2432             :       Idx cur_str_idx = re_string_cur_idx (&mctx->input);
    2433           7 : 
    2434           7 :       do
    2435           0 :         {
    2436             :           if (++cur_str_idx > max)
    2437           0 :             return NULL;
    2438             :           re_string_skip_bytes (&mctx->input, 1);
    2439           0 :         }
    2440             :       while (mctx->state_log[cur_str_idx] == NULL);
    2441           0 : 
    2442           0 :       cur_state = merge_state_with_log (err, mctx, NULL);
    2443             :     }
    2444             :   while (*err == REG_NOERROR && cur_state == NULL);
    2445             :   return cur_state;
    2446             : }
    2447             : 
    2448             : /* Helper functions for transit_state.  */
    2449             : 
    2450             : /* From the node set CUR_NODES, pick up the nodes whose types are
    2451             :    OP_OPEN_SUBEXP and which have corresponding back references in the regular
    2452             :    expression. And register them to use them later for evaluating the
    2453             :    correspoding back references.  */
    2454           0 : 
    2455             : static reg_errcode_t
    2456             : internal_function
    2457           0 : check_subexp_matching_top (re_match_context_t *mctx, re_node_set *cur_nodes,
    2458             :                            Idx str_idx)
    2459             : {
    2460             :   const re_dfa_t *const dfa = mctx->dfa;
    2461             :   Idx node_idx;
    2462             :   reg_errcode_t err;
    2463             : 
    2464             :   /* TODO: This isn't efficient.
    2465             :            Because there might be more than one nodes whose types are
    2466           0 :            OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
    2467             :            nodes.
    2468           0 :            E.g. RE: (a){2}  */
    2469           0 :   for (node_idx = 0; node_idx < cur_nodes->nelem; ++node_idx)
    2470           0 :     {
    2471           0 :       Idx node = cur_nodes->elems[node_idx];
    2472           0 :       if (dfa->nodes[node].type == OP_OPEN_SUBEXP
    2473             :           && dfa->nodes[node].opr.idx < BITSET_WORD_BITS
    2474           0 :           && (dfa->used_bkref_map
    2475           0 :               & ((bitset_word_t) 1 << dfa->nodes[node].opr.idx)))
    2476           0 :         {
    2477             :           err = match_ctx_add_subtop (mctx, node, str_idx);
    2478             :           if (BE (err != REG_NOERROR, 0))
    2479           0 :             return err;
    2480             :         }
    2481             :     }
    2482             :   return REG_NOERROR;
    2483             : }
    2484             : 
    2485             : #if 0
    2486             : /* Return the next state to which the current state STATE will transit by
    2487             :    accepting the current input byte.  */
    2488             : 
    2489             : static re_dfastate_t *
    2490             : transit_state_sb (reg_errcode_t *err, re_match_context_t *mctx,
    2491             :                   re_dfastate_t *state)
    2492             : {
    2493             :   const re_dfa_t *const dfa = mctx->dfa;
    2494             :   re_node_set next_nodes;
    2495             :   re_dfastate_t *next_state;
    2496             :   Idx node_cnt, cur_str_idx = re_string_cur_idx (&mctx->input);
    2497             :   unsigned int context;
    2498             : 
    2499             :   *err = re_node_set_alloc (&next_nodes, state->nodes.nelem + 1);
    2500             :   if (BE (*err != REG_NOERROR, 0))
    2501             :     return NULL;
    2502             :   for (node_cnt = 0; node_cnt < state->nodes.nelem; ++node_cnt)
    2503             :     {
    2504             :       Idx cur_node = state->nodes.elems[node_cnt];
    2505             :       if (check_node_accept (mctx, dfa->nodes + cur_node, cur_str_idx))
    2506             :         {
    2507             :           *err = re_node_set_merge (&next_nodes,
    2508             :                                     dfa->eclosures + dfa->nexts[cur_node]);
    2509             :           if (BE (*err != REG_NOERROR, 0))
    2510             :             {
    2511             :               re_node_set_free (&next_nodes);
    2512             :               return NULL;
    2513             :             }
    2514             :         }
    2515             :     }
    2516             :   context = re_string_context_at (&mctx->input, cur_str_idx, mctx->eflags);
    2517             :   next_state = re_acquire_state_context (err, dfa, &next_nodes, context);
    2518             :   /* We don't need to check errors here, since the return value of
    2519             :      this function is next_state and ERR is already set.  */
    2520             : 
    2521             :   re_node_set_free (&next_nodes);
    2522             :   re_string_skip_bytes (&mctx->input, 1);
    2523             :   return next_state;
    2524             : }
    2525             : #endif
    2526             : 
    2527           0 : #ifdef RE_ENABLE_I18N
    2528             : static reg_errcode_t
    2529           0 : internal_function
    2530             : transit_state_mb (re_match_context_t *mctx, re_dfastate_t *pstate)
    2531             : {
    2532             :   const re_dfa_t *const dfa = mctx->dfa;
    2533           0 :   reg_errcode_t err;
    2534             :   Idx i;
    2535             : 
    2536           0 :   for (i = 0; i < pstate->nodes.nelem; ++i)
    2537             :     {
    2538             :       re_node_set dest_nodes, *new_nodes;
    2539             :       Idx cur_node_idx = pstate->nodes.elems[i];
    2540             :       int naccepted;
    2541             :       Idx dest_idx;
    2542           0 :       unsigned int context;
    2543           0 :       re_dfastate_t *dest_state;
    2544             : 
    2545           0 :       if (!dfa->nodes[cur_node_idx].accept_mb)
    2546             :         continue;
    2547           0 : 
    2548             :       if (dfa->nodes[cur_node_idx].constraint)
    2549             :         {
    2550           0 :           context = re_string_context_at (&mctx->input,
    2551             :                                           re_string_cur_idx (&mctx->input),
    2552           0 :                                           mctx->eflags);
    2553             :           if (NOT_SATISFY_NEXT_CONSTRAINT (dfa->nodes[cur_node_idx].constraint,
    2554             :                                            context))
    2555             :             continue;
    2556           0 :         }
    2557             : 
    2558           0 :       /* How many bytes the node can accept?  */
    2559           0 :       naccepted = check_node_accept_bytes (dfa, cur_node_idx, &mctx->input,
    2560             :                                            re_string_cur_idx (&mctx->input));
    2561             :       if (naccepted == 0)
    2562           0 :         continue;
    2563           0 : 
    2564           0 :       /* The node can accepts `naccepted' bytes.  */
    2565           0 :       dest_idx = re_string_cur_idx (&mctx->input) + naccepted;
    2566           0 :       mctx->max_mb_elem_len = ((mctx->max_mb_elem_len < naccepted) ? naccepted
    2567           0 :                                : mctx->max_mb_elem_len);
    2568             :       err = clean_state_log_if_needed (mctx, dest_idx);
    2569             :       if (BE (err != REG_NOERROR, 0))
    2570             :         return err;
    2571           0 : #ifdef DEBUG
    2572             :       assert (dfa->nexts[cur_node_idx] != REG_MISSING);
    2573           0 : #endif
    2574           0 :       new_nodes = dfa->eclosures + dfa->nexts[cur_node_idx];
    2575           0 : 
    2576             :       dest_state = mctx->state_log[dest_idx];
    2577             :       if (dest_state == NULL)
    2578           0 :         dest_nodes = *new_nodes;
    2579           0 :       else
    2580           0 :         {
    2581           0 :           err = re_node_set_init_union (&dest_nodes,
    2582             :                                         dest_state->entrance_nodes, new_nodes);
    2583           0 :           if (BE (err != REG_NOERROR, 0))
    2584             :             return err;
    2585           0 :         }
    2586           0 :       context = re_string_context_at (&mctx->input, dest_idx - 1,
    2587           0 :                                       mctx->eflags);
    2588           0 :       mctx->state_log[dest_idx]
    2589           0 :         = re_acquire_state_context (&err, dfa, &dest_nodes, context);
    2590           0 :       if (dest_state != NULL)
    2591             :         re_node_set_free (&dest_nodes);
    2592           0 :       if (BE (mctx->state_log[dest_idx] == NULL && err != REG_NOERROR, 0))
    2593             :         return err;
    2594             :     }
    2595             :   return REG_NOERROR;
    2596             : }
    2597             : #endif /* RE_ENABLE_I18N */
    2598           0 : 
    2599             : static reg_errcode_t
    2600           0 : internal_function
    2601             : transit_state_bkref (re_match_context_t *mctx, const re_node_set *nodes)
    2602             : {
    2603           0 :   const re_dfa_t *const dfa = mctx->dfa;
    2604             :   reg_errcode_t err;
    2605           0 :   Idx i;
    2606             :   Idx cur_str_idx = re_string_cur_idx (&mctx->input);
    2607             : 
    2608           0 :   for (i = 0; i < nodes->nelem; ++i)
    2609             :     {
    2610           0 :       Idx dest_str_idx, prev_nelem, bkc_idx;
    2611             :       Idx node_idx = nodes->elems[i];
    2612             :       unsigned int context;
    2613             :       const re_token_t *node = dfa->nodes + node_idx;
    2614           0 :       re_node_set *new_dest_nodes;
    2615           0 : 
    2616             :       /* Check whether `node' is a backreference or not.  */
    2617           0 :       if (node->type != OP_BACK_REF)
    2618             :         continue;
    2619           0 : 
    2620             :       if (node->constraint)
    2621           0 :         {
    2622           0 :           context = re_string_context_at (&mctx->input, cur_str_idx,
    2623             :                                           mctx->eflags);
    2624             :           if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
    2625             :             continue;
    2626             :         }
    2627           0 : 
    2628           0 :       /* `node' is a backreference.
    2629           0 :          Check the substring which the substring matched.  */
    2630           0 :       bkc_idx = mctx->nbkref_ents;
    2631             :       err = get_subexp (mctx, node_idx, cur_str_idx);
    2632             :       if (BE (err != REG_NOERROR, 0))
    2633             :         goto free_return;
    2634             : 
    2635             :       /* And add the epsilon closures (which is `new_dest_nodes') of
    2636             :          the backreference to appropriate state_log.  */
    2637           0 : #ifdef DEBUG
    2638             :       assert (dfa->nexts[node_idx] != REG_MISSING);
    2639             : #endif
    2640             :       for (; bkc_idx < mctx->nbkref_ents; ++bkc_idx)
    2641             :         {
    2642           0 :           Idx subexp_len;
    2643           0 :           re_dfastate_t *dest_state;
    2644           0 :           struct re_backref_cache_entry *bkref_ent;
    2645           0 :           bkref_ent = mctx->bkref_ents + bkc_idx;
    2646           0 :           if (bkref_ent->node != node_idx || bkref_ent->str_idx != cur_str_idx)
    2647           0 :             continue;
    2648           0 :           subexp_len = bkref_ent->subexp_to - bkref_ent->subexp_from;
    2649           0 :           new_dest_nodes = (subexp_len == 0
    2650           0 :                             ? dfa->eclosures + dfa->edests[node_idx].elems[0]
    2651           0 :                             : dfa->eclosures + dfa->nexts[node_idx]);
    2652             :           dest_str_idx = (cur_str_idx + bkref_ent->subexp_to
    2653           0 :                           - bkref_ent->subexp_from);
    2654           0 :           context = re_string_context_at (&mctx->input, dest_str_idx - 1,
    2655           0 :                                           mctx->eflags);
    2656             :           dest_state = mctx->state_log[dest_str_idx];
    2657           0 :           prev_nelem = ((mctx->state_log[cur_str_idx] == NULL) ? 0
    2658             :                         : mctx->state_log[cur_str_idx]->nodes.nelem);
    2659           0 :           /* Add `new_dest_node' to state_log.  */
    2660           0 :           if (dest_state == NULL)
    2661             :             {
    2662           0 :               mctx->state_log[dest_str_idx]
    2663             :                 = re_acquire_state_context (&err, dfa, new_dest_nodes,
    2664           0 :                                             context);
    2665             :               if (BE (mctx->state_log[dest_str_idx] == NULL
    2666             :                       && err != REG_NOERROR, 0))
    2667             :                 goto free_return;
    2668             :             }
    2669           0 :           else
    2670           0 :             {
    2671             :               re_node_set dest_nodes;
    2672           0 :               err = re_node_set_init_union (&dest_nodes,
    2673             :                                             dest_state->entrance_nodes,
    2674           0 :                                             new_dest_nodes);
    2675           0 :               if (BE (err != REG_NOERROR, 0))
    2676             :                 {
    2677           0 :                   re_node_set_free (&dest_nodes);
    2678           0 :                   goto free_return;
    2679           0 :                 }
    2680           0 :               mctx->state_log[dest_str_idx]
    2681             :                 = re_acquire_state_context (&err, dfa, &dest_nodes, context);
    2682           0 :               re_node_set_free (&dest_nodes);
    2683             :               if (BE (mctx->state_log[dest_str_idx] == NULL
    2684             :                       && err != REG_NOERROR, 0))
    2685             :                 goto free_return;
    2686           0 :             }
    2687           0 :           /* We need to check recursively if the backreference can epsilon
    2688             :              transit.  */
    2689           0 :           if (subexp_len == 0
    2690             :               && mctx->state_log[cur_str_idx]->nodes.nelem > prev_nelem)
    2691           0 :             {
    2692           0 :               err = check_subexp_matching_top (mctx, new_dest_nodes,
    2693           0 :                                                cur_str_idx);
    2694           0 :               if (BE (err != REG_NOERROR, 0))
    2695           0 :                 goto free_return;
    2696             :               err = transit_state_bkref (mctx, new_dest_nodes);
    2697             :               if (BE (err != REG_NOERROR, 0))
    2698             :                 goto free_return;
    2699           0 :             }
    2700           0 :         }
    2701           0 :     }
    2702             :   err = REG_NOERROR;
    2703             :  free_return:
    2704             :   return err;
    2705             : }
    2706             : 
    2707             : /* Enumerate all the candidates which the backreference BKREF_NODE can match
    2708             :    at BKREF_STR_IDX, and register them by match_ctx_add_entry().
    2709             :    Note that we might collect inappropriate candidates here.
    2710             :    However, the cost of checking them strictly here is too high, then we
    2711             :    delay these checking for prune_impossible_nodes().  */
    2712           0 : 
    2713             : static reg_errcode_t
    2714           0 : internal_function
    2715             : get_subexp (re_match_context_t *mctx, Idx bkref_node, Idx bkref_str_idx)
    2716           0 : {
    2717             :   const re_dfa_t *const dfa = mctx->dfa;
    2718           0 :   Idx subexp_num, sub_top_idx;
    2719           0 :   const char *buf = (const char *) re_string_get_buffer (&mctx->input);
    2720             :   /* Return if we have already checked BKREF_NODE at BKREF_STR_IDX.  */
    2721           0 :   Idx cache_idx = search_cur_bkref_entry (mctx, bkref_str_idx);
    2722           0 :   if (cache_idx != REG_MISSING)
    2723             :     {
    2724           0 :       const struct re_backref_cache_entry *entry
    2725           0 :         = mctx->bkref_ents + cache_idx;
    2726           0 :       do
    2727             :         if (entry->node == bkref_node)
    2728             :           return REG_NOERROR; /* We already checked it.  */
    2729           0 :       while (entry++->more);
    2730             :     }
    2731             : 
    2732           0 :   subexp_num = dfa->nodes[bkref_node].opr.idx;
    2733             : 
    2734             :   /* For each sub expression  */
    2735           0 :   for (sub_top_idx = 0; sub_top_idx < mctx->nsub_tops; ++sub_top_idx)
    2736             :     {
    2737             :       reg_errcode_t err;
    2738             :       re_sub_match_top_t *sub_top = mctx->sub_tops[sub_top_idx];
    2739           0 :       re_sub_match_last_t *sub_last;
    2740           0 :       Idx sub_last_idx, sl_str, bkref_str_off;
    2741             : 
    2742           0 :       if (dfa->nodes[sub_top->node].opr.idx != subexp_num)
    2743           0 :         continue; /* It isn't related.  */
    2744             : 
    2745             :       sl_str = sub_top->str_idx;
    2746           0 :       bkref_str_off = bkref_str_idx;
    2747             :       /* At first, check the last node of sub expressions we already
    2748             :          evaluated.  */
    2749           0 :       for (sub_last_idx = 0; sub_last_idx < sub_top->nlasts; ++sub_last_idx)
    2750           0 :         {
    2751             :           regoff_t sl_str_diff;
    2752             :           sub_last = sub_top->lasts[sub_last_idx];
    2753           0 :           sl_str_diff = sub_last->str_idx - sl_str;
    2754             :           /* The matched string by the sub expression match with the substring
    2755           0 :              at the back reference?  */
    2756             :           if (sl_str_diff > 0)
    2757             :             {
    2758           0 :               if (BE (bkref_str_off + sl_str_diff > mctx->input.valid_len, 0))
    2759           0 :                 {
    2760             :                   /* Not enough chars for a successful match.  */
    2761           0 :                   if (bkref_str_off + sl_str_diff > mctx->input.len)
    2762             :                     break;
    2763             : 
    2764           0 :                   err = clean_state_log_if_needed (mctx,
    2765           0 :                                                    bkref_str_off
    2766           0 :                                                    + sl_str_diff);
    2767             :                   if (BE (err != REG_NOERROR, 0))
    2768           0 :                     return err;
    2769             :                   buf = (const char *) re_string_get_buffer (&mctx->input);
    2770           0 :                 }
    2771             :               if (memcmp (buf + bkref_str_off, buf + sl_str, sl_str_diff) != 0)
    2772           0 :                 /* We don't need to search this sub expression any more.  */
    2773           0 :                 break;
    2774           0 :             }
    2775             :           bkref_str_off += sl_str_diff;
    2776             :           sl_str += sl_str_diff;
    2777             :           err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
    2778             :                                 bkref_str_idx);
    2779           0 : 
    2780             :           /* Reload buf, since the preceding call might have reallocated
    2781           0 :              the buffer.  */
    2782           0 :           buf = (const char *) re_string_get_buffer (&mctx->input);
    2783           0 : 
    2784           0 :           if (err == REG_NOMATCH)
    2785             :             continue;
    2786             :           if (BE (err != REG_NOERROR, 0))
    2787           0 :             return err;
    2788           0 :         }
    2789           0 : 
    2790           0 :       if (sub_last_idx < sub_top->nlasts)
    2791             :         continue;
    2792           0 :       if (sub_last_idx > 0)
    2793             :         ++sl_str;
    2794             :       /* Then, search for the other last nodes of the sub expression.  */
    2795             :       for (; sl_str <= bkref_str_idx; ++sl_str)
    2796             :         {
    2797           0 :           Idx cls_node;
    2798             :           regoff_t sl_str_off;
    2799             :           const re_node_set *nodes;
    2800           0 :           sl_str_off = sl_str - sub_top->str_idx;
    2801             :           /* The matched string by the sub expression match with the substring
    2802           0 :              at the back reference?  */
    2803             :           if (sl_str_off > 0)
    2804             :             {
    2805           0 :               if (BE (bkref_str_off >= mctx->input.valid_len, 0))
    2806           0 :                 {
    2807             :                   /* If we are at the end of the input, we cannot match.  */
    2808           0 :                   if (bkref_str_off >= mctx->input.len)
    2809           0 :                     break;
    2810           0 : 
    2811             :                   err = extend_buffers (mctx);
    2812           0 :                   if (BE (err != REG_NOERROR, 0))
    2813             :                     return err;
    2814           0 : 
    2815           0 :                   buf = (const char *) re_string_get_buffer (&mctx->input);
    2816             :                 }
    2817             :               if (buf [bkref_str_off++] != buf[sl_str - 1])
    2818           0 :                 break; /* We don't need to search this sub expression
    2819           0 :                           any more.  */
    2820             :             }
    2821           0 :           if (mctx->state_log[sl_str] == NULL)
    2822           0 :             continue;
    2823             :           /* Does this state have a ')' of the sub expression?  */
    2824           0 :           nodes = &mctx->state_log[sl_str]->nodes;
    2825           0 :           cls_node = find_subexp_node (dfa, nodes, subexp_num,
    2826           0 :                                        OP_CLOSE_SUBEXP);
    2827             :           if (cls_node == REG_MISSING)
    2828           0 :             continue; /* No.  */
    2829           0 :           if (sub_top->path == NULL)
    2830           0 :             {
    2831           0 :               sub_top->path = calloc (sizeof (state_array_t),
    2832             :                                       sl_str - sub_top->str_idx + 1);
    2833             :               if (sub_top->path == NULL)
    2834             :                 return REG_ESPACE;
    2835           0 :             }
    2836             :           /* Can the OP_OPEN_SUBEXP node arrive the OP_CLOSE_SUBEXP node
    2837             :              in the current context?  */
    2838           0 :           err = check_arrival (mctx, sub_top->path, sub_top->node,
    2839           0 :                                sub_top->str_idx, cls_node, sl_str,
    2840           0 :                                OP_CLOSE_SUBEXP);
    2841           0 :           if (err == REG_NOMATCH)
    2842           0 :               continue;
    2843           0 :           if (BE (err != REG_NOERROR, 0))
    2844           0 :               return err;
    2845           0 :           sub_last = match_ctx_add_sublast (sub_top, cls_node, sl_str);
    2846             :           if (BE (sub_last == NULL, 0))
    2847           0 :             return REG_ESPACE;
    2848           0 :           err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
    2849             :                                 bkref_str_idx);
    2850             :           if (err == REG_NOMATCH)
    2851           0 :             continue;
    2852             :         }
    2853             :     }
    2854             :   return REG_NOERROR;
    2855             : }
    2856             : 
    2857             : /* Helper functions for get_subexp().  */
    2858             : 
    2859             : /* Check SUB_LAST can arrive to the back reference BKREF_NODE at BKREF_STR.
    2860             :    If it can arrive, register the sub expression expressed with SUB_TOP
    2861             :    and SUB_LAST.  */
    2862           0 : 
    2863             : static reg_errcode_t
    2864             : internal_function
    2865             : get_subexp_sub (re_match_context_t *mctx, const re_sub_match_top_t *sub_top,
    2866             :                 re_sub_match_last_t *sub_last, Idx bkref_node, Idx bkref_str)
    2867             : {
    2868           0 :   reg_errcode_t err;
    2869             :   Idx to_idx;
    2870             :   /* Can the subexpression arrive the back reference?  */
    2871           0 :   err = check_arrival (mctx, &sub_last->path, sub_last->node,
    2872           0 :                        sub_last->str_idx, bkref_node, bkref_str,
    2873           0 :                        OP_OPEN_SUBEXP);
    2874             :   if (err != REG_NOERROR)
    2875           0 :     return err;
    2876           0 :   err = match_ctx_add_entry (mctx, bkref_node, bkref_str, sub_top->str_idx,
    2877           0 :                              sub_last->str_idx);
    2878           0 :   if (BE (err != REG_NOERROR, 0))
    2879             :     return err;
    2880             :   to_idx = bkref_str + sub_last->str_idx - sub_top->str_idx;
    2881             :   return clean_state_log_if_needed (mctx, to_idx);
    2882             : }
    2883             : 
    2884             : /* Find the first node which is '(' or ')' and whose index is SUBEXP_IDX.
    2885             :    Search '(' if FL_OPEN, or search ')' otherwise.
    2886             :    TODO: This function isn't efficient...
    2887             :          Because there might be more than one nodes whose types are
    2888             :          OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
    2889             :          nodes.
    2890             :          E.g. RE: (a){2}  */
    2891           0 : 
    2892             : static Idx
    2893             : internal_function
    2894             : find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
    2895           0 :                   Idx subexp_idx, int type)
    2896             : {
    2897           0 :   Idx cls_idx;
    2898           0 :   for (cls_idx = 0; cls_idx < nodes->nelem; ++cls_idx)
    2899           0 :     {
    2900           0 :       Idx cls_node = nodes->elems[cls_idx];
    2901           0 :       const re_token_t *node = dfa->nodes + cls_node;
    2902             :       if (node->type == type
    2903           0 :           && node->opr.idx == subexp_idx)
    2904             :         return cls_node;
    2905             :     }
    2906             :   return REG_MISSING;
    2907             : }
    2908             : 
    2909             : /* Check whether the node TOP_NODE at TOP_STR can arrive to the node
    2910             :    LAST_NODE at LAST_STR.  We record the path onto PATH since it will be
    2911             :    heavily reused.
    2912             :    Return REG_NOERROR if it can arrive, or REG_NOMATCH otherwise.  */
    2913           0 : 
    2914             : static reg_errcode_t
    2915             : internal_function
    2916           0 : check_arrival (re_match_context_t *mctx, state_array_t *path, Idx top_node,
    2917           0 :                Idx top_str, Idx last_node, Idx last_str, int type)
    2918             : {
    2919           0 :   const re_dfa_t *const dfa = mctx->dfa;
    2920             :   reg_errcode_t err = REG_NOERROR;
    2921             :   Idx subexp_num, backup_cur_idx, str_idx, null_cnt;
    2922             :   re_dfastate_t *cur_state = NULL;
    2923             :   re_node_set *cur_nodes, next_nodes;
    2924           0 :   re_dfastate_t **backup_state_log;
    2925             :   unsigned int context;
    2926           0 : 
    2927             :   subexp_num = dfa->nodes[top_node].opr.idx;
    2928             :   /* Extend the buffer if we need.  */
    2929           0 :   if (BE (path->alloc < last_str + mctx->max_mb_elem_len + 1, 0))
    2930           0 :     {
    2931           0 :       re_dfastate_t **new_array;
    2932           0 :       Idx old_alloc = path->alloc;
    2933           0 :       Idx new_alloc = old_alloc + last_str + mctx->max_mb_elem_len + 1;
    2934           0 :       if (BE (new_alloc < old_alloc, 0)
    2935           0 :           || BE (SIZE_MAX / sizeof (re_dfastate_t *) < new_alloc, 0))
    2936           0 :         return REG_ESPACE;
    2937           0 :       new_array = re_realloc (path->array, re_dfastate_t *, new_alloc);
    2938           0 :       if (BE (new_array == NULL, 0))
    2939           0 :         return REG_ESPACE;
    2940           0 :       path->array = new_array;
    2941             :       path->alloc = new_alloc;
    2942             :       memset (new_array + old_alloc, '\0',
    2943           0 :               sizeof (re_dfastate_t *) * (path->alloc - old_alloc));
    2944             :     }
    2945             : 
    2946           0 :   str_idx = path->next_idx ? path->next_idx : top_str;
    2947           0 : 
    2948           0 :   /* Temporary modify MCTX.  */
    2949           0 :   backup_state_log = mctx->state_log;
    2950             :   backup_cur_idx = mctx->input.cur_idx;
    2951             :   mctx->state_log = path->array;
    2952           0 :   mctx->input.cur_idx = str_idx;
    2953           0 : 
    2954             :   /* Setup initial node set.  */
    2955           0 :   context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
    2956           0 :   if (str_idx == top_str)
    2957           0 :     {
    2958           0 :       err = re_node_set_init_1 (&next_nodes, top_node);
    2959           0 :       if (BE (err != REG_NOERROR, 0))
    2960             :         return err;
    2961           0 :       err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
    2962           0 :       if (BE (err != REG_NOERROR, 0))
    2963             :         {
    2964             :           re_node_set_free (&next_nodes);
    2965             :           return err;
    2966             :         }
    2967           0 :     }
    2968           0 :   else
    2969             :     {
    2970           0 :       cur_state = mctx->state_log[str_idx];
    2971           0 :       if (cur_state && cur_state->has_backref)
    2972           0 :         {
    2973             :           err = re_node_set_init_copy (&next_nodes, &cur_state->nodes);
    2974             :           if (BE (err != REG_NOERROR, 0))
    2975           0 :             return err;
    2976             :         }
    2977           0 :       else
    2978             :         re_node_set_init_empty (&next_nodes);
    2979           0 :     }
    2980             :   if (str_idx == top_str || (cur_state && cur_state->has_backref))
    2981           0 :     {
    2982             :       if (next_nodes.nelem)
    2983           0 :         {
    2984             :           err = expand_bkref_cache (mctx, &next_nodes, str_idx,
    2985           0 :                                     subexp_num, type);
    2986           0 :           if (BE (err != REG_NOERROR, 0))
    2987             :             {
    2988             :               re_node_set_free (&next_nodes);
    2989           0 :               return err;
    2990           0 :             }
    2991             :         }
    2992           0 :       cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
    2993           0 :       if (BE (cur_state == NULL && err != REG_NOERROR, 0))
    2994             :         {
    2995           0 :           re_node_set_free (&next_nodes);
    2996             :           return err;
    2997             :         }
    2998           0 :       mctx->state_log[str_idx] = cur_state;
    2999             :     }
    3000           0 : 
    3001           0 :   for (null_cnt = 0; str_idx < last_str && null_cnt <= mctx->max_mb_elem_len;)
    3002             :     {
    3003           0 :       re_node_set_empty (&next_nodes);
    3004           0 :       if (mctx->state_log[str_idx + 1])
    3005           0 :         {
    3006             :           err = re_node_set_merge (&next_nodes,
    3007           0 :                                    &mctx->state_log[str_idx + 1]->nodes);
    3008           0 :           if (BE (err != REG_NOERROR, 0))
    3009             :             {
    3010             :               re_node_set_free (&next_nodes);
    3011           0 :               return err;
    3012             :             }
    3013           0 :         }
    3014             :       if (cur_state)
    3015             :         {
    3016           0 :           err = check_arrival_add_next_nodes (mctx, str_idx,
    3017             :                                               &cur_state->non_eps_nodes,
    3018           0 :                                               &next_nodes);
    3019           0 :           if (BE (err != REG_NOERROR, 0))
    3020             :             {
    3021             :               re_node_set_free (&next_nodes);
    3022           0 :               return err;
    3023           0 :             }
    3024             :         }
    3025           0 :       ++str_idx;
    3026           0 :       if (next_nodes.nelem)
    3027             :         {
    3028           0 :           err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
    3029           0 :           if (BE (err != REG_NOERROR, 0))
    3030             :             {
    3031           0 :               re_node_set_free (&next_nodes);
    3032             :               return err;
    3033           0 :             }
    3034             :           err = expand_bkref_cache (mctx, &next_nodes, str_idx,
    3035           0 :                                     subexp_num, type);
    3036           0 :           if (BE (err != REG_NOERROR, 0))
    3037             :             {
    3038             :               re_node_set_free (&next_nodes);
    3039           0 :               return err;
    3040           0 :             }
    3041           0 :         }
    3042             :       context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
    3043           0 :       cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
    3044           0 :       if (BE (cur_state == NULL && err != REG_NOERROR, 0))
    3045             :         {
    3046           0 :           re_node_set_free (&next_nodes);
    3047           0 :           return err;
    3048             :         }
    3049           0 :       mctx->state_log[str_idx] = cur_state;
    3050           0 :       null_cnt = cur_state == NULL ? null_cnt + 1 : 0;
    3051           0 :     }
    3052           0 :   re_node_set_free (&next_nodes);
    3053             :   cur_nodes = (mctx->state_log[last_str] == NULL ? NULL
    3054             :                : &mctx->state_log[last_str]->nodes);
    3055           0 :   path->next_idx = str_idx;
    3056           0 : 
    3057             :   /* Fix MCTX.  */
    3058             :   mctx->state_log = backup_state_log;
    3059           0 :   mctx->input.cur_idx = backup_cur_idx;
    3060           0 : 
    3061             :   /* Then check the current node set has the node LAST_NODE.  */
    3062           0 :   if (cur_nodes != NULL && re_node_set_contains (cur_nodes, last_node))
    3063             :     return REG_NOERROR;
    3064             : 
    3065             :   return REG_NOMATCH;
    3066             : }
    3067             : 
    3068             : /* Helper functions for check_arrival.  */
    3069             : 
    3070             : /* Calculate the destination nodes of CUR_NODES at STR_IDX, and append them
    3071             :    to NEXT_NODES.
    3072             :    TODO: This function is similar to the functions transit_state*(),
    3073             :          however this function has many additional works.
    3074             :          Can't we unify them?  */
    3075           0 : 
    3076             : static reg_errcode_t
    3077             : internal_function
    3078           0 : check_arrival_add_next_nodes (re_match_context_t *mctx, Idx str_idx,
    3079             :                               re_node_set *cur_nodes, re_node_set *next_nodes)
    3080             : {
    3081           0 :   const re_dfa_t *const dfa = mctx->dfa;
    3082             :   bool ok;
    3083           0 :   Idx cur_idx;
    3084           0 :   reg_errcode_t err = REG_NOERROR;
    3085             :   re_node_set union_set;
    3086           0 :   re_node_set_init_empty (&union_set);
    3087           0 :   for (cur_idx = 0; cur_idx < cur_nodes->nelem; ++cur_idx)
    3088             :     {
    3089             :       int naccepted = 0;
    3090             :       Idx cur_node = cur_nodes->elems[cur_idx];
    3091             : #ifdef DEBUG
    3092             :       re_token_type_t type = dfa->nodes[cur_node].type;
    3093             :       assert (!IS_EPSILON_NODE (type));
    3094           0 : #endif
    3095             : #ifdef RE_ENABLE_I18N
    3096           0 :       /* If the node may accept `multi byte'.  */
    3097             :       if (dfa->nodes[cur_node].accept_mb)
    3098           0 :         {
    3099             :           naccepted = check_node_accept_bytes (dfa, cur_node, &mctx->input,
    3100             :                                                str_idx);
    3101           0 :           if (naccepted > 1)
    3102           0 :             {
    3103           0 :               re_dfastate_t *dest_state;
    3104           0 :               Idx next_node = dfa->nexts[cur_node];
    3105           0 :               Idx next_idx = str_idx + naccepted;
    3106             :               dest_state = mctx->state_log[next_idx];
    3107           0 :               re_node_set_empty (&union_set);
    3108           0 :               if (dest_state)
    3109             :                 {
    3110           0 :                   err = re_node_set_merge (&union_set, &dest_state->nodes);
    3111           0 :                   if (BE (err != REG_NOERROR, 0))
    3112             :                     {
    3113             :                       re_node_set_free (&union_set);
    3114           0 :                       return err;
    3115           0 :                     }
    3116             :                 }
    3117           0 :               ok = re_node_set_insert (&union_set, next_node);
    3118           0 :               if (BE (! ok, 0))
    3119             :                 {
    3120           0 :                   re_node_set_free (&union_set);
    3121             :                   return REG_ESPACE;
    3122           0 :                 }
    3123             :               mctx->state_log[next_idx] = re_acquire_state (&err, dfa,
    3124             :                                                             &union_set);
    3125           0 :               if (BE (mctx->state_log[next_idx] == NULL
    3126           0 :                       && err != REG_NOERROR, 0))
    3127             :                 {
    3128             :                   re_node_set_free (&union_set);
    3129             :                   return err;
    3130             :                 }
    3131           0 :             }
    3132           0 :         }
    3133             : #endif /* RE_ENABLE_I18N */
    3134           0 :       if (naccepted
    3135           0 :           || check_node_accept (mctx, dfa->nodes + cur_node, str_idx))
    3136             :         {
    3137           0 :           ok = re_node_set_insert (next_nodes, dfa->nexts[cur_node]);
    3138           0 :           if (BE (! ok, 0))
    3139             :             {
    3140             :               re_node_set_free (&union_set);
    3141             :               return REG_ESPACE;
    3142           0 :             }
    3143           0 :         }
    3144             :     }
    3145             :   re_node_set_free (&union_set);
    3146             :   return REG_NOERROR;
    3147             : }
    3148             : 
    3149             : /* For all the nodes in CUR_NODES, add the epsilon closures of them to
    3150             :    CUR_NODES, however exclude the nodes which are:
    3151             :     - inside the sub expression whose number is EX_SUBEXP, if FL_OPEN.
    3152             :     - out of the sub expression whose number is EX_SUBEXP, if !FL_OPEN.
    3153             : */
    3154           0 : 
    3155             : static reg_errcode_t
    3156             : internal_function
    3157             : check_arrival_expand_ecl (const re_dfa_t *dfa, re_node_set *cur_nodes,
    3158             :                           Idx ex_subexp, int type)
    3159             : {
    3160             :   reg_errcode_t err;
    3161             :   Idx idx, outside_node;
    3162             :   re_node_set new_nodes;
    3163           0 : #ifdef DEBUG
    3164           0 :   assert (cur_nodes->nelem);
    3165           0 : #endif
    3166             :   err = re_node_set_alloc (&new_nodes, cur_nodes->nelem);
    3167             :   if (BE (err != REG_NOERROR, 0))
    3168             :     return err;
    3169           0 :   /* Create a new node set NEW_NODES with the nodes which are epsilon
    3170             :      closures of the node in CUR_NODES.  */
    3171           0 : 
    3172           0 :   for (idx = 0; idx < cur_nodes->nelem; ++idx)
    3173           0 :     {
    3174           0 :       Idx cur_node = cur_nodes->elems[idx];
    3175             :       const re_node_set *eclosure = dfa->eclosures + cur_node;
    3176             :       outside_node = find_subexp_node (dfa, eclosure, ex_subexp, type);
    3177           0 :       if (outside_node == REG_MISSING)
    3178           0 :         {
    3179             :           /* There are no problematic nodes, just merge them.  */
    3180           0 :           err = re_node_set_merge (&new_nodes, eclosure);
    3181           0 :           if (BE (err != REG_NOERROR, 0))
    3182             :             {
    3183             :               re_node_set_free (&new_nodes);
    3184             :               return err;
    3185             :             }
    3186             :         }
    3187           0 :       else
    3188             :         {
    3189           0 :           /* There are problematic nodes, re-calculate incrementally.  */
    3190             :           err = check_arrival_expand_ecl_sub (dfa, &new_nodes, cur_node,
    3191           0 :                                               ex_subexp, type);
    3192           0 :           if (BE (err != REG_NOERROR, 0))
    3193             :             {
    3194             :               re_node_set_free (&new_nodes);
    3195             :               return err;
    3196           0 :             }
    3197           0 :         }
    3198           0 :     }
    3199             :   re_node_set_free (cur_nodes);
    3200             :   *cur_nodes = new_nodes;
    3201             :   return REG_NOERROR;
    3202             : }
    3203             : 
    3204             : /* Helper function for check_arrival_expand_ecl.
    3205             :    Check incrementally the epsilon closure of TARGET, and if it isn't
    3206             :    problematic append it to DST_NODES.  */
    3207           0 : 
    3208             : static reg_errcode_t
    3209             : internal_function
    3210             : check_arrival_expand_ecl_sub (const re_dfa_t *dfa, re_node_set *dst_nodes,
    3211           0 :                               Idx target, Idx ex_subexp, int type)
    3212             : {
    3213             :   Idx cur_node;
    3214             :   for (cur_node = target; !re_node_set_contains (dst_nodes, cur_node);)
    3215           0 :     {
    3216           0 :       bool ok;
    3217             : 
    3218           0 :       if (dfa->nodes[cur_node].type == type
    3219             :           && dfa->nodes[cur_node].opr.idx == ex_subexp)
    3220           0 :         {
    3221           0 :           if (type == OP_CLOSE_SUBEXP)
    3222           0 :             {
    3223             :               ok = re_node_set_insert (dst_nodes, cur_node);
    3224           0 :               if (BE (! ok, 0))
    3225             :                 return REG_ESPACE;
    3226           0 :             }
    3227           0 :           break;
    3228           0 :         }
    3229           0 :       ok = re_node_set_insert (dst_nodes, cur_node);
    3230           0 :       if (BE (! ok, 0))
    3231           0 :         return REG_ESPACE;
    3232             :       if (dfa->edests[cur_node].nelem == 0)
    3233             :         break;
    3234           0 :       if (dfa->edests[cur_node].nelem == 2)
    3235           0 :         {
    3236             :           reg_errcode_t err;
    3237           0 :           err = check_arrival_expand_ecl_sub (dfa, dst_nodes,
    3238           0 :                                               dfa->edests[cur_node].elems[1],
    3239             :                                               ex_subexp, type);
    3240           0 :           if (BE (err != REG_NOERROR, 0))
    3241             :             return err;
    3242           0 :         }
    3243             :       cur_node = dfa->edests[cur_node].elems[0];
    3244             :     }
    3245             :   return REG_NOERROR;
    3246             : }
    3247             : 
    3248             : 
    3249             : /* For all the back references in the current state, calculate the
    3250             :    destination of the back references by the appropriate entry
    3251             :    in MCTX->BKREF_ENTS.  */
    3252           0 : 
    3253             : static reg_errcode_t
    3254             : internal_function
    3255           0 : expand_bkref_cache (re_match_context_t *mctx, re_node_set *cur_nodes,
    3256             :                     Idx cur_str, Idx subexp_num, int type)
    3257           0 : {
    3258             :   const re_dfa_t *const dfa = mctx->dfa;
    3259             :   reg_errcode_t err;
    3260           0 :   Idx cache_idx_start = search_cur_bkref_entry (mctx, cur_str);
    3261           0 :   struct re_backref_cache_entry *ent;
    3262             : 
    3263           0 :   if (cache_idx_start == REG_MISSING)
    3264           0 :     return REG_NOERROR;
    3265             : 
    3266             :  restart:
    3267             :   ent = mctx->bkref_ents + cache_idx_start;
    3268             :   do
    3269             :     {
    3270           0 :       Idx to_idx, next_node;
    3271           0 : 
    3272             :       /* Is this entry ENT is appropriate?  */
    3273           0 :       if (!re_node_set_contains (cur_nodes, ent->node))
    3274             :         continue; /* No.  */
    3275             : 
    3276           0 :       to_idx = cur_str + ent->subexp_to - ent->subexp_from;
    3277             :       /* Calculate the destination of the back reference, and append it
    3278             :          to MCTX->STATE_LOG.  */
    3279             :       if (to_idx == cur_str)
    3280             :         {
    3281             :           /* The backreference did epsilon transit, we must re-check all the
    3282           0 :              node in the current state.  */
    3283           0 :           re_node_set new_dests;
    3284           0 :           reg_errcode_t err2, err3;
    3285           0 :           next_node = dfa->edests[ent->node].elems[0];
    3286           0 :           if (re_node_set_contains (cur_nodes, next_node))
    3287           0 :             continue;
    3288           0 :           err = re_node_set_init_1 (&new_dests, next_node);
    3289           0 :           err2 = check_arrival_expand_ecl (dfa, &new_dests, subexp_num, type);
    3290             :           err3 = re_node_set_merge (cur_nodes, &new_dests);
    3291             :           re_node_set_free (&new_dests);
    3292           0 :           if (BE (err != REG_NOERROR || err2 != REG_NOERROR
    3293           0 :                   || err3 != REG_NOERROR, 0))
    3294           0 :             {
    3295             :               err = (err != REG_NOERROR ? err
    3296             :                      : (err2 != REG_NOERROR ? err2 : err3));
    3297           0 :               return err;
    3298             :             }
    3299             :           /* TODO: It is still inefficient...  */
    3300             :           goto restart;
    3301             :         }
    3302           0 :       else
    3303           0 :         {
    3304             :           re_node_set union_set;
    3305             :           next_node = dfa->nexts[ent->node];
    3306           0 :           if (mctx->state_log[to_idx])
    3307             :             {
    3308           0 :               bool ok;
    3309           0 :               if (re_node_set_contains (&mctx->state_log[to_idx]->nodes,
    3310           0 :                                         next_node))
    3311           0 :                 continue;
    3312           0 :               err = re_node_set_init_copy (&union_set,
    3313             :                                            &mctx->state_log[to_idx]->nodes);
    3314           0 :               ok = re_node_set_insert (&union_set, next_node);
    3315           0 :               if (BE (err != REG_NOERROR || ! ok, 0))
    3316           0 :                 {
    3317             :                   re_node_set_free (&union_set);
    3318             :                   err = err != REG_NOERROR ? err : REG_ESPACE;
    3319             :                   return err;
    3320             :                 }
    3321           0 :             }
    3322           0 :           else
    3323           0 :             {
    3324             :               err = re_node_set_init_1 (&union_set, next_node);
    3325           0 :               if (BE (err != REG_NOERROR, 0))
    3326           0 :                 return err;
    3327           0 :             }
    3328             :           mctx->state_log[to_idx] = re_acquire_state (&err, dfa, &union_set);
    3329           0 :           re_node_set_free (&union_set);
    3330             :           if (BE (mctx->state_log[to_idx] == NULL
    3331             :                   && err != REG_NOERROR, 0))
    3332           0 :             return err;
    3333           0 :         }
    3334             :     }
    3335             :   while (ent++->more);
    3336             :   return REG_NOERROR;
    3337             : }
    3338             : 
    3339             : /* Build transition table for the state.
    3340             :    Return true if successful.  */
    3341         135 : 
    3342             : static bool
    3343             : internal_function
    3344             : build_trtable (const re_dfa_t *dfa, re_dfastate_t *state)
    3345             : {
    3346         135 :   reg_errcode_t err;
    3347             :   Idx i, j;
    3348         135 :   int ch;
    3349         135 :   bool need_word_trtable = false;
    3350             :   bitset_word_t elem, mask;
    3351             :   bool dests_node_malloced = false;
    3352         135 :   bool dest_states_malloced = false;
    3353             :   Idx ndests; /* Number of the destination states from `state'.  */
    3354             :   re_dfastate_t **trtable;
    3355             :   re_dfastate_t **dest_states = NULL, **dest_states_word, **dest_states_nl;
    3356             :   re_node_set follows, *dests_node;
    3357             :   bitset_t *dests_ch;
    3358             :   bitset_t acceptable;
    3359             : 
    3360             :   struct dests_alloc
    3361             :   {
    3362             :     re_node_set dests_node[SBC_MAX];
    3363             :     bitset_t dests_ch[SBC_MAX];
    3364             :   } *dests_alloc;
    3365             : 
    3366             :   /* We build DFA states which corresponds to the destination nodes
    3367             :      from `state'.  `dests_node[i]' represents the nodes which i-th
    3368             :      destination state contains, and `dests_ch[i]' represents the
    3369             :      characters which i-th destination state accepts.  */
    3370             :   if (__libc_use_alloca (sizeof (struct dests_alloc)))
    3371         135 :     dests_alloc = (struct dests_alloc *) alloca (sizeof (struct dests_alloc));
    3372         135 :   else
    3373           0 :     {
    3374         135 :       dests_alloc = re_malloc (struct dests_alloc, 1);
    3375             :       if (BE (dests_alloc == NULL, 0))
    3376         135 :         return false;
    3377         135 :       dests_node_malloced = true;
    3378             :     }
    3379             :   dests_node = dests_alloc->dests_node;
    3380         135 :   dests_ch = dests_alloc->dests_ch;
    3381             : 
    3382             :   /* Initialize transiton table.  */
    3383             :   state->word_trtable = state->trtable = NULL;
    3384         135 : 
    3385         135 :   /* At first, group all nodes belonging to `state' into several
    3386             :      destinations.  */
    3387          63 :   ndests = group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch);
    3388          63 :   if (BE (! REG_VALID_NONZERO_INDEX (ndests), 0))
    3389          63 :     {
    3390             :       if (dests_node_malloced)
    3391          63 :         free (dests_alloc);
    3392          63 :       if (ndests == 0)
    3393          63 :         {
    3394             :           state->trtable = (re_dfastate_t **)
    3395           0 :             calloc (sizeof (re_dfastate_t *), SBC_MAX);
    3396             :           return true;
    3397             :         }
    3398          72 :       return false;
    3399          72 :     }
    3400           0 : 
    3401             :   err = re_node_set_alloc (&follows, ndests + 1);
    3402             :   if (BE (err != REG_NOERROR, 0))
    3403          72 :     goto out_free;
    3404             : 
    3405             :   /* Avoid arithmetic overflow in size calculation.  */
    3406             :   if (BE ((((SIZE_MAX - (sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX)
    3407           0 :             / (3 * sizeof (re_dfastate_t *)))
    3408             :            < ndests),
    3409          72 :           0))
    3410             :     goto out_free;
    3411           0 : 
    3412           0 :   if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX
    3413             :                          + ndests * 3 * sizeof (re_dfastate_t *)))
    3414             :     dest_states = (re_dfastate_t **)
    3415          72 :       alloca (ndests * 3 * sizeof (re_dfastate_t *));
    3416          72 :   else
    3417          72 :     {
    3418             :       dest_states = (re_dfastate_t **)
    3419           0 :         malloc (ndests * 3 * sizeof (re_dfastate_t *));
    3420           0 :       if (BE (dest_states == NULL, 0))
    3421           0 :         {
    3422           0 : out_free:
    3423           0 :           if (dest_states_malloced)
    3424           0 :             free (dest_states);
    3425           0 :           re_node_set_free (&follows);
    3426           0 :           for (i = 0; i < ndests; ++i)
    3427           0 :             re_node_set_free (dests_node + i);
    3428             :           if (dests_node_malloced)
    3429          72 :             free (dests_alloc);
    3430             :           return false;
    3431          72 :         }
    3432          72 :       dest_states_malloced = true;
    3433          72 :     }
    3434             :   dest_states_word = dest_states + ndests;
    3435             :   dest_states_nl = dest_states_word + ndests;
    3436         165 :   bitset_empty (acceptable);
    3437             : 
    3438             :   /* Then build the states for all destinations.  */
    3439          93 :   for (i = 0; i < ndests; ++i)
    3440             :     {
    3441         186 :       Idx next_node;
    3442             :       re_node_set_empty (&follows);
    3443          93 :       /* Merge the follows of this destination states.  */
    3444          93 :       for (j = 0; j < dests_node[i].nelem; ++j)
    3445             :         {
    3446          93 :           next_node = dfa->nexts[dests_node[i].elems[j]];
    3447          93 :           if (next_node != REG_MISSING)
    3448           0 :             {
    3449             :               err = re_node_set_merge (&follows, dfa->eclosures + next_node);
    3450             :               if (BE (err != REG_NOERROR, 0))
    3451          93 :                 goto out_free;
    3452          93 :             }
    3453           0 :         }
    3454             :       dest_states[i] = re_acquire_state_context (&err, dfa, &follows, 0);
    3455             :       if (BE (dest_states[i] == NULL && err != REG_NOERROR, 0))
    3456          93 :         goto out_free;
    3457             :       /* If the new state has context constraint,
    3458          18 :          build appropriate states for these contexts.  */
    3459             :       if (dest_states[i]->has_constraint)
    3460          18 :         {
    3461           0 :           dest_states_word[i] = re_acquire_state_context (&err, dfa, &follows,
    3462             :                                                           CONTEXT_WORD);
    3463          18 :           if (BE (dest_states_word[i] == NULL && err != REG_NOERROR, 0))
    3464           0 :             goto out_free;
    3465             : 
    3466          18 :           if (dest_states[i] != dest_states_word[i] && dfa->mb_cur_max > 1)
    3467             :             need_word_trtable = true;
    3468          18 : 
    3469           0 :           dest_states_nl[i] = re_acquire_state_context (&err, dfa, &follows,
    3470             :                                                         CONTEXT_NEWLINE);
    3471             :           if (BE (dest_states_nl[i] == NULL && err != REG_NOERROR, 0))
    3472             :             goto out_free;
    3473          75 :         }
    3474          75 :       else
    3475             :         {
    3476          93 :           dest_states_word[i] = dest_states[i];
    3477             :           dest_states_nl[i] = dest_states[i];
    3478             :         }
    3479          72 :       bitset_merge (acceptable, dests_ch[i]);
    3480             :     }
    3481             : 
    3482             :   if (!BE (need_word_trtable, 0))
    3483             :     {
    3484             :       /* We don't care about whether the following character is a word
    3485          72 :          character, or we are in a single-byte character set so we can
    3486          72 :          discern by looking at the character code: allocate a
    3487          72 :          256-entry transition table.  */
    3488           0 :       trtable = state->trtable =
    3489             :         (re_dfastate_t **) calloc (sizeof (re_dfastate_t *), SBC_MAX);
    3490             :       if (BE (trtable == NULL, 0))
    3491         360 :         goto out_free;
    3492        5681 : 
    3493             :       /* For all characters ch...:  */
    3494        5105 :       for (i = 0; i < BITSET_WORDS; ++i)
    3495        5105 :         for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;
    3496             :              elem;
    3497             :              mask <<= 1, elem >>= 1, ++ch)
    3498             :           if (BE (elem & 1, 0))
    3499        1858 :             {
    3500             :               /* There must be exactly one destination which accepts
    3501             :                  character ch.  See group_nodes_into_DFAstates.  */
    3502             :               for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
    3503        1858 :                 ;
    3504           0 : 
    3505             :               /* j-th destination accepts the word character ch.  */
    3506        1858 :               if (dfa->word_char[i] & mask)
    3507             :                 trtable[ch] = dest_states_word[j];
    3508             :               else
    3509             :                 trtable[ch] = dest_states[j];
    3510             :             }
    3511             :     }
    3512             :   else
    3513             :     {
    3514             :       /* We care about whether the following character is a word
    3515             :          character, and we are in a multi-byte character set: discern
    3516           0 :          by looking at the character code: build two 256-entry
    3517           0 :          transition tables, one starting at trtable[0] and one
    3518           0 :          starting at trtable[SBC_MAX].  */
    3519           0 :       trtable = state->word_trtable =
    3520             :         (re_dfastate_t **) calloc (sizeof (re_dfastate_t *), 2 * SBC_MAX);
    3521             :       if (BE (trtable == NULL, 0))
    3522           0 :         goto out_free;
    3523           0 : 
    3524             :       /* For all characters ch...:  */
    3525           0 :       for (i = 0; i < BITSET_WORDS; ++i)
    3526           0 :         for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;
    3527             :              elem;
    3528             :              mask <<= 1, elem >>= 1, ++ch)
    3529             :           if (BE (elem & 1, 0))
    3530           0 :             {
    3531             :               /* There must be exactly one destination which accepts
    3532             :                  character ch.  See group_nodes_into_DFAstates.  */
    3533             :               for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
    3534           0 :                 ;
    3535           0 : 
    3536             :               /* j-th destination accepts the word character ch.  */
    3537             :               trtable[ch] = dest_states[j];
    3538             :               trtable[ch + SBC_MAX] = dest_states_word[j];
    3539             :             }
    3540          72 :     }
    3541             : 
    3542             :   /* new line */
    3543          47 :   if (bitset_contain (acceptable, NEWLINE_CHAR))
    3544          47 :     {
    3545             :       /* The current state accepts newline character.  */
    3546             :       for (j = 0; j < ndests; ++j)
    3547          26 :         if (bitset_contain (dests_ch[j], NEWLINE_CHAR))
    3548          26 :           {
    3549           0 :             /* k-th destination accepts newline character.  */
    3550             :             trtable[NEWLINE_CHAR] = dest_states_nl[j];
    3551             :             if (need_word_trtable)
    3552          26 :               trtable[NEWLINE_CHAR + SBC_MAX] = dest_states_nl[j];
    3553             :             /* There must be only one destination which accepts
    3554             :                newline.  See group_nodes_into_DFAstates.  */
    3555             :             break;
    3556          72 :           }
    3557          72 :     }
    3558             : 
    3559          72 :   if (dest_states_malloced)
    3560         165 :     free (dest_states);
    3561          93 : 
    3562             :   re_node_set_free (&follows);
    3563          72 :   for (i = 0; i < ndests; ++i)
    3564          72 :     re_node_set_free (dests_node + i);
    3565             : 
    3566          72 :   if (dests_node_malloced)
    3567             :     free (dests_alloc);
    3568             : 
    3569             :   return true;
    3570             : }
    3571             : 
    3572             : /* Group all nodes belonging to STATE into several destinations.
    3573             :    Then for all destinations, set the nodes belonging to the destination
    3574             :    to DESTS_NODE[i] and set the characters accepted by the destination
    3575             :    to DEST_CH[i].  This function return the number of destinations.  */
    3576         135 : 
    3577             : static Idx
    3578             : internal_function
    3579             : group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state,
    3580             :                             re_node_set *dests_node, bitset_t *dests_ch)
    3581             : {
    3582             :   reg_errcode_t err;
    3583             :   bool ok;
    3584         135 :   Idx i, j, k;
    3585         135 :   Idx ndests; /* Number of the destinations from `state'.  */
    3586         135 :   bitset_t accepts; /* Characters a node can accept.  */
    3587             :   const re_node_set *cur_nodes = &state->nodes;
    3588             :   bitset_empty (accepts);
    3589         405 :   ndests = 0;
    3590             : 
    3591         270 :   /* For all the nodes belonging to `state',  */
    3592         270 :   for (i = 0; i < cur_nodes->nelem; ++i)
    3593         270 :     {
    3594             :       re_token_t *node = &dfa->nodes[cur_nodes->elems[i]];
    3595             :       re_token_type_t type = node->type;
    3596         270 :       unsigned int constraint = node->constraint;
    3597          56 : 
    3598         214 :       /* Enumerate all single byte character this node can accept.  */
    3599             :       if (type == CHARACTER)
    3600          35 :         bitset_set (accepts, node->opr.c);
    3601             :       else if (type == SIMPLE_BRACKET)
    3602         179 :         {
    3603             :           bitset_merge (accepts, node->opr.sbcset);
    3604             :         }
    3605           2 :       else if (type == OP_PERIOD)
    3606           0 :         {
    3607             : #ifdef RE_ENABLE_I18N
    3608             :           if (dfa->mb_cur_max > 1)
    3609           2 :             bitset_merge (accepts, dfa->sb_char);
    3610           2 :           else
    3611           0 : #endif
    3612           2 :             bitset_set_all (accepts);
    3613           2 :           if (!(dfa->syntax & RE_DOT_NEWLINE))
    3614             :             bitset_clear (accepts, '\n');
    3615             :           if (dfa->syntax & RE_DOT_NOT_NULL)
    3616         177 :             bitset_clear (accepts, '\0');
    3617             :         }
    3618             : #ifdef RE_ENABLE_I18N
    3619           0 :       else if (type == OP_UTF8_PERIOD)
    3620             :         {
    3621             :           if (ASCII_CHARS % BITSET_WORD_BITS == 0)
    3622           0 :             memset (accepts, -1, ASCII_CHARS / CHAR_BIT);
    3623           0 :           else
    3624           0 :             bitset_merge (accepts, utf8_sb_map);
    3625           0 :           if (!(dfa->syntax & RE_DOT_NEWLINE))
    3626             :             bitset_clear (accepts, '\n');
    3627             :           if (dfa->syntax & RE_DOT_NOT_NULL)
    3628             :             bitset_clear (accepts, '\0');
    3629         177 :         }
    3630             : #endif
    3631             :       else
    3632             :         continue;
    3633          93 : 
    3634             :       /* Check the `accepts' and sift the characters which are not
    3635           7 :          match it the context.  */
    3636             :       if (constraint)
    3637           7 :         {
    3638           7 :           if (constraint & NEXT_NEWLINE_CONSTRAINT)
    3639           7 :             {
    3640           7 :               bool accepts_newline = bitset_contain (accepts, NEWLINE_CHAR);
    3641             :               bitset_empty (accepts);
    3642           0 :               if (accepts_newline)
    3643             :                 bitset_set (accepts, NEWLINE_CHAR);
    3644           7 :               else
    3645             :                 continue;
    3646           0 :             }
    3647           0 :           if (constraint & NEXT_ENDBUF_CONSTRAINT)
    3648             :             {
    3649             :               bitset_empty (accepts);
    3650           7 :               continue;
    3651             :             }
    3652           0 : 
    3653           0 :           if (constraint & NEXT_WORD_CONSTRAINT)
    3654             :             {
    3655           0 :               bitset_word_t any_set = 0;
    3656           0 :               if (type == CHARACTER && !node->word_char)
    3657             :                 {
    3658             :                   bitset_empty (accepts);
    3659           0 :                   continue;
    3660           0 :                 }
    3661           0 : #ifdef RE_ENABLE_I18N
    3662             :               if (dfa->mb_cur_max > 1)
    3663             :                 for (j = 0; j < BITSET_WORDS; ++j)
    3664           0 :                   any_set |= (accepts[j] &= (dfa->word_char[j] | ~dfa->sb_char[j]));
    3665           0 :               else
    3666           0 : #endif
    3667           0 :                 for (j = 0; j < BITSET_WORDS; ++j)
    3668             :                   any_set |= (accepts[j] &= dfa->word_char[j]);
    3669           7 :               if (!any_set)
    3670             :                 continue;
    3671           0 :             }
    3672           0 :           if (constraint & NEXT_NOTWORD_CONSTRAINT)
    3673             :             {
    3674           0 :               bitset_word_t any_set = 0;
    3675           0 :               if (type == CHARACTER && node->word_char)
    3676             :                 {
    3677             :                   bitset_empty (accepts);
    3678           0 :                   continue;
    3679           0 :                 }
    3680           0 : #ifdef RE_ENABLE_I18N
    3681             :               if (dfa->mb_cur_max > 1)
    3682             :                 for (j = 0; j < BITSET_WORDS; ++j)
    3683           0 :                   any_set |= (accepts[j] &= ~(dfa->word_char[j] & dfa->sb_char[j]));
    3684           0 :               else
    3685           0 : #endif
    3686           0 :                 for (j = 0; j < BITSET_WORDS; ++j)
    3687             :                   any_set |= (accepts[j] &= ~dfa->word_char[j]);
    3688             :               if (!any_set)
    3689             :                 continue;
    3690             :             }
    3691             :         }
    3692         135 : 
    3693             :       /* Then divide `accepts' into DFA states, or create a new
    3694             :          state.  Above, we make sure that accepts is not empty.  */
    3695             :       for (j = 0; j < ndests; ++j)
    3696             :         {
    3697             :           bitset_t intersec; /* Intersection sets, see below.  */
    3698             :           bitset_t remains;
    3699             :           /* Flags, see below.  */
    3700          42 :           bitset_word_t has_intersec, not_subset, not_consumed;
    3701          63 : 
    3702             :           /* Optimization, skip if this state doesn't accept the character.  */
    3703             :           if (type == CHARACTER && !bitset_contain (dests_ch[j], node->opr.c))
    3704          21 :             continue;
    3705         105 : 
    3706          84 :           /* Enumerate the intersection set of this state and `accepts'.  */
    3707             :           has_intersec = 0;
    3708          21 :           for (k = 0; k < BITSET_WORDS; ++k)
    3709          21 :             has_intersec |= intersec[k] = accepts[k] & dests_ch[j][k];
    3710             :           /* And skip if the intersection set is empty.  */
    3711             :           if (!has_intersec)
    3712           0 :             continue;
    3713           0 : 
    3714             :           /* Then check if this state is a subset of `accepts'.  */
    3715           0 :           not_subset = not_consumed = 0;
    3716           0 :           for (k = 0; k < BITSET_WORDS; ++k)
    3717             :             {
    3718             :               not_subset |= remains[k] = ~accepts[k] & dests_ch[j][k];
    3719             :               not_consumed |= accepts[k] = accepts[k] & ~dests_ch[j][k];
    3720             :             }
    3721           0 : 
    3722             :           /* If this state isn't a subset of `accepts', create a
    3723           0 :              new group state, which has the `remains'. */
    3724           0 :           if (not_subset)
    3725           0 :             {
    3726           0 :               bitset_copy (dests_ch[ndests], remains);
    3727           0 :               bitset_copy (dests_ch[j], intersec);
    3728           0 :               err = re_node_set_init_copy (dests_node + ndests, &dests_node[j]);
    3729             :               if (BE (err != REG_NOERROR, 0))
    3730             :                 goto error_return;
    3731             :               ++ndests;
    3732           0 :             }
    3733           0 : 
    3734           0 :           /* Put the position in the current group. */
    3735             :           ok = re_node_set_insert (&dests_node[j], cur_nodes->elems[i]);
    3736             :           if (BE (! ok, 0))
    3737           0 :             goto error_return;
    3738           0 : 
    3739             :           /* If all characters are consumed, go to next node. */
    3740             :           if (!not_consumed)
    3741          93 :             break;
    3742             :         }
    3743          93 :       /* Some characters remain, create a new group. */
    3744          93 :       if (j == ndests)
    3745          93 :         {
    3746           0 :           bitset_copy (dests_ch[ndests], accepts);
    3747          93 :           err = re_node_set_init_1 (dests_node + ndests, cur_nodes->elems[i]);
    3748          93 :           if (BE (err != REG_NOERROR, 0))
    3749             :             goto error_return;
    3750             :           ++ndests;
    3751         135 :           bitset_empty (accepts);
    3752           0 :         }
    3753           0 :     }
    3754           0 :   return ndests;
    3755           0 :  error_return:
    3756             :   for (j = 0; j < ndests; ++j)
    3757             :     re_node_set_free (dests_node + j);
    3758             :   return REG_MISSING;
    3759             : }
    3760             : 
    3761             : #ifdef RE_ENABLE_I18N
    3762             : /* Check how many bytes the node `dfa->nodes[node_idx]' accepts.
    3763             :    Return the number of the bytes the node accepts.
    3764             :    STR_IDX is the current index of the input string.
    3765             : 
    3766             :    This function handles the nodes which can accept one character, or
    3767             :    one collating element like '.', '[a-z]', opposite to the other nodes
    3768             :    can only accept one byte.  */
    3769           0 : 
    3770             : static int
    3771             : internal_function
    3772           0 : check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx,
    3773             :                          const re_string_t *input, Idx str_idx)
    3774             : {
    3775             :   const re_token_t *node = dfa->nodes + node_idx;
    3776           0 :   int char_len, elem_len;
    3777             :   Idx i;
    3778           0 : 
    3779           0 :   if (BE (node->type == OP_UTF8_PERIOD, 0))
    3780           0 :     {
    3781             :       unsigned char c = re_string_byte_at (input, str_idx), d;
    3782           0 :       if (BE (c < 0xc2, 1))
    3783           0 :         return 0;
    3784             : 
    3785           0 :       if (str_idx + 2 > input->len)
    3786           0 :         return 0;
    3787           0 : 
    3788           0 :       d = re_string_byte_at (input, str_idx + 1);
    3789             :       if (c < 0xe0)
    3790           0 :         return (d < 0x80 || d > 0xbf) ? 0 : 2;
    3791           0 :       else if (c < 0xf0)
    3792           0 :         {
    3793             :           char_len = 3;
    3794           0 :           if (c == 0xe0 && d < 0xa0)
    3795             :             return 0;
    3796           0 :         }
    3797           0 :       else if (c < 0xf8)
    3798           0 :         {
    3799             :           char_len = 4;
    3800           0 :           if (c == 0xf0 && d < 0x90)
    3801             :             return 0;
    3802           0 :         }
    3803           0 :       else if (c < 0xfc)
    3804           0 :         {
    3805             :           char_len = 5;
    3806           0 :           if (c == 0xf8 && d < 0x88)
    3807             :             return 0;
    3808           0 :         }
    3809           0 :       else if (c < 0xfe)
    3810           0 :         {
    3811             :           char_len = 6;
    3812             :           if (c == 0xfc && d < 0x84)
    3813           0 :             return 0;
    3814             :         }
    3815           0 :       else
    3816           0 :         return 0;
    3817             : 
    3818           0 :       if (str_idx + char_len > input->len)
    3819             :         return 0;
    3820           0 : 
    3821           0 :       for (i = 1; i < char_len; ++i)
    3822           0 :         {
    3823             :           d = re_string_byte_at (input, str_idx + i);
    3824           0 :           if (d < 0x80 || d > 0xbf)
    3825             :             return 0;
    3826             :         }
    3827           0 :       return char_len;
    3828           0 :     }
    3829             : 
    3830           0 :   char_len = re_string_char_size_at (input, str_idx);
    3831           0 :   if (node->type == OP_PERIOD)
    3832             :     {
    3833             :       if (char_len <= 1)
    3834             :         return 0;
    3835           0 :       /* FIXME: I don't think this if is needed, as both '\n'
    3836           0 :          and '\0' are char_len == 1.  */
    3837           0 :       /* '.' accepts any one character except the following two cases.  */
    3838           0 :       if ((!(dfa->syntax & RE_DOT_NEWLINE) &&
    3839           0 :            re_string_byte_at (input, str_idx) == '\n') ||
    3840           0 :           ((dfa->syntax & RE_DOT_NOT_NULL) &&
    3841             :            re_string_byte_at (input, str_idx) == '\0'))
    3842             :         return 0;
    3843           0 :       return char_len;
    3844           0 :     }
    3845           0 : 
    3846             :   elem_len = re_string_elem_size_at (input, str_idx);
    3847           0 :   if ((elem_len <= 1 && char_len <= 1) || char_len == 0)
    3848             :     return 0;
    3849           0 : 
    3850             :   if (node->type == COMPLEX_BRACKET)
    3851             :     {
    3852             :       const re_charset_t *cset = node->opr.mbcset;
    3853             : # ifdef _LIBC
    3854             :       const unsigned char *pin
    3855             :         = ((const unsigned char *) re_string_get_buffer (input) + str_idx);
    3856           0 :       Idx j;
    3857           0 :       uint32_t nrules;
    3858           0 : # endif /* _LIBC */
    3859             :       int match_len = 0;
    3860             :       wchar_t wc = ((cset->nranges || cset->nchar_classes || cset->nmbchars)
    3861           0 :                     ? re_string_wchar_at (input, str_idx) : 0);
    3862           0 : 
    3863             :       /* match with multibyte character?  */
    3864           0 :       for (i = 0; i < cset->nmbchars; ++i)
    3865           0 :         if (wc == cset->mbchars[i])
    3866             :           {
    3867             :             match_len = char_len;
    3868           0 :             goto check_node_accept_bytes_match;
    3869             :           }
    3870           0 :       /* match with character_class?  */
    3871           0 :       for (i = 0; i < cset->nchar_classes; ++i)
    3872             :         {
    3873           0 :           wctype_t wt = cset->char_classes[i];
    3874           0 :           if (__iswctype (wc, wt))
    3875             :             {
    3876             :               match_len = char_len;
    3877             :               goto check_node_accept_bytes_match;
    3878             :             }
    3879             :         }
    3880             : 
    3881             : # ifdef _LIBC
    3882             :       nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
    3883             :       if (nrules != 0)
    3884             :         {
    3885             :           unsigned int in_collseq = 0;
    3886             :           const int32_t *table, *indirect;
    3887             :           const unsigned char *weights, *extra;
    3888             :           const char *collseqwc;
    3889             :           int32_t idx;
    3890             :           /* This #include defines a local function!  */
    3891             : #  include <locale/weight.h>
    3892             : 
    3893             :           /* match with collating_symbol?  */
    3894             :           if (cset->ncoll_syms)
    3895             :             extra = (const unsigned char *)
    3896             :               _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
    3897             :           for (i = 0; i < cset->ncoll_syms; ++i)
    3898             :             {
    3899             :               const unsigned char *coll_sym = extra + cset->coll_syms[i];
    3900             :               /* Compare the length of input collating element and
    3901             :                  the length of current collating element.  */
    3902             :               if (*coll_sym != elem_len)
    3903             :                 continue;
    3904             :               /* Compare each bytes.  */
    3905             :               for (j = 0; j < *coll_sym; j++)
    3906             :                 if (pin[j] != coll_sym[1 + j])
    3907             :                   break;
    3908             :               if (j == *coll_sym)
    3909             :                 {
    3910             :                   /* Match if every bytes is equal.  */
    3911             :                   match_len = j;
    3912             :                   goto check_node_accept_bytes_match;
    3913             :                 }
    3914             :             }
    3915             : 
    3916             :           if (cset->nranges)
    3917             :             {
    3918             :               if (elem_len <= char_len)
    3919             :                 {
    3920             :                   collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);
    3921             :                   in_collseq = __collseq_table_lookup (collseqwc, wc);
    3922             :                 }
    3923             :               else
    3924             :                 in_collseq = find_collation_sequence_value (pin, elem_len);
    3925             :             }
    3926             :           /* match with range expression?  */
    3927             :           for (i = 0; i < cset->nranges; ++i)
    3928             :             if (cset->range_starts[i] <= in_collseq
    3929             :                 && in_collseq <= cset->range_ends[i])
    3930             :               {
    3931             :                 match_len = elem_len;
    3932             :                 goto check_node_accept_bytes_match;
    3933             :               }
    3934             : 
    3935             :           /* match with equivalence_class?  */
    3936             :           if (cset->nequiv_classes)
    3937             :             {
    3938             :               const unsigned char *cp = pin;
    3939             :               table = (const int32_t *)
    3940             :                 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
    3941             :               weights = (const unsigned char *)
    3942             :                 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
    3943             :               extra = (const unsigned char *)
    3944             :                 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
    3945             :               indirect = (const int32_t *)
    3946             :                 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
    3947             :               idx = findidx (&cp);
    3948             :               if (idx > 0)
    3949             :                 for (i = 0; i < cset->nequiv_classes; ++i)
    3950             :                   {
    3951             :                     int32_t equiv_class_idx = cset->equiv_classes[i];
    3952             :                     size_t weight_len = weights[idx];
    3953             :                     if (weight_len == weights[equiv_class_idx])
    3954             :                       {
    3955             :                         Idx cnt = 0;
    3956             :                         while (cnt <= weight_len
    3957             :                                && (weights[equiv_class_idx + 1 + cnt]
    3958             :                                    == weights[idx + 1 + cnt]))
    3959             :                           ++cnt;
    3960             :                         if (cnt > weight_len)
    3961             :                           {
    3962             :                             match_len = elem_len;
    3963             :                             goto check_node_accept_bytes_match;
    3964             :                           }
    3965             :                       }
    3966             :                   }
    3967             :             }
    3968             :         }
    3969             :       else
    3970             : # endif /* _LIBC */
    3971           0 :         {
    3972             :           /* match with range expression?  */
    3973             : #if __GNUC__ >= 2 && ! (__STDC_VERSION__ < 199901L && __STRICT_ANSI__)
    3974             :           wchar_t cmp_buf[] = {L'\0', L'\0', wc, L'\0', L'\0', L'\0'};
    3975             : #else
    3976           0 :           wchar_t cmp_buf[] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'};
    3977             :           cmp_buf[2] = wc;
    3978           0 : #endif
    3979           0 :           for (i = 0; i < cset->nranges; ++i)
    3980           0 :             {
    3981           0 :               cmp_buf[0] = cset->range_starts[i];
    3982             :               cmp_buf[4] = cset->range_ends[i];
    3983           0 :               if (wcscoll (cmp_buf, cmp_buf + 2) <= 0
    3984           0 :                   && wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0)
    3985             :                 {
    3986             :                   match_len = char_len;
    3987             :                   goto check_node_accept_bytes_match;
    3988           0 :                 }
    3989           0 :             }
    3990           0 :         }
    3991             :     check_node_accept_bytes_match:
    3992             :       if (!cset->non_match)
    3993           0 :         return match_len;
    3994           0 :       else
    3995             :         {
    3996           0 :           if (match_len > 0)
    3997             :             return 0;
    3998             :           else
    3999           0 :             return (elem_len > char_len) ? elem_len : char_len;
    4000             :         }
    4001             :     }
    4002             :   return 0;
    4003             : }
    4004             : 
    4005             : # ifdef _LIBC
    4006             : static unsigned int
    4007             : internal_function
    4008             : find_collation_sequence_value (const unsigned char *mbs, size_t mbs_len)
    4009             : {
    4010             :   uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
    4011             :   if (nrules == 0)
    4012             :     {
    4013             :       if (mbs_len == 1)
    4014             :         {
    4015             :           /* No valid character.  Match it as a single byte character.  */
    4016             :           const unsigned char *collseq = (const unsigned char *)
    4017             :             _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB);
    4018             :           return collseq[mbs[0]];
    4019             :         }
    4020             :       return UINT_MAX;
    4021             :     }
    4022             :   else
    4023             :     {
    4024             :       int32_t idx;
    4025             :       const unsigned char *extra = (const unsigned char *)
    4026             :         _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
    4027             :       int32_t extrasize = (const unsigned char *)
    4028             :         _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB + 1) - extra;
    4029             : 
    4030             :       for (idx = 0; idx < extrasize;)
    4031             :         {
    4032             :           int mbs_cnt;
    4033             :           bool found = false;
    4034             :           int32_t elem_mbs_len;
    4035             :           /* Skip the name of collating element name.  */
    4036             :           idx = idx + extra[idx] + 1;
    4037             :           elem_mbs_len = extra[idx++];
    4038             :           if (mbs_len == elem_mbs_len)
    4039             :             {
    4040             :               for (mbs_cnt = 0; mbs_cnt < elem_mbs_len; ++mbs_cnt)
    4041             :                 if (extra[idx + mbs_cnt] != mbs[mbs_cnt])
    4042             :                   break;
    4043             :               if (mbs_cnt == elem_mbs_len)
    4044             :                 /* Found the entry.  */
    4045             :                 found = true;
    4046             :             }
    4047             :           /* Skip the byte sequence of the collating element.  */
    4048             :           idx += elem_mbs_len;
    4049             :           /* Adjust for the alignment.  */
    4050             :           idx = (idx + 3) & ~3;
    4051             :           /* Skip the collation sequence value.  */
    4052             :           idx += sizeof (uint32_t);
    4053             :           /* Skip the wide char sequence of the collating element.  */
    4054             :           idx = idx + sizeof (uint32_t) * (extra[idx] + 1);
    4055             :           /* If we found the entry, return the sequence value.  */
    4056             :           if (found)
    4057             :             return *(uint32_t *) (extra + idx);
    4058             :           /* Skip the collation sequence value.  */
    4059             :           idx += sizeof (uint32_t);
    4060             :         }
    4061             :       return UINT_MAX;
    4062             :     }
    4063             : }
    4064             : # endif /* _LIBC */
    4065             : #endif /* RE_ENABLE_I18N */
    4066             : 
    4067             : /* Check whether the node accepts the byte which is IDX-th
    4068             :    byte of the INPUT.  */
    4069          42 : 
    4070             : static bool
    4071             : internal_function
    4072             : check_node_accept (const re_match_context_t *mctx, const re_token_t *node,
    4073          42 :                    Idx idx)
    4074          42 : {
    4075             :   unsigned char ch;
    4076          10 :   ch = re_string_byte_at (&mctx->input, idx);
    4077          10 :   switch (node->type)
    4078          10 :     {
    4079           0 :     case CHARACTER:
    4080             :       if (node->opr.c != ch)
    4081          27 :         return false;
    4082          27 :       break;
    4083           5 : 
    4084          22 :     case SIMPLE_BRACKET:
    4085             :       if (!bitset_contain (node->opr.sbcset, ch))
    4086             :         return false;
    4087           0 :       break;
    4088           0 : 
    4089           0 : #ifdef RE_ENABLE_I18N
    4090             :     case OP_UTF8_PERIOD:
    4091             :       if (ch >= ASCII_CHARS)
    4092             :         return false;
    4093           0 :       /* FALLTHROUGH */
    4094           0 : #endif
    4095           0 :     case OP_PERIOD:
    4096           0 :       if ((ch == '\n' && !(mctx->dfa->syntax & RE_DOT_NEWLINE))
    4097             :           || (ch == '\0' && (mctx->dfa->syntax & RE_DOT_NOT_NULL)))
    4098           5 :         return false;
    4099           5 :       break;
    4100             : 
    4101             :     default:
    4102          22 :       return false;
    4103             :     }
    4104             : 
    4105             :   if (node->constraint)
    4106          10 :     {
    4107             :       /* The node has constraints.  Check whether the current context
    4108          10 :          satisfies the constraints.  */
    4109           0 :       unsigned int context = re_string_context_at (&mctx->input, idx,
    4110             :                                                    mctx->eflags);
    4111             :       if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
    4112          22 :         return false;
    4113             :     }
    4114             : 
    4115             :   return true;
    4116             : }
    4117             : 
    4118             : /* Extend the buffers, if the buffers have run out.  */
    4119           0 : 
    4120             : static reg_errcode_t
    4121             : internal_function
    4122           0 : extend_buffers (re_match_context_t *mctx)
    4123             : {
    4124             :   reg_errcode_t ret;
    4125           0 :   re_string_t *pstr = &mctx->input;
    4126           0 : 
    4127             :   /* Avoid overflow.  */
    4128             :   if (BE (SIZE_MAX / 2 / sizeof (re_dfastate_t *) <= pstr->bufs_len, 0))
    4129           0 :     return REG_ESPACE;
    4130           0 : 
    4131           0 :   /* Double the lengthes of the buffers.  */
    4132             :   ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2);
    4133           0 :   if (BE (ret != REG_NOERROR, 0))
    4134             :     return ret;
    4135             : 
    4136             :   if (mctx->state_log != NULL)
    4137             :     {
    4138             :       /* And double the length of state_log.  */
    4139           0 :       /* XXX We have no indication of the size of this buffer.  If this
    4140             :          allocation fail we have no indication that the state_log array
    4141           0 :          does not have the right size.  */
    4142           0 :       re_dfastate_t **new_array = re_realloc (mctx->state_log, re_dfastate_t *,
    4143           0 :                                               pstr->bufs_len + 1);
    4144             :       if (BE (new_array == NULL, 0))
    4145             :         return REG_ESPACE;
    4146             :       mctx->state_log = new_array;
    4147           0 :     }
    4148             : 
    4149             :   /* Then reconstruct the buffers.  */
    4150           0 :   if (pstr->icase)
    4151             :     {
    4152           0 : #ifdef RE_ENABLE_I18N
    4153           0 :       if (pstr->mb_cur_max > 1)
    4154           0 :         {
    4155             :           ret = build_wcs_upper_buffer (pstr);
    4156             :           if (BE (ret != REG_NOERROR, 0))
    4157             :             return ret;
    4158           0 :         }
    4159             :       else
    4160             : #endif /* RE_ENABLE_I18N  */
    4161             :         build_upper_buffer (pstr);
    4162             :     }
    4163           0 :   else
    4164           0 :     {
    4165             : #ifdef RE_ENABLE_I18N
    4166             :       if (pstr->mb_cur_max > 1)
    4167             :         build_wcs_buffer (pstr);
    4168           0 :       else
    4169           0 : #endif /* RE_ENABLE_I18N  */
    4170             :         {
    4171             :           if (pstr->trans != NULL)
    4172           0 :             re_string_translate_buffer (pstr);
    4173             :         }
    4174             :     }
    4175             :   return REG_NOERROR;
    4176             : }
    4177             : 
    4178             : 
    4179             : /* Functions for matching context.  */
    4180             : 
    4181             : /* Initialize MCTX.  */
    4182         356 : 
    4183             : static reg_errcode_t
    4184         356 : internal_function
    4185         356 : match_ctx_init (re_match_context_t *mctx, int eflags, Idx n)
    4186         356 : {
    4187             :   mctx->eflags = eflags;
    4188             :   mctx->match_last = REG_MISSING;
    4189           0 :   if (n > 0)
    4190             :     {
    4191             :       /* Avoid overflow.  */
    4192           0 :       size_t max_object_size =
    4193           0 :         MAX (sizeof (struct re_backref_cache_entry),
    4194             :              sizeof (re_sub_match_top_t *));
    4195           0 :       if (BE (SIZE_MAX / max_object_size < n, 0))
    4196           0 :         return REG_ESPACE;
    4197           0 : 
    4198           0 :       mctx->bkref_ents = re_malloc (struct re_backref_cache_entry, n);
    4199             :       mctx->sub_tops = re_malloc (re_sub_match_top_t *, n);
    4200             :       if (BE (mctx->bkref_ents == NULL || mctx->sub_tops == NULL, 0))
    4201             :         return REG_ESPACE;
    4202             :     }
    4203             :   /* Already zero-ed by the caller.
    4204             :      else
    4205         356 :        mctx->bkref_ents = NULL;
    4206         356 :      mctx->nbkref_ents = 0;
    4207         356 :      mctx->nsub_tops = 0;  */
    4208         356 :   mctx->abkref_ents = n;
    4209             :   mctx->max_mb_elem_len = 1;
    4210             :   mctx->asub_tops = n;
    4211             :   return REG_NOERROR;
    4212             : }
    4213             : 
    4214             : /* Clean the entries which depend on the current input in MCTX.
    4215             :    This function must be invoked when the matcher changes the start index
    4216             :    of the input, or changes the input string.  */
    4217         138 : 
    4218             : static void
    4219             : internal_function
    4220         138 : match_ctx_clean (re_match_context_t *mctx)
    4221             : {
    4222             :   Idx st_idx;
    4223           0 :   for (st_idx = 0; st_idx < mctx->nsub_tops; ++st_idx)
    4224           0 :     {
    4225             :       Idx sl_idx;
    4226           0 :       re_sub_match_top_t *top = mctx->sub_tops[st_idx];
    4227           0 :       for (sl_idx = 0; sl_idx < top->nlasts; ++sl_idx)
    4228           0 :         {
    4229             :           re_sub_match_last_t *last = top->lasts[sl_idx];
    4230           0 :           re_free (last->path.array);
    4231           0 :           re_free (last);
    4232             :         }
    4233           0 :       re_free (top->lasts);
    4234           0 :       if (top->path)
    4235             :         {
    4236           0 :           re_free (top->path->array);
    4237             :           re_free (top->path);
    4238             :         }
    4239         138 :       free (top);
    4240         138 :     }
    4241         138 : 
    4242             :   mctx->nsub_tops = 0;
    4243             :   mctx->nbkref_ents = 0;
    4244             : }
    4245             : 
    4246             : /* Free all the memory associated with MCTX.  */
    4247           0 : 
    4248             : static void
    4249             : internal_function
    4250           0 : match_ctx_free (re_match_context_t *mctx)
    4251           0 : {
    4252           0 :   /* First, free all the memory associated with MCTX->SUB_TOPS.  */
    4253           0 :   match_ctx_clean (mctx);
    4254             :   re_free (mctx->sub_tops);
    4255             :   re_free (mctx->bkref_ents);
    4256             : }
    4257             : 
    4258             : /* Add a new backreference entry to MCTX.
    4259             :    Note that we assume that caller never call this function with duplicate
    4260             :    entry, and call with STR_IDX which isn't smaller than any existing entry.
    4261             : */
    4262           0 : 
    4263             : static reg_errcode_t
    4264             : internal_function
    4265           0 : match_ctx_add_entry (re_match_context_t *mctx, Idx node, Idx str_idx, Idx from,
    4266             :                      Idx to)
    4267             : {
    4268           0 :   if (mctx->nbkref_ents >= mctx->abkref_ents)
    4269             :     {
    4270           0 :       struct re_backref_cache_entry* new_entry;
    4271             :       new_entry = re_realloc (mctx->bkref_ents, struct re_backref_cache_entry,
    4272           0 :                               mctx->abkref_ents * 2);
    4273           0 :       if (BE (new_entry == NULL, 0))
    4274             :         {
    4275           0 :           re_free (mctx->bkref_ents);
    4276           0 :           return REG_ESPACE;
    4277           0 :         }
    4278           0 :       mctx->bkref_ents = new_entry;
    4279             :       memset (mctx->bkref_ents + mctx->nbkref_ents, '\0',
    4280           0 :               sizeof (struct re_backref_cache_entry) * mctx->abkref_ents);
    4281           0 :       mctx->abkref_ents *= 2;
    4282           0 :     }
    4283             :   if (mctx->nbkref_ents > 0
    4284           0 :       && mctx->bkref_ents[mctx->nbkref_ents - 1].str_idx == str_idx)
    4285           0 :     mctx->bkref_ents[mctx->nbkref_ents - 1].more = 1;
    4286           0 : 
    4287           0 :   mctx->bkref_ents[mctx->nbkref_ents].node = node;
    4288             :   mctx->bkref_ents[mctx->nbkref_ents].str_idx = str_idx;
    4289             :   mctx->bkref_ents[mctx->nbkref_ents].subexp_from = from;
    4290             :   mctx->bkref_ents[mctx->nbkref_ents].subexp_to = to;
    4291             : 
    4292             :   /* This is a cache that saves negative results of check_dst_limits_calc_pos.
    4293             :      If bit N is clear, means that this entry won't epsilon-transition to
    4294             :      an OP_OPEN_SUBEXP or OP_CLOSE_SUBEXP for the N+1-th subexpression.  If
    4295             :      it is set, check_dst_limits_calc_pos_1 will recurse and try to find one
    4296             :      such node.
    4297           0 : 
    4298           0 :      A backreference does not epsilon-transition unless it is empty, so set
    4299             :      to all zeros if FROM != TO.  */
    4300           0 :   mctx->bkref_ents[mctx->nbkref_ents].eps_reachable_subexps_map
    4301           0 :     = (from == to ? -1 : 0);
    4302           0 : 
    4303           0 :   mctx->bkref_ents[mctx->nbkref_ents++].more = 0;
    4304             :   if (mctx->max_mb_elem_len < to - from)
    4305             :     mctx->max_mb_elem_len = to - from;
    4306             :   return REG_NOERROR;
    4307             : }
    4308             : 
    4309             : /* Return the first entry with the same str_idx, or REG_MISSING if none is
    4310             :    found.  Note that MCTX->BKREF_ENTS is already sorted by MCTX->STR_IDX.  */
    4311           0 : 
    4312             : static Idx
    4313             : internal_function
    4314           0 : search_cur_bkref_entry (const re_match_context_t *mctx, Idx str_idx)
    4315           0 : {
    4316             :   Idx left, right, mid, last;
    4317           0 :   last = right = mctx->nbkref_ents;
    4318           0 :   for (left = 0; left < right;)
    4319           0 :     {
    4320             :       mid = (left + right) / 2;
    4321           0 :       if (mctx->bkref_ents[mid].str_idx < str_idx)
    4322             :         left = mid + 1;
    4323           0 :       else
    4324           0 :         right = mid;
    4325             :     }
    4326           0 :   if (left < last && mctx->bkref_ents[left].str_idx == str_idx)
    4327             :     return left;
    4328             :   else
    4329             :     return REG_MISSING;
    4330             : }
    4331             : 
    4332             : /* Register the node NODE, whose type is OP_OPEN_SUBEXP, and which matches
    4333             :    at STR_IDX.  */
    4334           0 : 
    4335             : static reg_errcode_t
    4336             : internal_function
    4337             : match_ctx_add_subtop (re_match_context_t *mctx, Idx node, Idx str_idx)
    4338             : {
    4339             : #ifdef DEBUG
    4340           0 :   assert (mctx->sub_tops != NULL);
    4341             :   assert (mctx->asub_tops > 0);
    4342           0 : #endif
    4343           0 :   if (BE (mctx->nsub_tops == mctx->asub_tops, 0))
    4344             :     {
    4345             :       Idx new_asub_tops = mctx->asub_tops * 2;
    4346           0 :       re_sub_match_top_t **new_array = re_realloc (mctx->sub_tops,
    4347           0 :                                                    re_sub_match_top_t *,
    4348           0 :                                                    new_asub_tops);
    4349           0 :       if (BE (new_array == NULL, 0))
    4350             :         return REG_ESPACE;
    4351           0 :       mctx->sub_tops = new_array;
    4352           0 :       mctx->asub_tops = new_asub_tops;
    4353           0 :     }
    4354           0 :   mctx->sub_tops[mctx->nsub_tops] = calloc (1, sizeof (re_sub_match_top_t));
    4355           0 :   if (BE (mctx->sub_tops[mctx->nsub_tops] == NULL, 0))
    4356           0 :     return REG_ESPACE;
    4357             :   mctx->sub_tops[mctx->nsub_tops]->node = node;
    4358             :   mctx->sub_tops[mctx->nsub_tops++]->str_idx = str_idx;
    4359             :   return REG_NOERROR;
    4360             : }
    4361             : 
    4362             : /* Register the node NODE, whose type is OP_CLOSE_SUBEXP, and which matches
    4363             :    at STR_IDX, whose corresponding OP_OPEN_SUBEXP is SUB_TOP.  */
    4364           0 : 
    4365             : static re_sub_match_last_t *
    4366             : internal_function
    4367           0 : match_ctx_add_sublast (re_sub_match_top_t *subtop, Idx node, Idx str_idx)
    4368             : {
    4369           0 :   re_sub_match_last_t *new_entry;
    4370           0 :   if (BE (subtop->nlasts == subtop->alasts, 0))
    4371             :     {
    4372             :       Idx new_alasts = 2 * subtop->alasts + 1;
    4373           0 :       re_sub_match_last_t **new_array = re_realloc (subtop->lasts,
    4374           0 :                                                     re_sub_match_last_t *,
    4375           0 :                                                     new_alasts);
    4376           0 :       if (BE (new_array == NULL, 0))
    4377             :         return NULL;
    4378           0 :       subtop->lasts = new_array;
    4379           0 :       subtop->alasts = new_alasts;
    4380             :     }
    4381           0 :   new_entry = calloc (1, sizeof (re_sub_match_last_t));
    4382           0 :   if (BE (new_entry != NULL, 1))
    4383           0 :     {
    4384           0 :       subtop->lasts[subtop->nlasts] = new_entry;
    4385             :       new_entry->node = node;
    4386           0 :       new_entry->str_idx = str_idx;
    4387             :       ++subtop->nlasts;
    4388             :     }
    4389             :   return new_entry;
    4390             : }
    4391           6 : 
    4392             : static void
    4393             : internal_function
    4394           6 : sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
    4395           6 :                re_dfastate_t **limited_sts, Idx last_node, Idx last_str_idx)
    4396           6 : {
    4397           6 :   sctx->sifted_states = sifted_sts;
    4398           6 :   sctx->limited_states = limited_sts;
    4399           6 :   sctx->last_node = last_node;
    4400             :   sctx->last_str_idx = last_str_idx;
    4401             :   re_node_set_init_empty (&sctx->limits);
    4402             : }

Generated by: LCOV version 1.10