To: vim_dev@googlegroups.com Subject: Patch 7.3.1017 Fcc: outbox From: Bram Moolenaar Mime-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ------------ Patch 7.3.1017 Problem: Zero width match changes length of match. Solution: For a zero width match put new states in the current position in the state list. Files: src/regexp_nfa.c, src/testdir/test64.in, src/testdir/test64.ok, src/regexp.h *** ../vim-7.3.1016/src/regexp_nfa.c 2013-05-25 15:31:02.000000000 +0200 --- src/regexp_nfa.c 2013-05-25 20:18:25.000000000 +0200 *************** *** 2471,2494 **** * NFA execution code. ****************************************************************/ ! /* thread_T contains runtime information of a NFA state */ ! struct thread { nfa_state_T *state; ! regsub_T sub; /* submatch info */ ! }; typedef struct { ! thread_T *t; ! int n; ! } List; ! static void addstate __ARGS((List *l, nfa_state_T *state, regsub_T *m, int off, int lid, int *match)); static void addstate(l, state, m, off, lid, match) ! List *l; /* runtime state list */ nfa_state_T *state; /* state to update */ regsub_T *m; /* pointers to subexpressions */ int off; /* byte offset, when -1 go to next line */ --- 2471,2497 ---- * NFA execution code. ****************************************************************/ ! /* nfa_thread_T contains runtime information of a NFA state */ ! typedef struct { nfa_state_T *state; ! regsub_T sub; /* Submatch info. TODO: expensive! */ ! } nfa_thread_T; ! typedef struct { ! nfa_thread_T *t; ! int n; ! } nfa_list_T; ! static void addstate __ARGS((nfa_list_T *l, nfa_state_T *state, regsub_T *m, int off, int lid, int *match)); ! ! static void addstate_here __ARGS((nfa_list_T *l, nfa_state_T *state, regsub_T *m, int lid, int *match, int *ip)); static void addstate(l, state, m, off, lid, match) ! nfa_list_T *l; /* runtime state list */ nfa_state_T *state; /* state to update */ regsub_T *m; /* pointers to subexpressions */ int off; /* byte offset, when -1 go to next line */ *************** *** 2497,2503 **** { regsub_T save; int subidx = 0; ! thread_T *lastthread; if (l == NULL || state == NULL) return; --- 2500,2506 ---- { regsub_T save; int subidx = 0; ! nfa_thread_T *lastthread; if (l == NULL || state == NULL) return; *************** *** 2533,2539 **** state->lastlist = lid; lastthread = &l->t[l->n++]; lastthread->state = state; ! lastthread->sub = *m; } } --- 2536,2542 ---- state->lastlist = lid; lastthread = &l->t[l->n++]; lastthread->state = state; ! lastthread->sub = *m; /* TODO: expensive! */ } } *************** *** 2698,2703 **** --- 2701,2754 ---- } /* + * Like addstate(), but the new state(s) are put at position "*ip". + * Used for zero-width matches, next state to use is the added one. + * This makes sure the order of states to be tried does not change, which + * matters for alternatives. + */ + static void + addstate_here(l, state, m, lid, matchp, ip) + nfa_list_T *l; /* runtime state list */ + nfa_state_T *state; /* state to update */ + regsub_T *m; /* pointers to subexpressions */ + int lid; + int *matchp; /* found match? */ + int *ip; + { + int tlen = l->n; + int count; + int i = *ip; + + /* first add the state(s) at the end, so that we know how many there are */ + addstate(l, state, m, 0, lid, matchp); + + /* when "*ip" was at the end of the list, nothing to do */ + if (i + 1 == tlen) + return; + + /* re-order to put the new state at the current position */ + count = l->n - tlen; + if (count > 1) + { + /* make space for new states, then move them from the + * end to the current position */ + mch_memmove(&(l->t[i + count]), + &(l->t[i + 1]), + sizeof(nfa_thread_T) * (l->n - i - 1)); + mch_memmove(&(l->t[i]), + &(l->t[l->n - 1]), + sizeof(nfa_thread_T) * count); + } + else + { + /* overwrite the current state */ + l->t[i] = l->t[l->n - 1]; + } + --l->n; + *ip = i - 1; + } + + /* * Check character class "class" against current character c. */ static int *************** *** 2872,2888 **** int match = FALSE; int flag = 0; int old_reglnum = -1; ! int go_to_nextline; ! thread_T *t; char_u *old_reginput = NULL; char_u *old_regline = NULL; ! List list[3]; ! List *listtbl[2][2]; ! List *ll; int listid = 1; ! List *thislist; ! List *nextlist; ! List *neglist; int *listids = NULL; int j = 0; #ifdef NFA_REGEXP_DEBUG_LOG --- 2923,2939 ---- int match = FALSE; int flag = 0; int old_reglnum = -1; ! int go_to_nextline = FALSE; ! nfa_thread_T *t; char_u *old_reginput = NULL; char_u *old_regline = NULL; ! nfa_list_T list[3]; ! nfa_list_T *listtbl[2][2]; ! nfa_list_T *ll; int listid = 1; ! nfa_list_T *thislist; ! nfa_list_T *nextlist; ! nfa_list_T *neglist; int *listids = NULL; int j = 0; #ifdef NFA_REGEXP_DEBUG_LOG *************** *** 2896,2905 **** #endif /* Allocate memory for the lists of nodes */ ! size = (nstate + 1) * sizeof(thread_T); ! list[0].t = (thread_T *)lalloc(size, TRUE); ! list[1].t = (thread_T *)lalloc(size, TRUE); ! list[2].t = (thread_T *)lalloc(size, TRUE); if (list[0].t == NULL || list[1].t == NULL || list[2].t == NULL) goto theend; vim_memset(list[0].t, 0, size); --- 2947,2956 ---- #endif /* Allocate memory for the lists of nodes */ ! size = (nstate + 1) * sizeof(nfa_thread_T); ! list[0].t = (nfa_thread_T *)lalloc(size, TRUE); ! list[1].t = (nfa_thread_T *)lalloc(size, TRUE); ! list[2].t = (nfa_thread_T *)lalloc(size, TRUE); if (list[0].t == NULL || list[1].t == NULL || list[2].t == NULL) goto theend; vim_memset(list[0].t, 0, size); *************** *** 3056,3063 **** * nfa_regmatch(). Submatches are stored in *m, and used in * the parent call. */ if (start->c == NFA_MOPEN + 0) ! addstate(thislist, t->state->out, &t->sub, 0, listid, ! &match); else { *m = t->sub; --- 3107,3114 ---- * nfa_regmatch(). Submatches are stored in *m, and used in * the parent call. */ if (start->c == NFA_MOPEN + 0) ! addstate_here(thislist, t->state->out, &t->sub, listid, ! &match, &i); else { *m = t->sub; *************** *** 3130,3137 **** t->sub.end[j] = m->end[j]; } /* t->state->out1 is the corresponding END_INVISIBLE node */ ! addstate(thislist, t->state->out1->out, &t->sub, 0, listid, ! &match); } else { --- 3181,3188 ---- t->sub.end[j] = m->end[j]; } /* t->state->out1 is the corresponding END_INVISIBLE node */ ! addstate_here(thislist, t->state->out1->out, &t->sub, ! listid, &match, &i); } else { *************** *** 3142,3155 **** case NFA_BOL: if (reginput == regline) ! addstate(thislist, t->state->out, &t->sub, 0, listid, ! &match); break; case NFA_EOL: if (c == NUL) ! addstate(thislist, t->state->out, &t->sub, 0, listid, ! &match); break; case NFA_BOW: --- 3193,3206 ---- case NFA_BOL: if (reginput == regline) ! addstate_here(thislist, t->state->out, &t->sub, listid, ! &match, &i); break; case NFA_EOL: if (c == NUL) ! addstate_here(thislist, t->state->out, &t->sub, listid, ! &match, &i); break; case NFA_BOW: *************** *** 3176,3183 **** && vim_iswordc_buf(reginput[-1], reg_buf))) bow = FALSE; if (bow) ! addstate(thislist, t->state->out, &t->sub, 0, listid, ! &match); break; } --- 3227,3234 ---- && vim_iswordc_buf(reginput[-1], reg_buf))) bow = FALSE; if (bow) ! addstate_here(thislist, t->state->out, &t->sub, listid, ! &match, &i); break; } *************** *** 3204,3211 **** || (reginput[0] != NUL && vim_iswordc_buf(c, reg_buf))) eow = FALSE; if (eow) ! addstate(thislist, t->state->out, &t->sub, 0, listid, ! &match); break; } --- 3255,3262 ---- || (reginput[0] != NUL && vim_iswordc_buf(c, reg_buf))) eow = FALSE; if (eow) ! addstate_here(thislist, t->state->out, &t->sub, listid, ! &match, &i); break; } *** ../vim-7.3.1016/src/testdir/test64.in 2013-05-21 13:30:17.000000000 +0200 --- src/testdir/test64.in 2013-05-25 19:54:40.000000000 +0200 *************** *** 270,275 **** --- 270,276 ---- :call add(tl, ['aa \zsax', ' ax']) " must match before \zs :call add(tl, ['abc \zsmatch\ze abc', 'abc abc abc match abc abc', 'match']) :call add(tl, ['\v(a \zsif .*){2}', 'a if then a if last', 'if last', 'a if last']) + :call add(tl, ['\>\zs.', 'aword. ', '.']) :"""" Tests for \@ features :call add(tl, ['abc\@=', 'abc', 'ab']) *************** *** 299,304 **** --- 300,311 ---- :call add(tl, ['\%u0020', 'yes no', ' ']) :call add(tl, ['\%U00000020', 'yes no', ' ']) + :"""" Alternatives, must use first longest match + :call add(tl, ['goo\|go', 'google', 'goo']) + :call add(tl, ['\\zs. OK - abc\@= OK - abc\@=cd OK - abc\@= *************** *** 231,234 **** --- 232,238 ---- OK - \%x20 OK - \%u0020 OK - \%U00000020 + OK - goo\|go + OK - \