To: vim_dev@googlegroups.com Subject: Patch 7.3.1029 Fcc: outbox From: Bram Moolenaar Mime-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ------------ Patch 7.3.1029 Problem: New regexp performance: Unused position state being copied. Solution: Keep track of which positions are actually valid. Files: src/regexp_nfa.c *** ../vim-7.3.1028/src/regexp_nfa.c 2013-05-26 21:47:22.000000000 +0200 --- src/regexp_nfa.c 2013-05-26 22:45:26.000000000 +0200 *************** *** 1649,1670 **** return OK; } - typedef union - { - struct multipos - { - lpos_T start; - lpos_T end; - } multilist[NSUBEXP]; - struct linepos - { - char_u *start; - char_u *end; - } linelist[NSUBEXP]; - } regsub_T; - - static int nfa_regmatch __ARGS((nfa_state_T *start, regsub_T *submatch, regsub_T *m)); - #ifdef DEBUG static char_u code[50]; --- 1649,1654 ---- *************** *** 2489,2494 **** --- 2473,2498 ---- * NFA execution code. ****************************************************************/ + typedef struct + { + int in_use; /* number of subexpr with useful info */ + + /* When REG_MULTI is TRUE multilist is used, otherwise linelist. */ + union + { + struct multipos + { + lpos_T start; + lpos_T end; + } multilist[NSUBEXP]; + struct linepos + { + char_u *start; + char_u *end; + } linelist[NSUBEXP]; + }; + } regsub_T; + /* nfa_thread_T contains execution information of a NFA state */ typedef struct { *************** *** 2507,2513 **** static int nfa_match; static void addstate __ARGS((nfa_list_T *l, nfa_state_T *state, regsub_T *m, int off, int lid)); - static void addstate_here __ARGS((nfa_list_T *l, nfa_state_T *state, regsub_T *m, int lid, int *ip)); static void --- 2511,2516 ---- *************** *** 2521,2527 **** --- 2524,2532 ---- int subidx; nfa_thread_T *lastthread; lpos_T save_lpos; + int save_in_use; char_u *save_ptr; + int i; if (l == NULL || state == NULL) return; *************** *** 2557,2572 **** state->lastlist = lid; lastthread = &l->t[l->n++]; lastthread->state = state; ! ! /* Copy the match start and end positions. */ ! if (REG_MULTI) ! mch_memmove(&lastthread->sub.multilist[0], ! &m->multilist[0], ! sizeof(struct multipos) * nfa_nsubexpr); ! else ! mch_memmove(&lastthread->sub.linelist[0], ! &m->linelist[0], ! sizeof(struct linepos) * nfa_nsubexpr); } } --- 2562,2580 ---- state->lastlist = lid; lastthread = &l->t[l->n++]; lastthread->state = state; ! lastthread->sub.in_use = m->in_use; ! if (m->in_use > 0) ! { ! /* Copy the match start and end positions. */ ! if (REG_MULTI) ! mch_memmove(&lastthread->sub.multilist[0], ! &m->multilist[0], ! sizeof(struct multipos) * m->in_use); ! else ! mch_memmove(&lastthread->sub.linelist[0], ! &m->linelist[0], ! sizeof(struct linepos) * m->in_use); ! } } } *************** *** 2636,2644 **** else subidx = state->c - NFA_MOPEN; if (REG_MULTI) { ! save_lpos = m->multilist[subidx].start; if (off == -1) { m->multilist[subidx].start.lnum = reglnum + 1; --- 2644,2668 ---- else subidx = state->c - NFA_MOPEN; + /* Set the position (with "off") in the subexpression. Save and + * restore it when it was in use. Otherwise fill any gap. */ if (REG_MULTI) { ! if (subidx < m->in_use) ! { ! save_lpos = m->multilist[subidx].start; ! save_in_use = -1; ! } ! else ! { ! save_in_use = m->in_use; ! for (i = m->in_use; i < subidx; ++i) ! { ! m->multilist[i].start.lnum = -1; ! m->multilist[i].end.lnum = -1; ! } ! m->in_use = subidx + 1; ! } if (off == -1) { m->multilist[subidx].start.lnum = reglnum + 1; *************** *** 2653,2668 **** } else { ! save_ptr = m->linelist[subidx].start; m->linelist[subidx].start = reginput + off; } addstate(l, state->out, m, off, lid); ! if (REG_MULTI) ! m->multilist[subidx].start = save_lpos; else ! m->linelist[subidx].start = save_ptr; break; case NFA_MCLOSE + 0: --- 2677,2711 ---- } else { ! if (subidx < m->in_use) ! { ! save_ptr = m->linelist[subidx].start; ! save_in_use = -1; ! } ! else ! { ! save_in_use = m->in_use; ! for (i = m->in_use; i < subidx; ++i) ! { ! m->linelist[i].start = NULL; ! m->linelist[i].end = NULL; ! } ! m->in_use = subidx + 1; ! } m->linelist[subidx].start = reginput + off; } addstate(l, state->out, m, off, lid); ! if (save_in_use == -1) ! { ! if (REG_MULTI) ! m->multilist[subidx].start = save_lpos; ! else ! m->linelist[subidx].start = save_ptr; ! } else ! m->in_use = save_in_use; break; case NFA_MCLOSE + 0: *************** *** 2686,2691 **** --- 2729,2739 ---- else subidx = state->c - NFA_MCLOSE; + /* We don't fill in gaps here, there must have been an MOPEN that + * has done that. */ + save_in_use = m->in_use; + if (m->in_use <= subidx) + m->in_use = subidx + 1; if (REG_MULTI) { save_lpos = m->multilist[subidx].end; *************** *** 2713,2718 **** --- 2761,2767 ---- m->multilist[subidx].end = save_lpos; else m->linelist[subidx].end = save_ptr; + m->in_use = save_in_use; break; } } *************** *** 2917,2922 **** --- 2966,2973 ---- } } + static int nfa_regmatch __ARGS((nfa_state_T *start, regsub_T *submatch, regsub_T *m)); + /* * Main matching routine. * *************** *** 2960,2966 **** #endif nfa_match = FALSE; ! /* Allocate memory for the lists of nodes */ size = (nstate + 1) * sizeof(nfa_thread_T); list[0].t = (nfa_thread_T *)lalloc(size, TRUE); list[1].t = (nfa_thread_T *)lalloc(size, TRUE); --- 3011,3017 ---- #endif nfa_match = FALSE; ! /* Allocate memory for the lists of nodes. */ size = (nstate + 1) * sizeof(nfa_thread_T); list[0].t = (nfa_thread_T *)lalloc(size, TRUE); list[1].t = (nfa_thread_T *)lalloc(size, TRUE); *************** *** 3099,3105 **** { case NFA_MATCH: nfa_match = TRUE; ! *submatch = t->sub; #ifdef ENABLE_LOG for (j = 0; j < 4; j++) if (REG_MULTI) --- 3150,3168 ---- { case NFA_MATCH: nfa_match = TRUE; ! submatch->in_use = t->sub.in_use; ! if (REG_MULTI) ! for (j = 0; j < submatch->in_use; j++) ! { ! submatch->multilist[j].start = t->sub.multilist[j].start; ! submatch->multilist[j].end = t->sub.multilist[j].end; ! } ! else ! for (j = 0; j < submatch->in_use; j++) ! { ! submatch->linelist[j].start = t->sub.linelist[j].start; ! submatch->linelist[j].end = t->sub.linelist[j].end; ! } #ifdef ENABLE_LOG for (j = 0; j < 4; j++) if (REG_MULTI) *************** *** 3194,3210 **** reglnum = old_reglnum; /* Copy submatch info from the recursive call */ if (REG_MULTI) ! for (j = 1; j < nfa_nsubexpr; j++) { t->sub.multilist[j].start = m->multilist[j].start; t->sub.multilist[j].end = m->multilist[j].end; } else ! for (j = 1; j < nfa_nsubexpr; j++) { t->sub.linelist[j].start = m->linelist[j].start; t->sub.linelist[j].end = m->linelist[j].end; } /* t->state->out1 is the corresponding END_INVISIBLE node */ addstate_here(thislist, t->state->out1->out, &t->sub, listid, &listidx); --- 3257,3275 ---- reglnum = old_reglnum; /* Copy submatch info from the recursive call */ if (REG_MULTI) ! for (j = 1; j < m->in_use; j++) { t->sub.multilist[j].start = m->multilist[j].start; t->sub.multilist[j].end = m->multilist[j].end; } else ! for (j = 1; j < m->in_use; j++) { t->sub.linelist[j].start = m->linelist[j].start; t->sub.linelist[j].end = m->linelist[j].end; } + t->sub.in_use = m->in_use; + /* t->state->out1 is the corresponding END_INVISIBLE node */ addstate_here(thislist, t->state->out1->out, &t->sub, listid, &listidx); *************** *** 3703,3708 **** --- 3768,3775 ---- vim_memset(sub.linelist, 0, sizeof(struct linepos) * nfa_nsubexpr); vim_memset(m.linelist, 0, sizeof(struct linepos) * nfa_nsubexpr); } + sub.in_use = 0; + m.in_use = 0; if (nfa_regmatch(start, &sub, &m) == FALSE) return 0; *************** *** 3710,3716 **** cleanup_subexpr(); if (REG_MULTI) { ! for (i = 0; i < nfa_nsubexpr; i++) { reg_startpos[i] = sub.multilist[i].start; reg_endpos[i] = sub.multilist[i].end; --- 3777,3783 ---- cleanup_subexpr(); if (REG_MULTI) { ! for (i = 0; i < sub.in_use; i++) { reg_startpos[i] = sub.multilist[i].start; reg_endpos[i] = sub.multilist[i].end; *************** *** 3732,3738 **** } else { ! for (i = 0; i < nfa_nsubexpr; i++) { reg_startp[i] = sub.linelist[i].start; reg_endp[i] = sub.linelist[i].end; --- 3799,3805 ---- } else { ! for (i = 0; i < sub.in_use; i++) { reg_startp[i] = sub.linelist[i].start; reg_endp[i] = sub.linelist[i].end; *** ../vim-7.3.1028/src/version.c 2013-05-26 21:47:22.000000000 +0200 --- src/version.c 2013-05-26 22:53:55.000000000 +0200 *************** *** 730,731 **** --- 730,733 ---- { /* Add new patch number below this line */ + /**/ + 1029, /**/ -- I used to be indecisive, now I'm not sure. /// Bram Moolenaar -- Bram@Moolenaar.net -- http://www.Moolenaar.net \\\ /// sponsor Vim, vote for features -- http://www.Vim.org/sponsor/ \\\ \\\ an exciting new programming language -- http://www.Zimbu.org /// \\\ help me help AIDS victims -- http://ICCF-Holland.org ///