To: vim_dev@googlegroups.com Subject: Patch 7.3.1103 Fcc: outbox From: Bram Moolenaar Mime-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ------------ Patch 7.3.1103 Problem: New regexp engine: overhead in saving and restoring. Solution: Make saving and restoring list IDs faster. Don't copy or check \z subexpressions when they are not used. Files: src/regexp_nfa.c *** ../vim-7.3.1102/src/regexp_nfa.c 2013-06-02 16:40:44.000000000 +0200 --- src/regexp_nfa.c 2013-06-02 21:00:41.000000000 +0200 *************** *** 237,242 **** --- 237,245 ---- /* NFA regexp \1 .. \9 encountered. */ static int nfa_has_backref; + /* NFA regexp has \z( ), set zsubexpr. */ + static int nfa_has_zsubexpr; + /* Number of sub expressions actually being used during execution. 1 if only * the whole match (subexpr 0) is used. */ static int nfa_nsubexpr; *************** *** 272,281 **** static nfa_state_T *post2nfa __ARGS((int *postfix, int *end, int nfa_calc_size)); static int check_char_class __ARGS((int class, int c)); static void st_error __ARGS((int *postfix, int *end, int *p)); ! static void nfa_set_neg_listids __ARGS((nfa_state_T *start)); ! static void nfa_set_null_listids __ARGS((nfa_state_T *start)); ! static void nfa_save_listids __ARGS((nfa_state_T *start, int *list)); ! static void nfa_restore_listids __ARGS((nfa_state_T *start, int *list)); static int nfa_re_num_cmp __ARGS((long_u val, int op, long_u pos)); static long nfa_regtry __ARGS((nfa_regprog_T *prog, colnr_T col)); static long nfa_regexec_both __ARGS((char_u *line, colnr_T col)); --- 275,282 ---- static nfa_state_T *post2nfa __ARGS((int *postfix, int *end, int nfa_calc_size)); static int check_char_class __ARGS((int class, int c)); static void st_error __ARGS((int *postfix, int *end, int *p)); ! static void nfa_save_listids __ARGS((nfa_regprog_T *prog, int *list)); ! static void nfa_restore_listids __ARGS((nfa_regprog_T *prog, int *list)); static int nfa_re_num_cmp __ARGS((long_u val, int op, long_u pos)); static long nfa_regtry __ARGS((nfa_regprog_T *prog, colnr_T col)); static long nfa_regexec_both __ARGS((char_u *line, colnr_T col)); *************** *** 3000,3005 **** --- 3001,3024 ---- return TRUE; } + #ifdef ENABLE_LOG + static void + report_state(char *action, regsub_T *sub, nfa_state_T *state, int lid); + { + int col; + + if (sub->in_use <= 0) + col = -1; + else if (REG_MULTI) + col = sub->list.multi[0].start.col; + else + col = (int)(sub->list.line[0].start - regline); + nfa_set_code(state->c); + fprintf(log_fd, "> %s state %d to list %d. char %d: %s (start col %d)\n", + action, abs(state->id), lid, state->c, code, col); + } + #endif + static void addstate(l, state, subs, off) nfa_list_T *l; /* runtime state list */ *************** *** 3118,3124 **** if (thread->state->id == state->id && sub_equal(&thread->subs.norm, &subs->norm) #ifdef FEAT_SYN_HL ! && sub_equal(&thread->subs.synt, &subs->synt) #endif ) goto skip_add; --- 3137,3144 ---- if (thread->state->id == state->id && sub_equal(&thread->subs.norm, &subs->norm) #ifdef FEAT_SYN_HL ! && (!nfa_has_zsubexpr || ! sub_equal(&thread->subs.synt, &subs->synt)) #endif ) goto skip_add; *************** *** 3141,3181 **** thread->state = state; copy_sub(&thread->subs.norm, &subs->norm); #ifdef FEAT_SYN_HL ! copy_sub(&thread->subs.synt, &subs->synt); #endif #ifdef ENABLE_LOG ! { ! int col; ! ! if (thread->subs.norm.in_use <= 0) ! col = -1; ! else if (REG_MULTI) ! col = thread->subs.norm.list.multi[0].start.col; ! else ! col = (int)(thread->subs.norm.list.line[0].start - regline); ! nfa_set_code(state->c); ! fprintf(log_fd, "> Adding state %d to list %d. char %d: %s (start col %d)\n", ! abs(state->id), l->id, state->c, code, col); ! did_print = TRUE; ! } #endif } #ifdef ENABLE_LOG if (!did_print) ! { ! int col; ! ! if (subs->norm.in_use <= 0) ! col = -1; ! else if (REG_MULTI) ! col = subs->norm.list.multi[0].start.col; ! else ! col = (int)(subs->norm.list.line[0].start - regline); ! nfa_set_code(state->c); ! fprintf(log_fd, "> Processing state %d for list %d. char %d: %s (start col %d)\n", ! abs(state->id), l->id, state->c, code, col); ! } #endif switch (state->c) { --- 3161,3178 ---- thread->state = state; copy_sub(&thread->subs.norm, &subs->norm); #ifdef FEAT_SYN_HL ! if (nfa_has_zsubexpr) ! copy_sub(&thread->subs.synt, &subs->synt); #endif #ifdef ENABLE_LOG ! report_state("Adding", &thread->subs.norm, state, l->id); ! did_print = TRUE; #endif } #ifdef ENABLE_LOG if (!did_print) ! report_state("Processing", &subs->norm, state, l->id); #endif switch (state->c) { *************** *** 3600,3648 **** #endif /* ! * Set all NFA nodes' list ID equal to -1. */ static void ! nfa_set_neg_listids(start) ! nfa_state_T *start; ! { ! if (start != NULL && start->lastlist >= 0) ! { ! start->lastlist = -1; ! nfa_set_neg_listids(start->out); ! nfa_set_neg_listids(start->out1); ! } ! } ! ! /* ! * Set all NFA nodes' list ID equal to 0. ! */ ! static void ! nfa_set_null_listids(start) ! nfa_state_T *start; ! { ! if (start != NULL && start->lastlist == -1) ! { ! start->lastlist = 0; ! nfa_set_null_listids(start->out); ! nfa_set_null_listids(start->out1); ! } ! } ! ! /* ! * Save list IDs for all NFA states in "list". ! */ ! static void ! nfa_save_listids(start, list) ! nfa_state_T *start; int *list; { ! if (start != NULL && start->lastlist != -1) ! { ! list[abs(start->id)] = start->lastlist; ! start->lastlist = -1; ! nfa_save_listids(start->out, list); ! nfa_save_listids(start->out1, list); } } --- 3597,3620 ---- #endif /* ! * Save list IDs for all NFA states of "prog" into "list". ! * Also reset the IDs to zero. */ static void ! nfa_save_listids(prog, list) ! nfa_regprog_T *prog; int *list; { ! int i; ! nfa_state_T *p; ! ! /* Order in the list is reverse, it's a bit faster that way. */ ! p = &prog->state[0]; ! for (i = prog->nstate; --i >= 0; ) ! { ! list[i] = p->lastlist; ! p->lastlist = 0; ! ++p; } } *************** *** 3650,3664 **** * Restore list IDs from "list" to all NFA states. */ static void ! nfa_restore_listids(start, list) ! nfa_state_T *start; int *list; { ! if (start != NULL && start->lastlist == -1) { ! start->lastlist = list[abs(start->id)]; ! nfa_restore_listids(start->out, list); ! nfa_restore_listids(start->out1, list); } } --- 3622,3639 ---- * Restore list IDs from "list" to all NFA states. */ static void ! nfa_restore_listids(prog, list) ! nfa_regprog_T *prog; int *list; { ! int i; ! nfa_state_T *p; ! ! p = &prog->state[0]; ! for (i = prog->nstate; --i >= 0; ) { ! p->lastlist = list[i]; ! ++p; } } *************** *** 3673,3679 **** return val == pos; } ! static int nfa_regmatch __ARGS((nfa_state_T *start, regsubs_T *submatch, regsubs_T *m)); /* * Main matching routine. --- 3648,3654 ---- return val == pos; } ! static int nfa_regmatch __ARGS((nfa_regprog_T *prog, nfa_state_T *start, regsubs_T *submatch, regsubs_T *m)); /* * Main matching routine. *************** *** 3686,3692 **** * Note: Caller must ensure that: start != NULL. */ static int ! nfa_regmatch(start, submatch, m) nfa_state_T *start; regsubs_T *submatch; regsubs_T *m; --- 3661,3668 ---- * Note: Caller must ensure that: start != NULL. */ static int ! nfa_regmatch(prog, start, submatch, m) ! nfa_regprog_T *prog; nfa_state_T *start; regsubs_T *submatch; regsubs_T *m; *************** *** 3872,3878 **** nfa_match = TRUE; copy_sub(&submatch->norm, &t->subs.norm); #ifdef FEAT_SYN_HL ! copy_sub(&submatch->synt, &t->subs.synt); #endif #ifdef ENABLE_LOG log_subsexpr(&t->subs); --- 3848,3855 ---- nfa_match = TRUE; copy_sub(&submatch->norm, &t->subs.norm); #ifdef FEAT_SYN_HL ! if (nfa_has_zsubexpr) ! copy_sub(&submatch->synt, &t->subs.synt); #endif #ifdef ENABLE_LOG log_subsexpr(&t->subs); *************** *** 3928,3934 **** { copy_sub(&m->norm, &t->subs.norm); #ifdef FEAT_SYN_HL ! copy_sub(&m->synt, &t->subs.synt); #endif } nfa_match = TRUE; --- 3905,3912 ---- { copy_sub(&m->norm, &t->subs.norm); #ifdef FEAT_SYN_HL ! if (nfa_has_zsubexpr) ! copy_sub(&m->synt, &t->subs.synt); #endif } nfa_match = TRUE; *************** *** 4024,4035 **** /* Have to clear the listid field of the NFA nodes, so that * nfa_regmatch() and addstate() can run properly after * recursion. */ ! nfa_save_listids(start, listids); ! nfa_set_null_listids(start); nfa_endp = endposp; ! result = nfa_regmatch(t->state->out, submatch, m); ! nfa_set_neg_listids(start); ! nfa_restore_listids(start, listids); /* restore position in input text */ reginput = save_reginput; --- 4002,4011 ---- /* Have to clear the listid field of the NFA nodes, so that * nfa_regmatch() and addstate() can run properly after * recursion. */ ! nfa_save_listids(prog, listids); nfa_endp = endposp; ! result = nfa_regmatch(prog, t->state->out, submatch, m); ! nfa_restore_listids(prog, listids); /* restore position in input text */ reginput = save_reginput; *************** *** 4665,4671 **** --- 4641,4652 ---- #ifdef FEAT_SYN_HL /* Clear the external match subpointers if necessary. */ if (prog->reghasz == REX_SET) + { + nfa_has_zsubexpr = TRUE; need_clear_zsubexpr = TRUE; + } + else + nfa_has_zsubexpr = FALSE; #endif #ifdef ENABLE_LOG *************** *** 4694,4700 **** clear_sub(&m.synt); #endif ! if (nfa_regmatch(start, &subs, &m) == FALSE) return 0; cleanup_subexpr(); --- 4675,4681 ---- clear_sub(&m.synt); #endif ! if (nfa_regmatch(prog, start, &subs, &m) == FALSE) return 0; cleanup_subexpr(); *** ../vim-7.3.1102/src/version.c 2013-06-02 19:22:05.000000000 +0200 --- src/version.c 2013-06-02 21:24:50.000000000 +0200 *************** *** 730,731 **** --- 730,733 ---- { /* Add new patch number below this line */ + /**/ + 1103, /**/ -- hundred-and-one symptoms of being an internet addict: 53. To find out what time it is, you send yourself an e-mail and check the "Date:" field. /// Bram Moolenaar -- Bram@Moolenaar.net -- http://www.Moolenaar.net \\\ /// sponsor Vim, vote for features -- http://www.Vim.org/sponsor/ \\\ \\\ an exciting new programming language -- http://www.Zimbu.org /// \\\ help me help AIDS victims -- http://ICCF-Holland.org ///