Tcl Source Code: Artifact [dc77628c45]

Artifact dc77628c45d45e5145c985efc5af6d41a11edb8c:

Attachment "quantified-backrefs.patch" to ticket [0e0e150e49] added by tgl 2015-09-19 15:55:52. (unpublished)
diff -pcdr tcl8.6.4.orig/generic/regcomp.c tcl8.6.4/generic/regcomp.c
*** tcl8.6.4.orig/generic/regcomp.c	Thu Feb 26 11:57:28 2015
--- tcl8.6.4/generic/regcomp.c	Fri Sep 18 12:16:30 2015
*************** parseqatom(
*** 1102,1112 ****
      /*
       * Prepare a general-purpose state skeleton.
       *
!      *    ---> [s] ---prefix---> [begin] ---atom---> [end] ----rest---> [rp]
!      *   /                                            /
!      * [lp] ----> [s2] ----bypass---------------------
       *
!      * where bypass is an empty, and prefix is some repetitions of atom
       */
  
      s = newstate(v->nfa);	/* first, new endpoints for the atom */
--- 1102,1118 ----
      /*
       * Prepare a general-purpose state skeleton.
       *
!      * In the no-backrefs case, we want this:
       *
!      * [lp] ---> [s] ---prefix---> [begin] ---atom---> [end] ---rest---> [rp]
!      *
!      * where prefix is some repetitions of atom.  In the general case we need
!      *
!      * [lp] ---> [s] ---iterator---> [s2] ---rest---> [rp]
!      *
!      * where the iterator wraps around [begin] ---atom---> [end]
!      *
!      * We make the s state here for both cases; s2 is made below if needed
       */
  
      s = newstate(v->nfa);	/* first, new endpoints for the atom */
*************** parseqatom(
*** 1117,1127 ****
      NOERR();
      atom->begin = s;
      atom->end = s2;
!     s = newstate(v->nfa);	/* and spots for prefix and bypass */
!     s2 = newstate(v->nfa);
      NOERR();
      EMPTYARC(lp, s);
-     EMPTYARC(lp, s2);
      NOERR();
  
      /*
--- 1123,1131 ----
      NOERR();
      atom->begin = s;
      atom->end = s2;
!     s = newstate(v->nfa);	/* set up starting state */
      NOERR();
      EMPTYARC(lp, s);
      NOERR();
  
      /*
*************** parseqatom(
*** 1166,1192 ****
      }
  
      /*
!      * It's quantifier time; first, turn x{0,...} into x{1,...}|empty
!      */
! 
!     if (m == 0) {
! 	EMPTYARC(s2, atom->end);/* the bypass */
! 	assert(PREF(qprefer) != 0);
! 	f = COMBINE(qprefer, atom->flags);
! 	t = subre(v, '|', f, lp, atom->end);
! 	NOERR();
! 	t->left = atom;
! 	t->right = subre(v, '|', PREF(f), s2, atom->end);
! 	NOERR();
! 	t->right->left = subre(v, '=', 0, s2, atom->end);
! 	NOERR();
! 	*atomp = t;
! 	atomp = &t->left;
! 	m = 1;
!     }
! 
!     /*
!      * Deal with the rest of the quantifier.
       */
  
      if (atomtype == BACKREF) {
--- 1170,1177 ----
      }
  
      /*
!      * It's quantifier time.  If the atom is just a backref, we'll let it deal
!      * with quantifiers internally.
       */
  
      if (atomtype == BACKREF) {
*************** parseqatom(
*** 1204,1219 ****
  	atom->min = (short) m;
  	atom->max = (short) n;
  	atom->flags |= COMBINE(qprefer, atom->flags);
      } else if (m == 1 && n == 1) {
  	/*
  	 * No/vacuous quantifier: done.
  	 */
  
  	EMPTYARC(s, atom->begin);	/* empty prefix */
!     } else {
  	/*
! 	 * Turn x{m,n} into x{m-1,n-1}x, with capturing parens in only second
! 	 * x
  	 */
  
  	dupnfa(v->nfa, atom->begin, atom->end, s, atom->begin);
--- 1189,1212 ----
  	atom->min = (short) m;
  	atom->max = (short) n;
  	atom->flags |= COMBINE(qprefer, atom->flags);
+ 	/* rest of branch can be strung starting from atom->end */
+ 	s2 = atom->end;
      } else if (m == 1 && n == 1) {
  	/*
  	 * No/vacuous quantifier: done.
  	 */
  
  	EMPTYARC(s, atom->begin);	/* empty prefix */
! 	/* rest of branch can be strung starting from atom->end */
! 	s2 = atom->end;
!     } else if (m > 0 && !(atom->flags & BACKR)) {
  	/*
! 	 * If there's no backrefs involved, we can turn x{m,n} into
! 	 * x{m-1,n-1}x, with capturing parens in only the second x.  This
! 	 * is valid because we only care about capturing matches from the
! 	 * final iteration of the quantifier.  It's a win because we can
! 	 * implement the backref-free left side as a plain DFA node, since
! 	 * we don't really care where its submatches are.
  	 */
  
  	dupnfa(v->nfa, atom->begin, atom->end, s, atom->begin);
*************** parseqatom(
*** 1226,1231 ****
--- 1219,1242 ----
  	NOERR();
  	t->right = atom;
  	*atomp = t;
+ 	/* rest of branch can be strung starting from atom->end */
+ 	s2 = atom->end;
+     } else {
+ 	/* general case: need an iteration node */
+ 	s2 = newstate(v->nfa);
+ 	NOERR();
+ 	moveouts(v->nfa, atom->end, s2);
+ 	NOERR();
+ 	dupnfa(v->nfa, atom->begin, atom->end, s, s2);
+ 	repeat(v, s, s2, m, n);
+ 	f = COMBINE(qprefer, atom->flags);
+ 	t = subre(v, '*', f, s, s2);
+ 	NOERR();
+ 	t->min = (short) m;
+ 	t->max = (short) n;
+ 	t->left = atom;
+ 	*atomp = t;
+ 	/* rest of branch is to be strung from iteration's end state */
      }
  
      /*
*************** parseqatom(
*** 1234,1243 ****
  
      t = top->right;
      if (!(SEE('|') || SEE(stopper) || SEE(EOS))) {
! 	t->right = parsebranch(v, stopper, type, atom->end, rp, 1);
      } else {
! 	EMPTYARC(atom->end, rp);
! 	t->right = subre(v, '=', 0, atom->end, rp);
      }
      NOERR();
      assert(SEE('|') || SEE(stopper) || SEE(EOS));
--- 1245,1254 ----
  
      t = top->right;
      if (!(SEE('|') || SEE(stopper) || SEE(EOS))) {
! 	t->right = parsebranch(v, stopper, type, s2, rp, 1);
      } else {
! 	EMPTYARC(s2, rp);
! 	t->right = subre(v, '=', 0, s2, rp);
      }
      NOERR();
      assert(SEE('|') || SEE(stopper) || SEE(EOS));
*************** scannum(
*** 1304,1309 ****
--- 1315,1322 ----
  
  /*
   - repeat - replicate subNFA for quantifiers
+  * The sub-NFA strung from lp to rp is modified to represent m to n
+  * repetitions of its initial contents.
   * The duplication sequences used here are chosen carefully so that any
   * pointers starting out pointing into the subexpression end up pointing into
   * the last occurrence. (Note that it may not be strung between the same left
*************** subre(
*** 1713,1723 ****
  	v->treechain = ret;
      }
  
!     assert(strchr("|.b(=", op) != NULL);
  
      ret->op = op;
      ret->flags = flags;
!     ret->retry = 0;
      ret->subno = 0;
      ret->min = ret->max = 1;
      ret->left = NULL;
--- 1726,1736 ----
  	v->treechain = ret;
      }
  
!     assert(strchr("=b|.*(", op) != NULL);
  
      ret->op = op;
      ret->flags = flags;
!     ret->id = 0;		/* will be assigned later */
      ret->subno = 0;
      ret->min = ret->max = 1;
      ret->left = NULL;
*************** optst(
*** 1798,1804 ****
  }
  
  /*
!  - numst - number tree nodes (assigning retry indexes)
   ^ static int numst(struct subre *, int);
   */
  static int			/* next number */
--- 1811,1817 ----
  }
  
  /*
!  - numst - number tree nodes (assigning "id" indexes)
   ^ static int numst(struct subre *, int);
   */
  static int			/* next number */
*************** numst(
*** 1811,1817 ****
      assert(t != NULL);
  
      i = start;
!     t->retry = (short) i++;
      if (t->left != NULL) {
  	i = numst(t->left, i);
      }
--- 1824,1830 ----
      assert(t != NULL);
  
      i = start;
!     t->id = (short) i++;
      if (t->left != NULL) {
  	i = numst(t->left, i);
      }
*************** stid(
*** 2146,2159 ****
      size_t bufsize)
  {
      /*
!      * Big enough for hex int or decimal t->retry?
       */
  
!     if (bufsize < sizeof(void*)*2 + 3 || bufsize < sizeof(t->retry)*3 + 1) {
  	return "unable";
      }
!     if (t->retry != 0) {
! 	sprintf(buf, "%d", t->retry);
      } else {
  	sprintf(buf, "%p", t);
      }
--- 2159,2172 ----
      size_t bufsize)
  {
      /*
!      * Big enough for hex int or decimal t->id?
       */
  
!     if (bufsize < sizeof(void*)*2 + 3 || bufsize < sizeof(t->id)*3 + 1) {
  	return "unable";
      }
!     if (t->id != 0) {
! 	sprintf(buf, "%d", t->id);
      } else {
  	sprintf(buf, "%p", t);
      }
diff -pcdr tcl8.6.4.orig/generic/regexec.c tcl8.6.4/generic/regexec.c
*** tcl8.6.4.orig/generic/regexec.c	Thu Feb 26 11:57:28 2015
--- tcl8.6.4/generic/regexec.c	Fri Sep 18 12:24:14 2015
*************** struct vars {
*** 107,113 ****
      chr *start;			/* start of string */
      chr *stop;			/* just past end of string */
      int err;			/* error code if any (0 none) */
-     regoff_t *mem;		/* memory vector for backtracking */
      struct smalldfa dfa1;
      struct smalldfa dfa2;
  };
--- 107,112 ----
*************** int exec(regex_t *, const chr *, size_t,
*** 129,146 ****
  static int simpleFind(struct vars *const, struct cnfa *const, struct colormap *const);
  static int complicatedFind(struct vars *const, struct cnfa *const, struct colormap *const);
  static int complicatedFindLoop(struct vars *const, struct cnfa *const, struct colormap *const, struct dfa *const, struct dfa *const, chr **const);
! static void zapSubexpressions(regmatch_t *const, const size_t);
! static void zapSubtree(struct vars *const, struct subre *const);
  static void subset(struct vars *const, struct subre *const, chr *const, chr *const);
! static int dissect(struct vars *const, struct subre *, chr *const, chr *const);
! static int concatenationDissect(struct vars *const, struct subre *const, chr *const, chr *const);
! static int alternationDissect(struct vars *const, struct subre *, chr *const, chr *const);
! static inline int complicatedDissect(struct vars *const, struct subre *const, chr *const, chr *const);
! static int complicatedCapturingDissect(struct vars *const, struct subre *const, chr *const, chr *const);
! static int complicatedConcatenationDissect(struct vars *const, struct subre *const, chr *const, chr *const);
! static int complicatedReversedDissect(struct vars *const, struct subre *const, chr *const, chr *const);
! static int complicatedBackrefDissect(struct vars *const, struct subre *const, chr *const, chr *const);
! static int complicatedAlternationDissect(struct vars *const, struct subre *, chr *const, chr *const);
  /* === rege_dfa.c === */
  static chr *longest(struct vars *const, struct dfa *const, chr *const, chr *const, int *const);
  static chr *shortest(struct vars *const, struct dfa *const, chr *const, chr *const, chr *const, chr **const, int *const);
--- 128,143 ----
  static int simpleFind(struct vars *const, struct cnfa *const, struct colormap *const);
  static int complicatedFind(struct vars *const, struct cnfa *const, struct colormap *const);
  static int complicatedFindLoop(struct vars *const, struct cnfa *const, struct colormap *const, struct dfa *const, struct dfa *const, chr **const);
! static void zapallsubs(regmatch_t *const, const size_t);
! static void zaptreesubs(struct vars *const, struct subre *const);
  static void subset(struct vars *const, struct subre *const, chr *const, chr *const);
! static int cdissect(struct vars *, struct subre *, chr *, chr *);
! static int ccondissect(struct vars *, struct subre *, chr *, chr *);
! static int crevcondissect(struct vars *, struct subre *, chr *, chr *);
! static int cbrdissect(struct vars *, struct subre *, chr *, chr *);
! static int caltdissect(struct vars *, struct subre *, chr *, chr *);
! static int citerdissect(struct vars *, struct subre *, chr *, chr *);
! static int creviterdissect(struct vars *, struct subre *, chr *, chr *);
  /* === rege_dfa.c === */
  static chr *longest(struct vars *const, struct dfa *const, chr *const, chr *const, int *const);
  static chr *shortest(struct vars *const, struct dfa *const, chr *const, chr *const, chr *const, chr **const, int *const);
*************** exec(
*** 176,183 ****
      size_t n;
  #define	LOCALMAT	20
      regmatch_t mat[LOCALMAT];
- #define	LOCALMEM	40
-     regoff_t mem[LOCALMEM];
  
      /*
       * Sanity checks.
--- 173,178 ----
*************** exec(
*** 235,262 ****
      v->start = (chr *)string;
      v->stop = (chr *)string + len;
      v->err = 0;
-     if (backref) {
- 	/*
- 	 * Need retry memory.
- 	 */
- 
- 	assert(v->g->ntree >= 0);
- 	n = (size_t)v->g->ntree;
- 	if (n <= LOCALMEM) {
- 	    v->mem = mem;
- 	} else {
- 	    v->mem = (regoff_t *) MALLOC(n*sizeof(regoff_t));
- 	}
- 	if (v->mem == NULL) {
- 	    if (v->pmatch != pmatch && v->pmatch != mat) {
- 		FREE(v->pmatch);
- 	    }
- 	    FreeVars(v);
- 	    return REG_ESPACE;
- 	}
-     } else {
- 	v->mem = NULL;
-     }
  
      /*
       * Do it.
--- 230,235 ----
*************** exec(
*** 274,280 ****
       */
  
      if (st == REG_OKAY && v->pmatch != pmatch && nmatch > 0) {
! 	zapSubexpressions(pmatch, nmatch);
  	n = (nmatch < v->nmatch) ? nmatch : v->nmatch;
  	memcpy(VS(pmatch), VS(v->pmatch), n*sizeof(regmatch_t));
      }
--- 247,253 ----
       */
  
      if (st == REG_OKAY && v->pmatch != pmatch && nmatch > 0) {
! 	zapallsubs(pmatch, nmatch);
  	n = (nmatch < v->nmatch) ? nmatch : v->nmatch;
  	memcpy(VS(pmatch), VS(v->pmatch), n*sizeof(regmatch_t));
      }
*************** exec(
*** 286,294 ****
      if (v->pmatch != pmatch && v->pmatch != mat) {
  	FREE(v->pmatch);
      }
-     if (v->mem != NULL && v->mem != mem) {
- 	FREE(v->mem);
-     }
      FreeVars(v);
      return st;
  }
--- 259,264 ----
*************** simpleFind(
*** 388,398 ****
      }
  
      /*
!      * Submatches.
       */
  
!     zapSubexpressions(v->pmatch, v->nmatch);
!     return dissect(v, v->g->tree, begin, end);
  }
  
  /*
--- 358,368 ----
      }
  
      /*
!      * Find submatches.
       */
  
!     zapallsubs(v->pmatch, v->nmatch);
!     return cdissect(v, v->g->tree, begin, end);
  }
  
  /*
*************** complicatedFindLoop(
*** 488,496 ****
  		}
  
  		MDEBUG(("tentative end %ld\n", LOFF(end)));
! 		zapSubexpressions(v->pmatch, v->nmatch);
! 		zapSubtree(v, v->g->tree);
! 		er = complicatedDissect(v, v->g->tree, begin, end);
  		if (er == REG_OKAY) {
  		    if (v->nmatch > 0) {
  			v->pmatch[0].rm_so = OFF(begin);
--- 458,465 ----
  		}
  
  		MDEBUG(("tentative end %ld\n", LOFF(end)));
! 		zapallsubs(v->pmatch, v->nmatch);
! 		er = cdissect(v, v->g->tree, begin, end);
  		if (er == REG_OKAY) {
  		    if (v->nmatch > 0) {
  			v->pmatch[0].rm_so = OFF(begin);
*************** complicatedFindLoop(
*** 525,535 ****
  }
  
  /*
!  - zapSubexpressions - initialize the subexpression matches to "no match"
!  ^ static void zapSubexpressions(regmatch_t *, size_t);
   */
  static void
! zapSubexpressions(
      regmatch_t *const p,
      const size_t n)
  {
--- 494,504 ----
  }
  
  /*
!  - zapallsubs - initialize all subexpression matches to "no match"
!  ^ static void zapallsubs(regmatch_t *, size_t);
   */
  static void
! zapallsubs(
      regmatch_t *const p,
      const size_t n)
  {
*************** zapSubexpressions(
*** 542,577 ****
  }
  
  /*
!  - zapSubtree - initialize the retry memory of a subtree to zeros
!  ^ static void zapSubtree(struct vars *, struct subre *);
   */
  static void
! zapSubtree(
      struct vars *const v,
      struct subre *const t)
  {
-     if (t == NULL) {
- 	return;
-     }
- 
-     assert(v->mem != NULL);
-     v->mem[t->retry] = 0;
      if (t->op == '(') {
! 	assert(t->subno > 0);
! 	v->pmatch[t->subno].rm_so = -1;
! 	v->pmatch[t->subno].rm_eo = -1;
      }
  
      if (t->left != NULL) {
! 	zapSubtree(v, t->left);
      }
      if (t->right != NULL) {
! 	zapSubtree(v, t->right);
      }
  }
  
  /*
!  - subset - set any subexpression relevant to a successful subre
   ^ static void subset(struct vars *, struct subre *, chr *, chr *);
   */
  static void
--- 511,543 ----
  }
  
  /*
!  - zaptreesubs - initialize subexpressions within subtree to "no match"
!  ^ static void zaptreesubs(struct vars *, struct subre *);
   */
  static void
! zaptreesubs(
      struct vars *const v,
      struct subre *const t)
  {
      if (t->op == '(') {
! 	int n = t->subno;
! 	assert(n > 0);
! 	if ((size_t) n < v->nmatch) {
! 	    v->pmatch[n].rm_so = -1;
! 	    v->pmatch[n].rm_eo = -1;
! 	}
      }
  
      if (t->left != NULL) {
! 	zaptreesubs(v, t->left);
      }
      if (t->right != NULL) {
! 	zaptreesubs(v, t->right);
      }
  }
  
  /*
!  - subset - set subexpression match data for a successful subre
   ^ static void subset(struct vars *, struct subre *, chr *, chr *);
   */
  static void
*************** subset(
*** 594,836 ****
  }
  
  /*
!  - dissect - determine subexpression matches (uncomplicated case)
!  ^ static int dissect(struct vars *, struct subre *, chr *, chr *);
!  */
! static int			/* regexec return code */
! dissect(
!     struct vars *const v,
!     struct subre *t,
!     chr *const begin,		/* beginning of relevant substring */
!     chr *const end)		/* end of same */
! {
! #ifndef COMPILER_DOES_TAILCALL_OPTIMIZATION
!   restart:
! #endif
!     assert(t != NULL);
!     MDEBUG(("dissect %ld-%ld\n", LOFF(begin), LOFF(end)));
! 
!     switch (t->op) {
!     case '=':			/* terminal node */
! 	assert(t->left == NULL && t->right == NULL);
! 	return REG_OKAY;	/* no action, parent did the work */
!     case '|':			/* alternation */
! 	assert(t->left != NULL);
! 	return alternationDissect(v, t, begin, end);
!     case 'b':			/* back ref -- shouldn't be calling us! */
! 	return REG_ASSERT;
!     case '.':			/* concatenation */
! 	assert(t->left != NULL && t->right != NULL);
! 	return concatenationDissect(v, t, begin, end);
!     case '(':			/* capturing */
! 	assert(t->left != NULL && t->right == NULL);
! 	assert(t->subno > 0);
! 	subset(v, t, begin, end);
! #ifndef COMPILER_DOES_TAILCALL_OPTIMIZATION
! 	t = t->left;
! 	goto restart;
! #else
! 	return dissect(v, t->left, begin, end);
! #endif
!     default:
! 	return REG_ASSERT;
!     }
! }
! 
! /*
!  - concatenationDissect - determine concatenation subexpression matches
!  - (uncomplicated)
!  ^ static int concatenationDissect(struct vars *, struct subre *, chr *, chr *);
   */
  static int			/* regexec return code */
! concatenationDissect(
      struct vars *const v,
      struct subre *const t,
      chr *const begin,		/* beginning of relevant substring */
      chr *const end)		/* end of same */
  {
!     struct dfa *d, *d2;
!     chr *mid;
!     int i;
!     int shorter = (t->left->flags&SHORTER) ? 1 : 0;
!     chr *stop = (shorter) ? end : begin;
! 
!     assert(t->op == '.');
!     assert(t->left != NULL && t->left->cnfa.nstates > 0);
!     assert(t->right != NULL && t->right->cnfa.nstates > 0);
! 
!     d = newDFA(v, &t->left->cnfa, &v->g->cmap, &v->dfa1);
!     NOERR();
!     d2 = newDFA(v, &t->right->cnfa, &v->g->cmap, &v->dfa2);
!     if (ISERR()) {
! 	assert(d2 == NULL);
! 	freeDFA(d);
! 	return v->err;
!     }
! 
!     /*
!      * Pick a tentative midpoint.
!      */
! 
!     if (shorter) {
! 	mid = shortest(v, d, begin, begin, end, NULL, NULL);
!     } else {
! 	mid = longest(v, d, begin, end, NULL);
!     }
!     if (mid == NULL) {
! 	freeDFA(d);
! 	freeDFA(d2);
! 	return REG_ASSERT;
!     }
!     MDEBUG(("tentative midpoint %ld\n", LOFF(mid)));
! 
!     /*
!      * Iterate until satisfaction or failure.
!      */
! 
!     while (longest(v, d2, mid, end, NULL) != end) {
! 	/*
! 	 * That midpoint didn't work, find a new one.
! 	 */
! 
! 	if (mid == stop) {
! 	    /*
! 	     * All possibilities exhausted!
! 	     */
! 
! 	    MDEBUG(("no midpoint!\n"));
! 	    freeDFA(d);
! 	    freeDFA(d2);
! 	    return REG_ASSERT;
! 	}
! 	if (shorter) {
! 	    mid = shortest(v, d, begin, mid+1, end, NULL, NULL);
! 	} else {
! 	    mid = longest(v, d, begin, mid-1, NULL);
! 	}
! 	if (mid == NULL) {
! 	    /*
! 	     * Failed to find a new one!
! 	     */
! 
! 	    MDEBUG(("failed midpoint!\n"));
! 	    freeDFA(d);
! 	    freeDFA(d2);
! 	    return REG_ASSERT;
! 	}
! 	MDEBUG(("new midpoint %ld\n", LOFF(mid)));
!     }
! 
!     /*
!      * Satisfaction.
!      */
! 
!     MDEBUG(("successful\n"));
!     freeDFA(d);
!     freeDFA(d2);
!     i = dissect(v, t->left, begin, mid);
!     if (i != REG_OKAY) {
! 	return i;
!     }
!     return dissect(v, t->right, mid, end);
! }
! 
! /*
!  - alternationDissect - determine alternative subexpression matches (uncomplicated)
!  ^ static int alternationDissect(struct vars *, struct subre *, chr *, chr *);
!  */
! static int			/* regexec return code */
! alternationDissect(
!     struct vars *const v,
!     struct subre *t,
!     chr *const begin,		/* beginning of relevant substring */
!     chr *const end)		/* end of same */
! {
!     int i;
  
      assert(t != NULL);
!     assert(t->op == '|');
! 
!     for (i = 0; t != NULL; t = t->right, i++) {
! 	struct dfa *d;
! 
! 	MDEBUG(("trying %dth\n", i));
! 	assert(t->left != NULL && t->left->cnfa.nstates > 0);
! 	d = newDFA(v, &t->left->cnfa, &v->g->cmap, &v->dfa1);
! 	if (ISERR()) {
! 	    return v->err;
! 	}
! 	if (longest(v, d, begin, end, NULL) == end) {
! 	    MDEBUG(("success\n"));
! 	    freeDFA(d);
! 	    return dissect(v, t->left, begin, end);
! 	}
! 	freeDFA(d);
!     }
!     return REG_ASSERT;		/* none of them matched?!? */
! }
! 
! /*
!  - complicatedDissect - determine subexpression matches (with complications)
!  * The retry memory stores the offset of the trial midpoint from begin, plus 1
!  * so that 0 uniquely means "clean slate".
!  ^ static int complicatedDissect(struct vars *, struct subre *, chr *, chr *);
!  */
! static inline int		/* regexec return code */
! complicatedDissect(
!     struct vars *const v,
!     struct subre *const t,
!     chr *const begin,		/* beginning of relevant substring */
!     chr *const end)		/* end of same */
! {
!     assert(t != NULL);
!     MDEBUG(("complicatedDissect %ld-%ld %c\n", LOFF(begin), LOFF(end), t->op));
  
      switch (t->op) {
      case '=':			/* terminal node */
  	assert(t->left == NULL && t->right == NULL);
! 	return REG_OKAY;	/* no action, parent did the work */
!     case '|':			/* alternation */
! 	assert(t->left != NULL);
! 	return complicatedAlternationDissect(v, t, begin, end);
!     case 'b':			/* back ref -- shouldn't be calling us! */
  	assert(t->left == NULL && t->right == NULL);
! 	return complicatedBackrefDissect(v, t, begin, end);
      case '.':			/* concatenation */
  	assert(t->left != NULL && t->right != NULL);
! 	return complicatedConcatenationDissect(v, t, begin, end);
      case '(':			/* capturing */
  	assert(t->left != NULL && t->right == NULL);
  	assert(t->subno > 0);
! 	return complicatedCapturingDissect(v, t, begin, end);
      default:
! 	return REG_ASSERT;
      }
- }
  
! static int			/* regexec return code */
! complicatedCapturingDissect(
!     struct vars *const v,
!     struct subre *const t,
!     chr *const begin,		/* beginning of relevant substring */
!     chr *const end)		/* end of same */
! {
!     int er = complicatedDissect(v, t->left, begin, end);
  
-     if (er == REG_OKAY) {
- 	subset(v, t, begin, end);
-     }
      return er;
  }
  
  /*
!  - complicatedConcatenationDissect - concatenation subexpression matches (with complications)
!  * The retry memory stores the offset of the trial midpoint from begin, plus 1
!  * so that 0 uniquely means "clean slate".
!  ^ static int complicatedConcatenationDissect(struct vars *, struct subre *, chr *, chr *);
   */
  static int			/* regexec return code */
! complicatedConcatenationDissect(
      struct vars *const v,
      struct subre *const t,
      chr *const begin,		/* beginning of relevant substring */
--- 560,646 ----
  }
  
  /*
!  - cdissect - check backrefs and determine subexpression matches
!  * cdissect recursively processes a subre tree to check matching of backrefs
!  * and/or identify submatch boundaries for capture nodes.  The proposed match
!  * runs from "begin" to "end" (not including "end"), and we are basically
!  * "dissecting" it to see where the submatches are.
!  * Before calling any level of cdissect, the caller must have run the node's
!  * DFA and found that the proposed substring satisfies the DFA.  (We make
!  * the caller do that because in concatenation and iteration nodes, it's
!  * much faster to check all the substrings against the child DFAs before we
!  * recurse.)  Also, caller must have cleared subexpression match data via
!  * zaptreesubs (or zapallsubs at the top level).
!  ^ static int cdissect(struct vars *, struct subre *, chr *, chr *);
   */
  static int			/* regexec return code */
! cdissect(
      struct vars *const v,
      struct subre *const t,
      chr *const begin,		/* beginning of relevant substring */
      chr *const end)		/* end of same */
  {
!     int er;
  
      assert(t != NULL);
!     MDEBUG(("cdissect %ld-%ld %c\n", LOFF(begin), LOFF(end), t->op));
  
      switch (t->op) {
      case '=':			/* terminal node */
  	assert(t->left == NULL && t->right == NULL);
! 	er = REG_OKAY;		/* no action, parent did the work */
! 	break;
!     case 'b':			/* back reference */
  	assert(t->left == NULL && t->right == NULL);
! 	er = cbrdissect(v, t, begin, end);
! 	break;
      case '.':			/* concatenation */
  	assert(t->left != NULL && t->right != NULL);
! 	if (t->left->flags & SHORTER) /* reverse scan */
! 	    er = crevcondissect(v, t, begin, end);
! 	else
! 	    er = ccondissect(v, t, begin, end);
! 	break;
!     case '|':			/* alternation */
! 	assert(t->left != NULL);
! 	er = caltdissect(v, t, begin, end);
! 	break;
!     case '*':			/* iteration */
! 	assert(t->left != NULL);
! 	if (t->left->flags & SHORTER) /* reverse scan */
! 	    er = creviterdissect(v, t, begin, end);
! 	else
! 	    er = citerdissect(v, t, begin, end);
! 	break;
      case '(':			/* capturing */
  	assert(t->left != NULL && t->right == NULL);
  	assert(t->subno > 0);
! 	er = cdissect(v, t->left, begin, end);
! 	if (er == REG_OKAY) {
! 	    subset(v, t, begin, end);
! 	}
! 	break;
      default:
! 	er = REG_ASSERT;
! 	break;
      }
  
!     /*
!      * We should never have a match failure unless backrefs lurk below;
!      * otherwise, either caller failed to check the DFA, or there's some
!      * inconsistency between the DFA and the node's innards.
!      */
!     assert(er != REG_NOMATCH || (t->flags & BACKR));
  
      return er;
  }
  
  /*
!  - ccondissect - concatenation subexpression matches (with complications)
!  ^ static int ccondissect(struct vars *, struct subre *, chr *, chr *);
   */
  static int			/* regexec return code */
! ccondissect(
      struct vars *const v,
      struct subre *const t,
      chr *const begin,		/* beginning of relevant substring */
*************** complicatedConcatenationDissect(
*** 842,851 ****
      assert(t->op == '.');
      assert(t->left != NULL && t->left->cnfa.nstates > 0);
      assert(t->right != NULL && t->right->cnfa.nstates > 0);
! 
!     if (t->left->flags&SHORTER) { /* reverse scan */
! 	return complicatedReversedDissect(v, t, begin, end);
!     }
  
      d = newDFA(v, &t->left->cnfa, &v->g->cmap, DOMALLOC);
      if (ISERR()) {
--- 652,658 ----
      assert(t->op == '.');
      assert(t->left != NULL && t->left->cnfa.nstates > 0);
      assert(t->right != NULL && t->right->cnfa.nstates > 0);
!     assert(!(t->left->flags & SHORTER));
  
      d = newDFA(v, &t->left->cnfa, &v->g->cmap, DOMALLOC);
      if (ISERR()) {
*************** complicatedConcatenationDissect(
*** 856,880 ****
  	freeDFA(d);
  	return v->err;
      }
!     MDEBUG(("cConcat %d\n", t->retry));
  
      /*
       * Pick a tentative midpoint.
       */
! 
!     if (v->mem[t->retry] == 0) {
! 	mid = longest(v, d, begin, end, NULL);
! 	if (mid == NULL) {
! 	    freeDFA(d);
! 	    freeDFA(d2);
! 	    return REG_NOMATCH;
! 	}
! 	MDEBUG(("tentative midpoint %ld\n", LOFF(mid)));
! 	v->mem[t->retry] = (mid - begin) + 1;
!     } else {
! 	mid = begin + (v->mem[t->retry] - 1);
! 	MDEBUG(("working midpoint %ld\n", LOFF(mid)));
      }
  
      /*
       * Iterate until satisfaction or failure.
--- 663,680 ----
  	freeDFA(d);
  	return v->err;
      }
!     MDEBUG(("cConcat %d\n", t->id));
  
      /*
       * Pick a tentative midpoint.
       */
!     mid = longest(v, d, begin, end, (int *) NULL);
!     if (mid == NULL) {
! 	freeDFA(d);
! 	freeDFA(d2);
! 	return REG_NOMATCH;
      }
+     MDEBUG(("tentative midpoint %ld\n", LOFF(mid)));
  
      /*
       * Iterate until satisfaction or failure.
*************** complicatedConcatenationDissect(
*** 886,895 ****
  	 */
  
  	if (longest(v, d2, mid, end, NULL) == end) {
! 	    int er = complicatedDissect(v, t->left, begin, mid);
  
  	    if (er == REG_OKAY) {
! 		er = complicatedDissect(v, t->right, mid, end);
  		if (er == REG_OKAY) {
  		    /*
  		     * Satisfaction.
--- 686,695 ----
  	 */
  
  	if (longest(v, d2, mid, end, NULL) == end) {
! 	    int er = cdissect(v, t->left, begin, mid);
  
  	    if (er == REG_OKAY) {
! 		er = cdissect(v, t->right, mid, end);
  		if (er == REG_OKAY) {
  		    /*
  		     * Satisfaction.
*************** complicatedConcatenationDissect(
*** 901,907 ****
  		    return REG_OKAY;
  		}
  	    }
! 	    if ((er != REG_OKAY) && (er != REG_NOMATCH)) {
  		freeDFA(d);
  		freeDFA(d2);
  		return er;
--- 701,707 ----
  		    return REG_OKAY;
  		}
  	    }
! 	    if (er != REG_NOMATCH) {
  		freeDFA(d);
  		freeDFA(d2);
  		return er;
*************** complicatedConcatenationDissect(
*** 917,923 ****
  	     * All possibilities exhausted.
  	     */
  
! 	    MDEBUG(("%d no midpoint\n", t->retry));
  	    freeDFA(d);
  	    freeDFA(d2);
  	    return REG_NOMATCH;
--- 717,723 ----
  	     * All possibilities exhausted.
  	     */
  
! 	    MDEBUG(("%d no midpoint\n", t->id));
  	    freeDFA(d);
  	    freeDFA(d2);
  	    return REG_NOMATCH;
*************** complicatedConcatenationDissect(
*** 928,954 ****
  	     * Failed to find a new one.
  	     */
  
! 	    MDEBUG(("%d failed midpoint\n", t->retry));
  	    freeDFA(d);
  	    freeDFA(d2);
  	    return REG_NOMATCH;
  	}
! 	MDEBUG(("%d: new midpoint %ld\n", t->retry, LOFF(mid)));
! 	v->mem[t->retry] = (mid - begin) + 1;
! 	zapSubtree(v, t->left);
! 	zapSubtree(v, t->right);
      }
  }
  
  /*
!  - complicatedReversedDissect - determine backref shortest-first subexpression
!  - matches
!  * The retry memory stores the offset of the trial midpoint from begin, plus 1
!  * so that 0 uniquely means "clean slate".
!  ^ static int complicatedReversedDissect(struct vars *, struct subre *, chr *, chr *);
   */
  static int			/* regexec return code */
! complicatedReversedDissect(
      struct vars *const v,
      struct subre *const t,
      chr *const begin,		/* beginning of relevant substring */
--- 728,750 ----
  	     * Failed to find a new one.
  	     */
  
! 	    MDEBUG(("%d failed midpoint\n", t->id));
  	    freeDFA(d);
  	    freeDFA(d2);
  	    return REG_NOMATCH;
  	}
! 	MDEBUG(("%d: new midpoint %ld\n", t->id, LOFF(mid)));
! 	zaptreesubs(v, t->left);
! 	zaptreesubs(v, t->right);
      }
  }
  
  /*
!  - crevcondissect - dissect match for concatenation node, shortest-first
!  ^ static int crevcondissect(struct vars *, struct subre *, chr *, chr *);
   */
  static int			/* regexec return code */
! crevcondissect(
      struct vars *const v,
      struct subre *const t,
      chr *const begin,		/* beginning of relevant substring */
*************** complicatedReversedDissect(
*** 962,971 ****
      assert(t->right != NULL && t->right->cnfa.nstates > 0);
      assert(t->left->flags&SHORTER);
  
-     /*
-      * Concatenation -- need to split the substring between parts.
-      */
- 
      d = newDFA(v, &t->left->cnfa, &v->g->cmap, DOMALLOC);
      if (ISERR()) {
  	return v->err;
--- 758,763 ----
*************** complicatedReversedDissect(
*** 975,999 ****
  	freeDFA(d);
  	return v->err;
      }
!     MDEBUG(("cRev %d\n", t->retry));
  
      /*
       * Pick a tentative midpoint.
       */
  
!     if (v->mem[t->retry] == 0) {
! 	mid = shortest(v, d, begin, begin, end, NULL, NULL);
! 	if (mid == NULL) {
! 	    freeDFA(d);
! 	    freeDFA(d2);
! 	    return REG_NOMATCH;
! 	}
! 	MDEBUG(("tentative midpoint %ld\n", LOFF(mid)));
! 	v->mem[t->retry] = (mid - begin) + 1;
!     } else {
! 	mid = begin + (v->mem[t->retry] - 1);
! 	MDEBUG(("working midpoint %ld\n", LOFF(mid)));
      }
  
      /*
       * Iterate until satisfaction or failure.
--- 767,785 ----
  	freeDFA(d);
  	return v->err;
      }
!     MDEBUG(("crevcon %d\n", t->id));
  
      /*
       * Pick a tentative midpoint.
       */
  
!     mid = shortest(v, d, begin, begin, end, (chr **) NULL, (int *) NULL);
!     if (mid == NULL) {
! 	freeDFA(d);
! 	freeDFA(d2);
! 	return REG_NOMATCH;
      }
+     MDEBUG(("tentative midpoint %ld\n", LOFF(mid)));
  
      /*
       * Iterate until satisfaction or failure.
*************** complicatedReversedDissect(
*** 1005,1014 ****
  	 */
  
  	if (longest(v, d2, mid, end, NULL) == end) {
! 	    int er = complicatedDissect(v, t->left, begin, mid);
  
  	    if (er == REG_OKAY) {
! 		er = complicatedDissect(v, t->right, mid, end);
  		if (er == REG_OKAY) {
  		    /*
  		     * Satisfaction.
--- 791,800 ----
  	 */
  
  	if (longest(v, d2, mid, end, NULL) == end) {
! 	    int er = cdissect(v, t->left, begin, mid);
  
  	    if (er == REG_OKAY) {
! 		er = cdissect(v, t->right, mid, end);
  		if (er == REG_OKAY) {
  		    /*
  		     * Satisfaction.
*************** complicatedReversedDissect(
*** 1020,1026 ****
  		    return REG_OKAY;
  		}
  	    }
! 	    if (er != REG_OKAY && er != REG_NOMATCH) {
  		freeDFA(d);
  		freeDFA(d2);
  		return er;
--- 806,812 ----
  		    return REG_OKAY;
  		}
  	    }
! 	    if (er != REG_NOMATCH) {
  		freeDFA(d);
  		freeDFA(d2);
  		return er;
*************** complicatedReversedDissect(
*** 1036,1042 ****
  	     * All possibilities exhausted.
  	     */
  
! 	    MDEBUG(("%d no midpoint\n", t->retry));
  	    freeDFA(d);
  	    freeDFA(d2);
  	    return REG_NOMATCH;
--- 822,828 ----
  	     * All possibilities exhausted.
  	     */
  
! 	    MDEBUG(("%d no midpoint\n", t->id));
  	    freeDFA(d);
  	    freeDFA(d2);
  	    return REG_NOMATCH;
*************** complicatedReversedDissect(
*** 1047,1210 ****
  	     * Failed to find a new one.
  	     */
  
! 	    MDEBUG(("%d failed midpoint\n", t->retry));
  	    freeDFA(d);
  	    freeDFA(d2);
  	    return REG_NOMATCH;
  	}
! 	MDEBUG(("%d: new midpoint %ld\n", t->retry, LOFF(mid)));
! 	v->mem[t->retry] = (mid - begin) + 1;
! 	zapSubtree(v, t->left);
! 	zapSubtree(v, t->right);
      }
  }
  
  /*
!  - complicatedBackrefDissect - determine backref subexpression matches
!  ^ static int complicatedBackrefDissect(struct vars *, struct subre *, chr *, chr *);
   */
  static int			/* regexec return code */
! complicatedBackrefDissect(
      struct vars *const v,
      struct subre *const t,
      chr *const begin,		/* beginning of relevant substring */
      chr *const end)		/* end of same */
  {
!     int i, n = t->subno, min = t->min, max = t->max;
!     chr *paren, *p, *stop;
!     size_t len;
  
      assert(t != NULL);
      assert(t->op == 'b');
      assert(n >= 0);
      assert((size_t)n < v->nmatch);
  
!     MDEBUG(("cbackref n%d %d{%d-%d}\n", t->retry, n, min, max));
  
      if (v->pmatch[n].rm_so == -1) {
  	return REG_NOMATCH;
      }
!     paren = v->start + v->pmatch[n].rm_so;
!     len = v->pmatch[n].rm_eo - v->pmatch[n].rm_so;
  
      /*
!      * No room to maneuver -- retries are pointless.
       */
  
!     if (v->mem[t->retry]) {
  	return REG_NOMATCH;
      }
-     v->mem[t->retry] = 1;
  
!     /*
!      * Special-case zero-length string.
!      */
  
!     if (len == 0) {
! 	if (begin == end) {
! 	    return REG_OKAY;
  	}
! 	return REG_NOMATCH;
      }
  
      /*
!      * And too-short string.
       */
! 
!     assert(end >= begin);
!     if ((size_t)(end - begin) < len) {
! 	return REG_NOMATCH;
      }
-     stop = end - len;
  
      /*
!      * Count occurrences.
       */
  
!     i = 0;
!     for (p = begin; p <= stop && (i < max || max == DUPINF); p += len) {
! 	if (v->g->compare(paren, p, len) != 0) {
! 	    break;
! 	}
! 	i++;
      }
!     MDEBUG(("cbackref found %d\n", i));
  
      /*
!      * And sort it out.
       */
  
!     if (p != end) {		/* didn't consume all of it */
! 	return REG_NOMATCH;
!     }
!     if (min <= i && (i <= max || max == DUPINF)) {
! 	return REG_OKAY;
      }
!     return REG_NOMATCH;		/* out of range */
  }
  
  /*
!  - complicatedAlternationDissect - determine alternative subexpression matches (w.
!  - complications)
!  ^ static int complicatedAlternationDissect(struct vars *, struct subre *, chr *, chr *);
   */
  static int			/* regexec return code */
! complicatedAlternationDissect(
!     struct vars *const v,
!     struct subre *t,
!     chr *const begin,		/* beginning of relevant substring */
!     chr *const end)		/* end of same */
  {
!     int er;
! #define	UNTRIED	0		/* not yet tried at all */
! #define	TRYING	1		/* top matched, trying submatches */
! #define	TRIED	2		/* top didn't match or submatches exhausted */
  
! #ifndef COMPILER_DOES_TAILCALL_OPTIMIZATION
!     if (0) {
!     doRight:
! 	t = t->right;
!     }
! #endif
!     if (t == NULL) {
! 	return REG_NOMATCH;
      }
!     assert(t->op == '|');
!     if (v->mem[t->retry] == TRIED) {
! 	goto doRight;
      }
  
!     MDEBUG(("cAlt n%d\n", t->retry));
!     assert(t->left != NULL);
  
!     if (v->mem[t->retry] == UNTRIED) {
! 	struct dfa *d = newDFA(v, &t->left->cnfa, &v->g->cmap, DOMALLOC);
  
! 	if (ISERR()) {
! 	    return v->err;
  	}
! 	if (longest(v, d, begin, end, NULL) != end) {
  	    freeDFA(d);
! 	    v->mem[t->retry] = TRIED;
! 	    goto doRight;
  	}
- 	freeDFA(d);
- 	MDEBUG(("cAlt matched\n"));
- 	v->mem[t->retry] = TRYING;
-     }
  
!     er = complicatedDissect(v, t->left, begin, end);
!     if (er != REG_NOMATCH) {
! 	return er;
      }
  
!     v->mem[t->retry] = TRIED;
! #ifndef COMPILER_DOES_TAILCALL_OPTIMIZATION
!     goto doRight;
! #else
!   doRight:
!     return complicatedAlternationDissect(v, t->right, begin, end);
! #endif
  }
  
  #include "rege_dfa.c"
--- 833,1316 ----
  	     * Failed to find a new one.
  	     */
  
! 	    MDEBUG(("%d failed midpoint\n", t->id));
  	    freeDFA(d);
  	    freeDFA(d2);
  	    return REG_NOMATCH;
  	}
! 	MDEBUG(("%d: new midpoint %ld\n", t->id, LOFF(mid)));
! 	zaptreesubs(v, t->left);
! 	zaptreesubs(v, t->right);
      }
  }
  
  /*
!  - cbrdissect - dissect match for backref node
!  ^ static int cbrdissect(struct vars *, struct subre *, chr *, chr *);
   */
  static int			/* regexec return code */
! cbrdissect(
      struct vars *const v,
      struct subre *const t,
      chr *const begin,		/* beginning of relevant substring */
      chr *const end)		/* end of same */
  {
!     int n = t->subno, min = t->min, max = t->max;
!     size_t numreps;
!     size_t tlen;
!     size_t brlen;
!     chr *brstring;
!     chr *p;
  
      assert(t != NULL);
      assert(t->op == 'b');
      assert(n >= 0);
      assert((size_t)n < v->nmatch);
  
!     MDEBUG(("cbackref n%d %d{%d-%d}\n", t->id, n, min, max));
  
+     /* get the backreferenced string */
      if (v->pmatch[n].rm_so == -1) {
  	return REG_NOMATCH;
      }
!     brstring = v->start + v->pmatch[n].rm_so;
!     brlen = v->pmatch[n].rm_eo - v->pmatch[n].rm_so;
! 
!     /* special cases for zero-length strings */
!     if (brlen == 0) {
! 	/*
! 	 * matches only if target is zero length, but any number of
! 	 * repetitions can be considered to be present
! 	 */
! 	if (begin == end && min <= max) {
! 	    MDEBUG(("cbackref matched trivially\n"));
! 	    return REG_OKAY;
! 	}
! 	return REG_NOMATCH;
!     }
!     if (begin == end) {
! 	/* matches only if zero repetitions are okay */
! 	if (min == 0) {
! 	    MDEBUG(("cbackref matched trivially\n"));
! 	    return REG_OKAY;
! 	}
! 	return REG_NOMATCH;
!     }
  
      /*
!      * check target length to see if it could possibly be an allowed number of
!      * repetitions of brstring
       */
  
!     assert(end > begin);
!     tlen = end - begin;
!     if (tlen % brlen != 0)
! 	return REG_NOMATCH;
!     numreps = tlen / brlen;
!     if (numreps < min || (numreps > max && max != DUPINF))
  	return REG_NOMATCH;
+ 
+     /* okay, compare the actual string contents */
+     p = begin;
+     while (numreps-- > 0) {
+ 	if ((*v->g->compare) (brstring, p, brlen) != 0)
+ 	    return REG_NOMATCH;
+ 	p += brlen;
      }
  
!     MDEBUG(("cbackref matched\n"));
!     return REG_OKAY;
! }
! 
! /*
!  - caltdissect - dissect match for alternation node
!  ^ static int caltdissect(struct vars *, struct subre *, chr *, chr *);
!  */
! static int			/* regexec return code */
! caltdissect(
!     struct vars *const v,
!     struct subre *t,
!     chr *const begin,		/* beginning of relevant substring */
!     chr *const end)		/* end of same */
! {
!     struct dfa *d;
!     int er;
  
!     /* We loop, rather than tail-recurse, to handle a chain of alternatives */
!     while (t != NULL) {
! 	assert(t->op == '|');
! 	assert(t->left != NULL && t->left->cnfa.nstates > 0);
! 
! 	MDEBUG(("calt n%d\n", t->id));
! 
! 	d = newDFA(v, &t->left->cnfa, &v->g->cmap, DOMALLOC);
! 	NOERR();
! 	if (longest(v, d, begin, end, (int *) NULL) == end) {
! 	    freeDFA(d);
! 	    MDEBUG(("calt matched\n"));
! 	    er = cdissect(v, t->left, begin, end);
! 	    if (er != REG_NOMATCH) {
! 		return er;
! 	    }
  	}
! 	freeDFA(d);
! 
! 	t = t->right;
      }
  
+     return REG_NOMATCH;
+ }
+ 
+ /*
+  - citerdissect - dissect match for iteration node
+  ^ static int citerdissect(struct vars *, struct subre *, chr *, chr *);
+  */
+ static int			/* regexec return code */
+ citerdissect(struct vars * v,
+ 	     struct subre * t,
+ 	     chr *begin,	/* beginning of relevant substring */
+ 	     chr *end)		/* end of same */
+ {
+     struct dfa *d;
+     chr	  **endpts;
+     chr	   *limit;
+     int		min_matches;
+     size_t	max_matches;
+     int		nverified;
+     int		k;
+     int		i;
+     int		er;
+ 
+     assert(t->op == '*');
+     assert(t->left != NULL && t->left->cnfa.nstates > 0);
+     assert(!(t->left->flags & SHORTER));
+     assert(begin <= end);
+ 
      /*
!      * If zero matches are allowed, and target string is empty, just declare
!      * victory.  OTOH, if target string isn't empty, zero matches can't work
!      * so we pretend the min is 1.
       */
!     min_matches = t->min;
!     if (min_matches <= 0) {
! 	if (begin == end)
! 	    return REG_OKAY;
! 	min_matches = 1;
      }
  
      /*
!      * We need workspace to track the endpoints of each sub-match.  Normally
!      * we consider only nonzero-length sub-matches, so there can be at most
!      * end-begin of them.  However, if min is larger than that, we will also
!      * consider zero-length sub-matches in order to find enough matches.
!      *
!      * For convenience, endpts[0] contains the "begin" pointer and we store
!      * sub-match endpoints in endpts[1..max_matches].
       */
+     max_matches = end - begin;
+     if (max_matches > t->max && t->max != DUPINF)
+ 	max_matches = t->max;
+     if (max_matches < min_matches)
+ 	max_matches = min_matches;
+     endpts = (chr **) MALLOC((max_matches + 1) * sizeof(chr *));
+     if (endpts == NULL)
+ 	return REG_ESPACE;
+     endpts[0] = begin;
  
!     d = newDFA(v, &t->left->cnfa, &v->g->cmap, DOMALLOC);
!     if (ISERR()) {
! 	FREE(endpts);
! 	return v->err;
      }
!     MDEBUG(("citer %d\n", t->id));
  
      /*
!      * Our strategy is to first find a set of sub-match endpoints that are
!      * valid according to the child node's DFA, and then recursively dissect
!      * each sub-match to confirm validity.  If any validity check fails,
!      * backtrack the last sub-match and try again.  And, when we next try for
!      * a validity check, we need not recheck any successfully verified
!      * sub-matches that we didn't move the endpoints of.  nverified remembers
!      * how many sub-matches are currently known okay.
       */
  
!     /* initialize to consider first sub-match */
!     nverified = 0;
!     k = 1;
!     limit = end;
! 
!     /* iterate until satisfaction or failure */
!     while (k > 0) {
! 	/* try to find an endpoint for the k'th sub-match */
! 	endpts[k] = longest(v, d, endpts[k - 1], limit, (int *) NULL);
! 	if (endpts[k] == NULL) {
! 	    /* no match possible, so see if we can shorten previous one */
! 	    k--;
! 	    goto backtrack;
! 	}
! 	MDEBUG(("%d: working endpoint %d: %ld\n",
! 		t->id, k, LOFF(endpts[k])));
! 
! 	/* k'th sub-match can no longer be considered verified */
! 	if (nverified >= k)
! 	    nverified = k - 1;
! 
! 	if (endpts[k] != end) {
! 	    /* haven't reached end yet, try another iteration if allowed */
! 	    if (k >= max_matches) {
! 		/* must try to shorten some previous match */
! 		k--;
! 		goto backtrack;
! 	    }
! 
! 	    /* reject zero-length match unless necessary to achieve min */
! 	    if (endpts[k] == endpts[k - 1] &&
! 		(k >= min_matches || min_matches - k < end - endpts[k]))
! 		goto backtrack;
! 
! 	    k++;
! 	    limit = end;
! 	    continue;
! 	}
! 
! 	/*
! 	 * We've identified a way to divide the string into k sub-matches
! 	 * that works so far as the child DFA can tell.  If k is an allowed
! 	 * number of matches, start the slow part: recurse to verify each
! 	 * sub-match.  We always have k <= max_matches, needn't check that.
! 	 */
! 	if (k < min_matches)
! 	    goto backtrack;
! 
! 	MDEBUG(("%d: verifying %d..%d\n", t->id, nverified + 1, k));
! 
! 	for (i = nverified + 1; i <= k; i++) {
! 	    zaptreesubs(v, t->left);
! 	    er = cdissect(v, t->left, endpts[i - 1], endpts[i]);
! 	    if (er == REG_OKAY) {
! 		nverified = i;
! 		continue;
! 	    }
! 	    if (er == REG_NOMATCH)
! 		break;
! 	    /* oops, something failed */
! 	    freeDFA(d);
! 	    FREE(endpts);
! 	    return er;
! 	}
! 
! 	if (i > k) {
! 	    /* satisfaction */
! 	    MDEBUG(("%d successful\n", t->id));
! 	    freeDFA(d);
! 	    FREE(endpts);
! 	    return REG_OKAY;
! 	}
! 
! 	/* match failed to verify, so backtrack */
! 
!     backtrack:
! 	/*
! 	 * Must consider shorter versions of the current sub-match.  However,
! 	 * we'll only ask for a zero-length match if necessary.
! 	 */
! 	while (k > 0) {
! 	    chr	   *prev_end = endpts[k - 1];
! 
! 	    if (endpts[k] > prev_end) {
! 		limit = endpts[k] - 1;
! 		if (limit > prev_end ||
! 		    (k < min_matches && min_matches - k >= end - prev_end)) {
! 		    /* break out of backtrack loop, continue the outer one */
! 		    break;
! 		}
! 	    }
! 	    /* can't shorten k'th sub-match any more, consider previous one */
! 	    k--;
! 	}
      }
! 
!     /* all possibilities exhausted */
!     MDEBUG(("%d failed\n", t->id));
!     freeDFA(d);
!     FREE(endpts);
!     return REG_NOMATCH;
  }
  
  /*
!  - creviterdissect - dissect match for iteration node, shortest-first
!  ^ static int creviterdissect(struct vars *, struct subre *, chr *, chr *);
   */
  static int			/* regexec return code */
! creviterdissect(struct vars * v,
! 		struct subre * t,
! 		chr *begin,	/* beginning of relevant substring */
! 		chr *end)	/* end of same */
  {
!     struct dfa *d;
!     chr	  **endpts;
!     chr	   *limit;
!     int		min_matches;
!     size_t	max_matches;
!     int		nverified;
!     int		k;
!     int		i;
!     int		er;
  
!     assert(t->op == '*');
!     assert(t->left != NULL && t->left->cnfa.nstates > 0);
!     assert(t->left->flags & SHORTER);
!     assert(begin <= end);
! 
!     /*
!      * If zero matches are allowed, and target string is empty, just declare
!      * victory.  OTOH, if target string isn't empty, zero matches can't work
!      * so we pretend the min is 1.
!      */
!     min_matches = t->min;
!     if (min_matches <= 0) {
! 	if (begin == end)
! 	    return REG_OKAY;
! 	min_matches = 1;
      }
! 
!     /*
!      * We need workspace to track the endpoints of each sub-match.  Normally
!      * we consider only nonzero-length sub-matches, so there can be at most
!      * end-begin of them.  However, if min is larger than that, we will also
!      * consider zero-length sub-matches in order to find enough matches.
!      *
!      * For convenience, endpts[0] contains the "begin" pointer and we store
!      * sub-match endpoints in endpts[1..max_matches].
!      */
!     max_matches = end - begin;
!     if (max_matches > t->max && t->max != DUPINF)
! 	max_matches = t->max;
!     if (max_matches < min_matches)
! 	max_matches = min_matches;
!     endpts = (chr **) MALLOC((max_matches + 1) * sizeof(chr *));
!     if (endpts == NULL)
! 	return REG_ESPACE;
!     endpts[0] = begin;
! 
!     d = newDFA(v, &t->left->cnfa, &v->g->cmap, DOMALLOC);
!     if (ISERR()) {
! 	FREE(endpts);
! 	return v->err;
      }
+     MDEBUG(("creviter %d\n", t->id));
  
!     /*
!      * Our strategy is to first find a set of sub-match endpoints that are
!      * valid according to the child node's DFA, and then recursively dissect
!      * each sub-match to confirm validity.  If any validity check fails,
!      * backtrack the last sub-match and try again.  And, when we next try for
!      * a validity check, we need not recheck any successfully verified
!      * sub-matches that we didn't move the endpoints of.  nverified remembers
!      * how many sub-matches are currently known okay.
!      */
  
!     /* initialize to consider first sub-match */
!     nverified = 0;
!     k = 1;
!     limit = begin;
  
!     /* iterate until satisfaction or failure */
!     while (k > 0) {
! 	/* disallow zero-length match unless necessary to achieve min */
! 	if (limit == endpts[k - 1] &&
! 	    limit != end &&
! 	    (k >= min_matches || min_matches - k < end - limit))
! 	    limit++;
! 
! 	/* if this is the last allowed sub-match, it must reach to the end */
! 	if (k >= max_matches)
! 	    limit = end;
! 
! 	/* try to find an endpoint for the k'th sub-match */
! 	endpts[k] = shortest(v, d, endpts[k - 1], limit, end,
! 			     (chr **) NULL, (int *) NULL);
! 	if (endpts[k] == NULL) {
! 	    /* no match possible, so see if we can lengthen previous one */
! 	    k--;
! 	    goto backtrack;
  	}
! 	MDEBUG(("%d: working endpoint %d: %ld\n",
! 		t->id, k, LOFF(endpts[k])));
! 
! 	/* k'th sub-match can no longer be considered verified */
! 	if (nverified >= k)
! 	    nverified = k - 1;
! 
! 	if (endpts[k] != end) {
! 	    /* haven't reached end yet, try another iteration if allowed */
! 	    if (k >= max_matches) {
! 		/* must try to lengthen some previous match */
! 		k--;
! 		goto backtrack;
! 	    }
! 
! 	    k++;
! 	    limit = endpts[k - 1];
! 	    continue;
! 	}
! 
! 	/*
! 	 * We've identified a way to divide the string into k sub-matches
! 	 * that works so far as the child DFA can tell.  If k is an allowed
! 	 * number of matches, start the slow part: recurse to verify each
! 	 * sub-match.  We always have k <= max_matches, needn't check that.
! 	 */
! 	if (k < min_matches)
! 	    goto backtrack;
! 
! 	MDEBUG(("%d: verifying %d..%d\n", t->id, nverified + 1, k));
! 
! 	for (i = nverified + 1; i <= k; i++) {
! 	    zaptreesubs(v, t->left);
! 	    er = cdissect(v, t->left, endpts[i - 1], endpts[i]);
! 	    if (er == REG_OKAY) {
! 		nverified = i;
! 		continue;
! 	    }
! 	    if (er == REG_NOMATCH)
! 		break;
! 	    /* oops, something failed */
  	    freeDFA(d);
! 	    FREE(endpts);
! 	    return er;
  	}
  
! 	if (i > k) {
! 	    /* satisfaction */
! 	    MDEBUG(("%d successful\n", t->id));
! 	    freeDFA(d);
! 	    FREE(endpts);
! 	    return REG_OKAY;
! 	}
! 
! 	/* match failed to verify, so backtrack */
! 
!     backtrack:
! 	/*
! 	 * Must consider longer versions of the current sub-match.
! 	 */
! 	while (k > 0) {
! 	    if (endpts[k] < end) {
! 		limit = endpts[k] + 1;
! 		/* break out of backtrack loop, continue the outer one */
! 		break;
! 	    }
! 	    /* can't lengthen k'th sub-match any more, consider previous one */
! 	    k--;
! 	}
      }
  
!     /* all possibilities exhausted */
!     MDEBUG(("%d failed\n", t->id));
!     freeDFA(d);
!     FREE(endpts);
!     return REG_NOMATCH;
  }
  
  #include "rege_dfa.c"
diff -pcdr tcl8.6.4.orig/generic/regguts.h tcl8.6.4/generic/regguts.h
*** tcl8.6.4.orig/generic/regguts.h	Thu Feb 26 11:57:28 2015
--- tcl8.6.4/generic/regguts.h	Thu Sep 17 18:52:18 2015
*************** struct cnfa {
*** 329,339 ****
  
  /*
   * subexpression tree
   */
  
  struct subre {
!     char op;			/* '|', '.' (concat), 'b' (backref), '(',
! 				 * '=' */
      char flags;
  #define	LONGER	01		/* prefers longer match */
  #define	SHORTER	02		/* prefers shorter match */
--- 329,356 ----
  
  /*
   * subexpression tree
+  *
+  * "op" is one of:
+  *	'='  plain regex without interesting substructure (implemented as DFA)
+  *	'b'  back-reference (has no substructure either)
+  *	'('  capture node: captures the match of its single child
+  *	'.'  concatenation: matches a match for left, then a match for right
+  *	'|'  alternation: matches a match for left or a match for right
+  *	'*'  iteration: matches some number of matches of its single child
+  *
+  * Note: the right child of an alternation must be another alternation or
+  * NULL; hence, an N-way branch requires N alternation nodes, not N-1 as you
+  * might expect.  This could stand to be changed.  Actually I'd rather see
+  * a single alternation node with N children, but that will take revising
+  * the representation of struct subre.
+  *
+  * Note: when a backref is directly quantified, we stick the min/max counts
+  * into the backref rather than plastering an iteration node on top.  This is
+  * for efficiency: there is no need to search for possible division points.
   */
  
  struct subre {
!     char op;			/* see type codes above */
      char flags;
  #define	LONGER	01		/* prefers longer match */
  #define	SHORTER	02		/* prefers shorter match */
*************** struct subre {
*** 349,358 ****
  #define	PREF(f)	((f)&NOPROP)
  #define	PREF2(f1, f2)	((PREF(f1) != 0) ? PREF(f1) : PREF(f2))
  #define	COMBINE(f1, f2)	(UP((f1)|(f2)) | PREF2(f1, f2))
!     short retry;		/* index into retry memory */
      int subno;			/* subexpression number (for 'b' and '(') */
!     short min;			/* min repetitions, for backref only */
!     short max;			/* max repetitions, for backref only */
      struct subre *left;		/* left child, if any (also freelist chain) */
      struct subre *right;	/* right child, if any */
      struct state *begin;	/* outarcs from here... */
--- 366,375 ----
  #define	PREF(f)	((f)&NOPROP)
  #define	PREF2(f1, f2)	((PREF(f1) != 0) ? PREF(f1) : PREF(f2))
  #define	COMBINE(f1, f2)	(UP((f1)|(f2)) | PREF2(f1, f2))
!     short id;			/* ID of subre (1..ntree) */
      int subno;			/* subexpression number (for 'b' and '(') */
!     short min;			/* min repetitions for iteration or backref */
!     short max;			/* max repetitions for iteration or backref */
      struct subre *left;		/* left child, if any (also freelist chain) */
      struct subre *right;	/* right child, if any */
      struct state *begin;	/* outarcs from here... */
diff -pcdr tcl8.6.4.orig/tests/reg.test tcl8.6.4/tests/reg.test
*** tcl8.6.4.orig/tests/reg.test	Thu Feb 26 11:57:28 2015
--- tcl8.6.4/tests/reg.test	Fri Sep 18 12:26:33 2015
*************** expectError	14.19 -		{a(b)c\2}	ESUBREG
*** 669,675 ****
  expectMatch	14.20 bR	{a\(b*\)c\1}	abbcbb	abbcbb	bb
  expectMatch	14.21 RP	{^([bc])\1*$}	bbb	bbb	b
  expectMatch	14.22 RP	{^([bc])\1*$}	ccc	ccc	c
! knownBug expectNomatch 14.23 R	{^([bc])\1*$}	bcb
  
  
  doing 15 "octal escapes vs back references"
--- 669,681 ----
  expectMatch	14.20 bR	{a\(b*\)c\1}	abbcbb	abbcbb	bb
  expectMatch	14.21 RP	{^([bc])\1*$}	bbb	bbb	b
  expectMatch	14.22 RP	{^([bc])\1*$}	ccc	ccc	c
! expectNomatch	14.23 RP	{^([bc])\1*$}	bcb
! expectMatch	14.24 LRP	{^(\w+)( \1)+$}	{abc abc abc} {abc abc abc} abc { abc}
! expectNomatch	14.25 LRP	{^(\w+)( \1)+$}	{abc abd abc}
! expectNomatch	14.26 LRP	{^(\w+)( \1)+$}	{abc abc abd}
! expectMatch	14.27 RP	{^(.+)( \1)+$}	{abc abc abc} {abc abc abc} abc { abc}
! expectNomatch	14.28 RP	{^(.+)( \1)+$}	{abc abd abc}
! expectNomatch	14.29 RP	{^(.+)( \1)+$}	{abc abc abd}
  
  
  doing 15 "octal escapes vs back references"
*************** expectMatch	21.31 LP	"\\y(\\w+)\\y"	"-- 
*** 796,801 ****
--- 802,808 ----
  expectMatch	21.32 -		a((b|c)d+)+	abacdbd	acdbd	bd	b
  expectMatch	21.33 N		(.*).*		abc	abc	abc
  expectMatch	21.34 N		(a*)*		bc	""	""
+ expectMatch	21.35 M		{ TO (([a-z0-9._]+|"([^"]+|"")+")+)}	{asd TO foo}	{ TO foo} foo o {}
  
  
  doing 22 "multicharacter collating elements"
*************** expectMatch	24.9  -		3z*		123zzzz456	3zz
*** 848,853 ****
--- 855,861 ----
  expectMatch	24.10 PT	3z*?		123zzzz456	3
  expectMatch	24.11 -		z*4		123zzzz456	zzzz4
  expectMatch	24.12 PT	z*?4		123zzzz456	zzzz4
+ expectMatch	24.13 PT	{^([^/]+?)(?:/([^/]+?))(?:/([^/]+?))?$}	{foo/bar/baz}	{foo/bar/baz} {foo} {bar} {baz}
  
  
  doing 25 "mixed quantifiers"