View | Details | Raw Unified | Return to bug 14342
Collapse All | Expand All

(-)regex/engine.c (-11 / +34 lines)
Lines 144-157 Link Here
144
int eflags;
144
int eflags;
145
{
145
{
146
	register char *endp;
146
	register char *endp;
147
	register int i;
147
	register int i, j;
148
	int patlen = g->mlen;
148
	struct match mv;
149
	struct match mv;
149
	register struct match *m = &mv;
150
	register struct match *m = &mv;
150
	register char *dp;
151
	register char *dp, *k, *s;
151
	register const sopno gf = g->firststate+1;	/* +1 for OEND */
152
	register const sopno gf = g->firststate+1;	/* +1 for OEND */
152
	register const sopno gl = g->laststate;
153
	register const sopno gl = g->laststate;
153
	char *start;
154
	char *start, *stop;
154
	char *stop;
155
155
156
	/* simplify the situation where possible */
156
	/* simplify the situation where possible */
157
	if (g->cflags&REG_NOSUB)
157
	if (g->cflags&REG_NOSUB)
Lines 167-180 Link Here
167
		return(REG_INVARG);
167
		return(REG_INVARG);
168
168
169
	/* prescreening; this does wonders for this rather slow code */
169
	/* prescreening; this does wonders for this rather slow code */
170
	if (g->must != NULL) {
170
 	if (g->must != NULL) {
171
		for (dp = start; dp < stop; dp++)
171
		for (k = string + patlen - 1; k < stop;) {
172
			if (*dp == g->must[0] && stop - dp >= g->mlen &&
172
			/*
173
				memcmp(dp, g->must, (size_t)g->mlen) == 0)
173
			 * for a large class of patterns, upwards of 80% of
174
				break;
174
			 * match time is spent on the next line.  we beat
175
		if (dp == stop)		/* we didn't find g->must */
175
			 * existing microcode (vax 'matchc') this way. 
176
			return(REG_NOMATCH);
176
			 */
177
			while ((k += g->delta0[*(unsigned char *) k]) < stop);
178
			if (k < string + LARGE)
179
				 break;
180
			k -= LARGE;
181
			
182
 			j = patlen - 1;
183
	 		s = k - 1;
184
		 	while (g->cmap[*((unsigned char *)s--)] == 
185
			       g->must[--j]);
186
		
187
			/*
188
			 * delta-less shortcut for literati. short shrift for
189
			 * genetic engineers? 
190
			 */
191
192
			if (j < 0)
193
				goto bmpassed;
194
			
195
			k++;    /* no match; restart next char */
196
		}
197
		return REG_NOMATCH;
177
	}
198
	}
199
200
bmpassed:
178
201
179
	/* match struct setup */
202
	/* match struct setup */
180
	m->g = g;
203
	m->g = g;
(-)regex/regcomp.c (+44 lines)
Lines 245-250 Link Here
245
	g->categories = &g->catspace[-(CHAR_MIN)];
245
	g->categories = &g->catspace[-(CHAR_MIN)];
246
	(void) memset((char *)g->catspace, 0, NC*sizeof(cat_t));
246
	(void) memset((char *)g->catspace, 0, NC*sizeof(cat_t));
247
	g->backrefs = 0;
247
	g->backrefs = 0;
248
	(void) memset((void *)g->delta0, 0, 256 * sizeof(int));
248
249
249
	/* do it */
250
	/* do it */
250
	EMIT(OEND, 0);
251
	EMIT(OEND, 0);
Lines 1738-1743 Link Here
1738
	}
1739
	}
1739
	assert(cp == g->must + g->mlen);
1740
	assert(cp == g->must + g->mlen);
1740
	*cp++ = '\0';		/* just on general principles */
1741
	*cp++ = '\0';		/* just on general principles */
1742
	gosper(g->must, g->delta0, g->cmap);
1741
}
1743
}
1742
1744
1743
/*
1745
/*
Lines 1774-1777 Link Here
1774
	if (plusnest != 0)
1776
	if (plusnest != 0)
1775
		g->iflags |= BAD;
1777
		g->iflags |= BAD;
1776
	return(maxnest);
1778
	return(maxnest);
1779
}
1780
1781
1782
#define NALT	7		/* tied to scanf() size in alternate() */
1783
1784
/*
1785
 * Compute "Boyer-Moore" delta table -- put skip distance in delta0[c] 
1786
 */
1787
void
1788
gosper(pattern, delta0, cmap)
1789
	char *pattern;		/* ... HAKMEM lives ... */
1790
	int  *delta0;           /* Who/what is HAKMEM? */
1791
	char *cmap;
1792
{
1793
	char *altpat[NALT];
1794
	int altlen[NALT];
1795
	register int j, altmin;
1796
	unsigned char c;
1797
1798
	/* Make one-string case look like simple alternatives case */
1799
	altmin = altlen[0] = strlen(pattern);
1800
	altpat[0] = pattern;
1801
	
1802
	/* For chars that aren't in any string, skip by string length. */
1803
 	for (j = 0; j < 256; j++) { 
1804
 		delta0[j] = altmin; 
1805
		cmap[j] = j;	/* Sneak in initialization of cmap */
1806
	}
1807
1808
	/* For chars in a string, skip distance from char to end of string. */
1809
	/* (If char appears more than once, skip minimum distance.) */
1810
1811
	delta0[0] = 0;
1812
	for (j = 0; j < altlen[0] - 1; j++) {
1813
		c = altpat[0][j];
1814
		delta0[c] = MIN(delta0[c], altlen[0] - j - 1);
1815
	}
1816
1817
	/* For last char of each string, fall out of search loop. */
1818
	c = altpat[0][altlen[0] - 1];
1819
	delta0[c] = LARGE;	 
1820
1777
}
1821
}
(-)regex/regex2.h (+15 lines)
Lines 56-61 Link Here
56
 */
56
 */
57
#define	MAGIC1	((('r'^0200)<<8) | 'e')
57
#define	MAGIC1	((('r'^0200)<<8) | 'e')
58
58
59
#define META	"\n^$.[]()?+*|\\"	/* egrep meta-characters */
60
59
/*
61
/*
60
 * The internal representation is a *strip*, a sequence of
62
 * The internal representation is a *strip*, a sequence of
61
 * operators ending with an endmarker.  (Some terminology etc. is a
63
 * operators ending with an endmarker.  (Some terminology etc. is a
Lines 164-169 Link Here
164
	size_t nsub;		/* copy of re_nsub */
166
	size_t nsub;		/* copy of re_nsub */
165
	int backrefs;		/* does it use back references? */
167
	int backrefs;		/* does it use back references? */
166
	sopno nplus;		/* how deep does it nest +s? */
168
	sopno nplus;		/* how deep does it nest +s? */
169
	int delta0[256];        /* Boyer-Moore algorithm core */
170
	char cmap[256];
167
	/* catspace must be last */
171
	/* catspace must be last */
168
	cat_t catspace[1];	/* actually [NC] */
172
	cat_t catspace[1];	/* actually [NC] */
169
};
173
};
Lines 171-173 Link Here
171
/* misc utilities */
175
/* misc utilities */
172
#define	OUT	(CHAR_MAX+1)	/* a non-character value */
176
#define	OUT	(CHAR_MAX+1)	/* a non-character value */
173
#define ISWORD(c)       (isalnum((uch)(c)) || (c) == '_')
177
#define ISWORD(c)       (isalnum((uch)(c)) || (c) == '_')
178
179
/* stuff used by Boyer-Moore */
180
181
#define	MIN(A, B)	((A) > (B) ? (B) : (A))
182
183
#define BUFSIZE	65536		/* make higher for cray */
184
#define PATSIZE 6000
185
#define LARGE 	BUFSIZE + PATSIZE
186
187
void             gosper(char *, int *, char *);
188
int              chimaera(char *, char *, struct re_guts *);

Return to bug 14342