View | Details | Raw Unified | Return to bug 229925 | Differences between
and this patch

Collapse All | Expand All

(-)b/contrib/netbsd-tests/lib/libc/regex/data/meta.in (-1 / +3 lines)
Lines 4-10 a[bc]d & abd abd Link Here
4
a\*c		&	a*c	a*c
4
a\*c		&	a*c	a*c
5
a\\b		&	a\b	a\b
5
a\\b		&	a\b	a\b
6
a\\\*b		&	a\*b	a\*b
6
a\\\*b		&	a\*b	a\*b
7
a\bc		&	abc	abc
7
# Begin FreeBSD
8
a\bc		&C	EESCAPE
9
# End FreeBSD
8
a\		&C	EESCAPE
10
a\		&C	EESCAPE
9
a\\bc		&	a\bc	a\bc
11
a\\bc		&	a\bc	a\bc
10
\{		bC	BADRPT
12
\{		bC	BADRPT
(-)b/contrib/netbsd-tests/lib/libc/regex/data/subexp.in (-1 / +1 lines)
Lines 12-18 a(b+)c - abbbc abbbc bbb Link Here
12
a(b*)c		-	ac	ac	@c
12
a(b*)c		-	ac	ac	@c
13
(a|ab)(bc([de]+)f|cde)	-	abcdef	abcdef	a,bcdef,de
13
(a|ab)(bc([de]+)f|cde)	-	abcdef	abcdef	a,bcdef,de
14
# Begin FreeBSD
14
# Begin FreeBSD
15
a\(b\|c\)d	b	ab|cd	ab|cd	b|c
15
a\(b|c\)d	b	ab|cd	ab|cd	b|c
16
# End FreeBSD
16
# End FreeBSD
17
# the regression tester only asks for 9 subexpressions
17
# the regression tester only asks for 9 subexpressions
18
a(b)(c)(d)(e)(f)(g)(h)(i)(j)k	-	abcdefghijk	abcdefghijk	b,c,d,e,f,g,h,i,j
18
a(b)(c)(d)(e)(f)(g)(h)(i)(j)k	-	abcdefghijk	abcdefghijk	b,c,d,e,f,g,h,i,j
(-)b/lib/libc/regex/regcomp.c (-2 / +50 lines)
Lines 132-137 static void p_b_cclass(struct parse *p, cset *cs); Link Here
132
static void p_b_eclass(struct parse *p, cset *cs);
132
static void p_b_eclass(struct parse *p, cset *cs);
133
static wint_t p_b_symbol(struct parse *p);
133
static wint_t p_b_symbol(struct parse *p);
134
static wint_t p_b_coll_elem(struct parse *p, wint_t endc);
134
static wint_t p_b_coll_elem(struct parse *p, wint_t endc);
135
static int may_escape(struct parse *p, const wint_t ch);
135
static wint_t othercase(wint_t ch);
136
static wint_t othercase(wint_t ch);
136
static void bothcases(struct parse *p, wint_t ch);
137
static void bothcases(struct parse *p, wint_t ch);
137
static void ordinary(struct parse *p, wint_t ch);
138
static void ordinary(struct parse *p, wint_t ch);
Lines 441-447 p_ere_exp(struct parse *p, struct branchc *bc) Link Here
441
			EMIT(OEOW, 0);
442
			EMIT(OEOW, 0);
442
			break;
443
			break;
443
		default:
444
		default:
444
			ordinary(p, wc);
445
			if (may_escape(p, wc) == 0)
446
				ordinary(p, wc);
447
			else
448
				SETERROR(REG_EESCAPE);
445
			break;
449
			break;
446
		}
450
		}
447
		break;
451
		break;
Lines 803-809 p_simp_re(struct parse *p, struct branchc *bc) Link Here
803
			return (false);	/* Definitely not $... */
807
			return (false);	/* Definitely not $... */
804
		p->next--;
808
		p->next--;
805
		wc = WGETNEXT();
809
		wc = WGETNEXT();
806
		ordinary(p, wc);
810
		if ((c & BACKSL) == 0 || may_escape(p, wc) == 0)
811
			ordinary(p, wc);
812
		else
813
			SETERROR(REG_EESCAPE);
807
		break;
814
		break;
808
	}
815
	}
809
816
Lines 1100-1105 p_b_coll_elem(struct parse *p, Link Here
1100
	return(0);
1107
	return(0);
1101
}
1108
}
1102
1109
1110
/*
1111
 - may_escape - determine whether 'ch' is escape-able in the current context
1112
 == static int may_escape(struct parse *p, const wint_t ch)
 *
 * Note the inverted return sense: 0 means a backslash followed by 'ch' may be
 * taken as the ordinary character 'ch'; 1 means 'ch' is not escape-able.
 * Whether '|', '+' and '?' are escape-able depends on REG_EXTENDED being set
 * in p->g->cflags.
1113
 */
1114
static int
1115
may_escape(struct parse *p, const wint_t ch)
1116
{
1117
	/*
1118
	 * Build a whitelist of characters that may be escaped to produce an
1119
	 * ordinary in the current context. This assumes that these have not
1120
	 * been otherwise interpreted as a special character. Escaping an
1121
	 * ordinary character yields undefined results according to
1122
	 * IEEE 1003.1-2008. Some extensions (notably, some GNU extensions) take
1123
	 * advantage of this and use escaped ordinary characters to provide
1124
	 * special meaning, e.g. \b, \B, \w, \W, \s, \S.
1125
	 */
1126
	switch(ch) {
1127
	case '|':
1128
	case '+':
1129
	case '?':
1130
		/* The above characters may not be escaped in BREs */
1131
		if (!(p->g->cflags&REG_EXTENDED))
1132
			return 1;
		/* With REG_EXTENDED set they join the whitelist below */
1133
		/* Fallthrough */
1134
	case '(':
1135
	case ')':
1136
	case '{':
1137
	case '}':
1138
	case '.':
1139
	case '[':
1140
	case ']':
1141
	case '\\':
1142
	case '*':
1143
	case '^':
1144
	case '$':
1145
		/* Whitelisted: the escaped character is accepted */
		return 0;
1146
	default:
1147
		/* Everything else is not escape-able */
		return 1;
1148
	}
1149
}
1150
1103
/*
1151
/*
1104
 - othercase - return the case counterpart of an alphabetic
1152
 - othercase - return the case counterpart of an alphabetic
1105
 == static wint_t othercase(wint_t ch);
1153
 == static wint_t othercase(wint_t ch);

Return to bug 229925