diff --git a/contrib/netbsd-tests/lib/libc/regex/data/meta.in b/contrib/netbsd-tests/lib/libc/regex/data/meta.in index 4533d3591bc..eb24075aea6 100644 --- a/contrib/netbsd-tests/lib/libc/regex/data/meta.in +++ b/contrib/netbsd-tests/lib/libc/regex/data/meta.in @@ -4,7 +4,9 @@ a[bc]d & abd abd a\*c & a*c a*c a\\b & a\b a\b a\\\*b & a\*b a\*b -a\bc & abc abc +# Begin FreeBSD +a\bc &C EESCAPE +# End FreeBSD a\ &C EESCAPE a\\bc & a\bc a\bc \{ bC BADRPT diff --git a/contrib/netbsd-tests/lib/libc/regex/data/subexp.in b/contrib/netbsd-tests/lib/libc/regex/data/subexp.in index d3efe2eab27..e3d376bb7cb 100644 --- a/contrib/netbsd-tests/lib/libc/regex/data/subexp.in +++ b/contrib/netbsd-tests/lib/libc/regex/data/subexp.in @@ -12,7 +12,7 @@ a(b+)c - abbbc abbbc bbb a(b*)c - ac ac @c (a|ab)(bc([de]+)f|cde) - abcdef abcdef a,bcdef,de # Begin FreeBSD -a\(b\|c\)d b ab|cd ab|cd b|c +a\(b|c\)d b ab|cd ab|cd b|c # End FreeBSD # the regression tester only asks for 9 subexpressions a(b)(c)(d)(e)(f)(g)(h)(i)(j)k - abcdefghijk abcdefghijk b,c,d,e,f,g,h,i,j diff --git a/lib/libc/regex/regcomp.c b/lib/libc/regex/regcomp.c index 586621c5a74..31156830cff 100644 --- a/lib/libc/regex/regcomp.c +++ b/lib/libc/regex/regcomp.c @@ -132,6 +132,7 @@ static void p_b_cclass(struct parse *p, cset *cs); static void p_b_eclass(struct parse *p, cset *cs); static wint_t p_b_symbol(struct parse *p); static wint_t p_b_coll_elem(struct parse *p, wint_t endc); +static int may_escape(struct parse *p, const wint_t ch); static wint_t othercase(wint_t ch); static void bothcases(struct parse *p, wint_t ch); static void ordinary(struct parse *p, wint_t ch); @@ -441,7 +442,10 @@ p_ere_exp(struct parse *p, struct branchc *bc) EMIT(OEOW, 0); break; default: - ordinary(p, wc); + if (may_escape(p, wc) == 0) + ordinary(p, wc); + else + SETERROR(REG_EESCAPE); break; } break; @@ -803,7 +807,10 @@ p_simp_re(struct parse *p, struct branchc *bc) return (false); /* Definitely not $... 
*/ p->next--; wc = WGETNEXT(); - ordinary(p, wc); + if ((c & BACKSL) == 0 || may_escape(p, wc) == 0) + ordinary(p, wc); + else + SETERROR(REG_EESCAPE); break; } @@ -1100,6 +1107,53 @@ p_b_coll_elem(struct parse *p, return(0); } +/* + - may_escape - determine whether 'ch' is escape-able in the current context + == static int may_escape(struct parse *p, const wint_t ch) + */ +static int +may_escape(struct parse *p, const wint_t ch) +{ + + if (isalpha(ch) || ch == '\'' || ch == '`') + return (1); + return (0); +#ifdef NOTYET + /* + * Build a whitelist of characters that may be escaped to produce an + * ordinary in the current context. This assumes that these have not + * been otherwise interpreted as a special character. Escaping an + * ordinary character yields undefined results according to + * IEEE 1003.1-2008. Some extensions (notably, some GNU extensions) take + * advantage of this and use escaped ordinary characters to provide + * special meaning, e.g. \b, \B, \w, \W, \s, \S. + */ + switch(ch) { + case '|': + case '+': + case '?': + /* The above characters may not be escaped in BREs */ + if (!(p->g->cflags&REG_EXTENDED)) + return 1; + /* Fallthrough */ + case '(': + case ')': + case '{': + case '}': + case '.': + case '[': + case ']': + case '\\': + case '*': + case '^': + case '$': + return 0; + default: + return 1; + } +#endif +} + /* - othercase - return the case counterpart of an alphabetic == static wint_t othercase(wint_t ch);