View | Details | Raw Unified | Return to bug 21605
Collapse All | Expand All

(-)make.1 (+28 lines)
Lines 596-601 Link Here
596
The colon may be escaped with a backslash
596
The colon may be escaped with a backslash
597
.Pq Ql \e .
597
.Pq Ql \e .
598
.Bl -tag -width Cm E\&
598
.Bl -tag -width Cm E\&
599
.Sm off
600
.It Cm C No \&/ Ar pattern Xo
601
.No \&/ Ar replacement
602
.No \&/ Op Cm 1g
603
.Xc
604
.Sm on
605
The 
606
.Cm C
607
modifier is just like the
608
.Cm S
609
modifier except that the old and new strings, instead of being
610
simple strings, are a regular expression (see
611
.Xr regex 3 )
612
and an
613
.Xr ed 1 Ns \-style
614
replacement string.  Normally, the first occurrence of the pattern in
615
each word of the value is changed.  The
616
.Ql 1
617
modifier causes the substitution to apply to at most one word; the
618
.Ql g
619
modifier causes the substitution to apply to as many instances of the
620
search pattern as occur in the word or words it is found in.  Note that
621
.Ql 1
622
and
623
.Ql g
624
are orthogonal; the former specifies whether multiple words are
625
potentially affected, the latter whether multiple substitutions can
626
potentially occur within each affected word.
599
.It Cm E
627
.It Cm E
600
Replaces each word in the variable with its suffix.
628
Replaces each word in the variable with its suffix.
601
.It Cm H
629
.It Cm H
(-)var.c (+396 lines)
Lines 86-91 Link Here
86
 */
86
 */
87
87
88
#include    <ctype.h>
88
#include    <ctype.h>
89
#ifndef NO_REGEX
90
#include    <sys/types.h>
91
#include    <regex.h>
92
#endif
89
#include    <stdlib.h>
93
#include    <stdlib.h>
90
#include    "make.h"
94
#include    "make.h"
91
#include    "buf.h"
95
#include    "buf.h"
Lines 144-151 Link Here
144
148
145
/* Var*Pattern flags */
149
/* Var*Pattern flags */
146
#define VAR_SUB_GLOBAL	0x01	/* Apply substitution globally */
150
#define VAR_SUB_GLOBAL	0x01	/* Apply substitution globally */
151
#define VAR_SUB_ONE	0x02	/* Apply substitution to one word */
152
#define VAR_SUB_MATCHED	0x04	/* There was a match */
147
#define VAR_MATCH_START	0x08	/* Match at start of word */
153
#define VAR_MATCH_START	0x08	/* Match at start of word */
148
#define VAR_MATCH_END	0x10	/* Match at end of word */
154
#define VAR_MATCH_END	0x10	/* Match at end of word */
155
#define VAR_NOSUBST	0x20	/* don't expand vars in VarGetPattern */
149
156
150
typedef struct {
157
typedef struct {
151
    char    	  *lhs;	    /* String to match */
158
    char    	  *lhs;	    /* String to match */
Lines 155-160 Link Here
155
    int	    	  flags;
162
    int	    	  flags;
156
} VarPattern;
163
} VarPattern;
157
164
165
#ifndef NO_REGEX
166
/* State for one :C (regular expression) substitution. */
typedef struct { 
167
    regex_t	   re; 		/* Pattern as compiled by regcomp() */
168
    int		   nsub;	/* Number of entries in matches[] */
169
    regmatch_t	  *matches;	/* Match offsets filled in by regexec() */
170
    char	  *replace;	/* Replacement string; may hold & and \N */
171
    int		   flags;	/* VAR_SUB_GLOBAL, VAR_SUB_ONE, VAR_SUB_MATCHED */
172
} VarREPattern;
173
#endif
174
158
static int VarCmp __P((ClientData, ClientData));
175
static int VarCmp __P((ClientData, ClientData));
159
static Var *VarFind __P((char *, GNode *, int));
176
static Var *VarFind __P((char *, GNode *, int));
160
static void VarAdd __P((char *, char *, GNode *));
177
static void VarAdd __P((char *, char *, GNode *));
Lines 168-174 Link Here
168
static Boolean VarSYSVMatch __P((char *, Boolean, Buffer, ClientData));
185
static Boolean VarSYSVMatch __P((char *, Boolean, Buffer, ClientData));
169
#endif
186
#endif
170
static Boolean VarNoMatch __P((char *, Boolean, Buffer, ClientData));
187
static Boolean VarNoMatch __P((char *, Boolean, Buffer, ClientData));
188
#ifndef NO_REGEX
189
static void VarREError __P((int, regex_t *, const char *));
190
static Boolean VarRESubstitute __P((char *, Boolean, Buffer, ClientData));
191
#endif
171
static Boolean VarSubstitute __P((char *, Boolean, Buffer, ClientData));
192
static Boolean VarSubstitute __P((char *, Boolean, Buffer, ClientData));
193
static char *VarGetPattern __P((GNode *, int, char **, int, int *, int *,
194
				VarPattern *));
172
static char *VarQuote __P((char *));
195
static char *VarQuote __P((char *));
173
static char *VarModify __P((char *, Boolean (*)(char *, Boolean, Buffer,
196
static char *VarModify __P((char *, Boolean (*)(char *, Boolean, Buffer,
174
						ClientData),
197
						ClientData),
Lines 1046-1053 Link Here
1046
    return(TRUE);
1069
    return(TRUE);
1047
}
1070
}
1048
1071
1072
#ifndef NO_REGEX
1073
/*-
 *-----------------------------------------------------------------------
 * VarREError --
 *	Report a failure returned by a regcomp or regexec call.
 *
 *	err	error code returned by the failing call
 *	pat	the regex_t that call operated on
 *	str	text printed in front of the regex library's message
 *
 * Results:
 *	None.
 *
 * Side Effects:
 *	The message "<str>: <regerror text>" is handed to Error().
 *
 *-----------------------------------------------------------------------
 */
static void
VarREError(err, pat, str)
    int err;
    regex_t *pat;
    const char *str;
{
    /* Asking with an empty buffer yields the size the message needs. */
    int need = regerror(err, pat, 0, 0);
    char *msg = emalloc(need);

    regerror(err, pat, msg, need);
    Error("%s: %s", str, msg);
    free(msg);
}
1101
1102
1049
/*-
1103
/*-
1050
 *-----------------------------------------------------------------------
1104
 *-----------------------------------------------------------------------
1105
 * VarRESubstitute --
1106
 *	Perform a regex substitution on the given word, placing the
1107
 *	result in the passed buffer.
1108
 *
1109
 * Results:
1110
 *	TRUE if a space is needed before more characters are added.
1111
 *
1112
 * Side Effects:
1113
 *	None.
1114
 *
1115
 *-----------------------------------------------------------------------
1116
 */
1117
static Boolean
1118
VarRESubstitute(word, addSpace, buf, patternp)
1119
    char *word;
1120
    Boolean addSpace;
1121
    Buffer buf;
1122
    ClientData patternp;
1123
{
1124
    VarREPattern *pat;
1125
    int xrv;
1126
    char *wp;
1127
    char *rp;
1128
    int added;
1129
    int flags = 0;
1130
1131
#define MAYBE_ADD_SPACE()		\
1132
	if (addSpace && !added)		\
1133
	    Buf_AddByte(buf, ' ');	\
1134
	added = 1
1135
1136
    added = 0;
1137
    wp = word;
1138
    pat = patternp;
1139
1140
    if ((pat->flags & (VAR_SUB_ONE|VAR_SUB_MATCHED)) ==
1141
	(VAR_SUB_ONE|VAR_SUB_MATCHED))
1142
	xrv = REG_NOMATCH;
1143
    else {
1144
    tryagain:
1145
	xrv = regexec(&pat->re, wp, pat->nsub, pat->matches, flags);
1146
    }
1147
1148
    switch (xrv) {
1149
    case 0:
1150
	pat->flags |= VAR_SUB_MATCHED;
1151
	if (pat->matches[0].rm_so > 0) {
1152
	    MAYBE_ADD_SPACE();
1153
	    Buf_AddBytes(buf, pat->matches[0].rm_so, wp);
1154
	}
1155
1156
	for (rp = pat->replace; *rp; rp++) {
1157
	    if ((*rp == '\\') && ((rp[1] == '&') || (rp[1] == '\\'))) {
1158
		MAYBE_ADD_SPACE();
1159
		Buf_AddByte(buf,rp[1]);
1160
		rp++;
1161
	    }
1162
	    else if ((*rp == '&') ||
1163
		((*rp == '\\') && isdigit((unsigned char)rp[1]))) {
1164
		int n;
1165
		char *subbuf;
1166
		int sublen;
1167
		char errstr[3];
1168
1169
		if (*rp == '&') {
1170
		    n = 0;
1171
		    errstr[0] = '&';
1172
		    errstr[1] = '\0';
1173
		} else {
1174
		    n = rp[1] - '0';
1175
		    errstr[0] = '\\';
1176
		    errstr[1] = rp[1];
1177
		    errstr[2] = '\0';
1178
		    rp++;
1179
		}
1180
1181
		if (n > pat->nsub) {
1182
		    Error("No subexpression %s", &errstr[0]);
1183
		    subbuf = "";
1184
		    sublen = 0;
1185
		} else if ((pat->matches[n].rm_so == -1) &&
1186
			   (pat->matches[n].rm_eo == -1)) {
1187
		    Error("No match for subexpression %s", &errstr[0]);
1188
		    subbuf = "";
1189
		    sublen = 0;
1190
		} else {
1191
		    subbuf = wp + pat->matches[n].rm_so;
1192
		    sublen = pat->matches[n].rm_eo - pat->matches[n].rm_so;
1193
		}
1194
1195
		if (sublen > 0) {
1196
		    MAYBE_ADD_SPACE();
1197
		    Buf_AddBytes(buf, sublen, subbuf);
1198
		}
1199
	    } else {
1200
		MAYBE_ADD_SPACE();
1201
		Buf_AddByte(buf, *rp);
1202
	    }
1203
	}
1204
	wp += pat->matches[0].rm_eo;
1205
	if (pat->flags & VAR_SUB_GLOBAL) {
1206
	    flags |= REG_NOTBOL;
1207
	    if (pat->matches[0].rm_so == 0 && pat->matches[0].rm_eo == 0) {
1208
		MAYBE_ADD_SPACE();
1209
		Buf_AddByte(buf, *wp);
1210
		wp++;
1211
1212
	    }
1213
	    if (*wp)
1214
		goto tryagain;
1215
	}
1216
	if (*wp) {
1217
	    MAYBE_ADD_SPACE();
1218
	    Buf_AddBytes(buf, strlen(wp), wp);
1219
	}
1220
	break;
1221
    default:
1222
	VarREError(xrv, &pat->re, "Unexpected regex error");
1223
       /* fall through */
1224
    case REG_NOMATCH:
1225
	if (*wp) {
1226
	    MAYBE_ADD_SPACE();
1227
	    Buf_AddBytes(buf,strlen(wp),wp);
1228
	}
1229
	break;
1230
    }
1231
    return(addSpace||added);
1232
}
1233
#endif
1234
1235
1236
/*-
1237
 *-----------------------------------------------------------------------
1051
 * VarModify --
1238
 * VarModify --
1052
 *	Modify each of the words of the passed string using the given
1239
 *	Modify each of the words of the passed string using the given
1053
 *	function. Used to implement all modifiers.
1240
 *	function. Used to implement all modifiers.
Lines 1090-1095 Link Here
1090
1277
1091
/*-
1278
/*-
1092
 *-----------------------------------------------------------------------
1279
 *-----------------------------------------------------------------------
1280
 * VarGetPattern --
1281
 *	Pass through the tstr looking for 1) escaped delimiters,
1282
 *	'$'s and backslashes (place the escaped character in
1283
 *	uninterpreted) and 2) unescaped $'s that aren't before
1284
 *	the delimiter (expand the variable substitution unless flags
1285
 *	has VAR_NOSUBST set).
1286
 *	Return the expanded string or NULL if the delimiter was missing
1287
 *	If pattern is specified, handle escaped ampersands, and replace
1288
 *	unescaped ampersands with the lhs of the pattern.
1289
 *
1290
 * Results:
1291
 *	A string of all the words modified appropriately.
1292
 *	If length is specified, return the string length of the buffer
1293
 *	If flags is specified and the last character of the pattern is a
1294
 *	$ set the VAR_MATCH_END bit of flags.
1295
 *
1296
 * Side Effects:
1297
 *	None.
1298
 *-----------------------------------------------------------------------
1299
 */
1300
static char *
1301
VarGetPattern(ctxt, err, tstr, delim, flags, length, pattern)
1302
    GNode *ctxt;
1303
    int err;
1304
    char **tstr;
1305
    int delim;
1306
    int *flags;
1307
    int *length;
1308
    VarPattern *pattern;
1309
{
1310
    char *cp;
1311
    Buffer buf = Buf_Init(0);
1312
    int junk;
1313
    if (length == NULL)
1314
	length = &junk;
1315
1316
#define IS_A_MATCH(cp, delim) \
1317
    ((cp[0] == '\\') && ((cp[1] == delim) ||  \
1318
     (cp[1] == '\\') || (cp[1] == '$') || (pattern && (cp[1] == '&'))))
1319
1320
    /*
1321
     * Skim through until the matching delimiter is found;
1322
     * pick up variable substitutions on the way. Also allow
1323
     * backslashes to quote the delimiter, $, and \, but don't
1324
     * touch other backslashes.
1325
     */
1326
    for (cp = *tstr; *cp && (*cp != delim); cp++) {
1327
	if (IS_A_MATCH(cp, delim)) {
1328
	    Buf_AddByte(buf, (Byte) cp[1]);
1329
	    cp++;
1330
	} else if (*cp == '$') {
1331
	    if (cp[1] == delim) {
1332
		if (flags == NULL)
1333
		    Buf_AddByte(buf, (Byte) *cp);
1334
		else
1335
		    /*
1336
		     * Unescaped $ at end of pattern => anchor
1337
		     * pattern at end.
1338
		     */
1339
		    *flags |= VAR_MATCH_END;
1340
	    } else {
1341
		if (flags == NULL || (*flags & VAR_NOSUBST) == 0) {
1342
		    char   *cp2;
1343
		    int	    len;
1344
		    Boolean freeIt;
1345
1346
		    /*
1347
		     * If unescaped dollar sign not before the
1348
		     * delimiter, assume it's a variable
1349
		     * substitution and recurse.
1350
		     */
1351
		    cp2 = Var_Parse(cp, ctxt, err, &len, &freeIt);
1352
		    Buf_AddBytes(buf, strlen(cp2), (Byte *) cp2);
1353
		    if (freeIt)
1354
			free(cp2);
1355
		    cp += len - 1;
1356
		} else {
1357
		    char *cp2 = &cp[1];
1358
1359
		    if (*cp2 == '(' || *cp2 == '{') {
1360
			/*
1361
			 * Find the end of this variable reference
1362
			 * and suck it in without further ado.
1363
			 * It will be interperated later.
1364
			 */
1365
			int have = *cp2;
1366
			int want = (*cp2 == '(') ? ')' : '}';
1367
			int depth = 1;
1368
1369
			for (++cp2; *cp2 != '\0' && depth > 0; ++cp2) {
1370
			    if (cp2[-1] != '\\') {
1371
				if (*cp2 == have)
1372
				    ++depth;
1373
				if (*cp2 == want)
1374
				    --depth;
1375
			    }
1376
			}
1377
			Buf_AddBytes(buf, cp2 - cp, (Byte *)cp);
1378
			cp = --cp2;
1379
		    } else
1380
			Buf_AddByte(buf, (Byte) *cp);
1381
		}
1382
	    }
1383
	}
1384
	else if (pattern && *cp == '&')
1385
	    Buf_AddBytes(buf, pattern->leftLen, (Byte *)pattern->lhs);
1386
	else
1387
	    Buf_AddByte(buf, (Byte) *cp);
1388
    }
1389
1390
    Buf_AddByte(buf, (Byte) '\0');
1391
1392
    if (*cp != delim) {
1393
	*tstr = cp;
1394
	*length = 0;
1395
	return NULL;
1396
    }
1397
    else {
1398
	*tstr = ++cp;
1399
	cp = (char *) Buf_GetAll(buf, length);
1400
	*length -= 1;	/* Don't count the NULL */
1401
	Buf_Destroy(buf, FALSE);
1402
	return cp;
1403
    }
1404
}
1405
1406
1407
/*-
1408
 *-----------------------------------------------------------------------
1093
 * VarQuote --
1409
 * VarQuote --
1094
 *	Quote shell meta-characters in the string
1410
 *	Quote shell meta-characters in the string
1095
 *
1411
 *
Lines 1162-1167 Link Here
1162
    int             cnt;	/* Used to count brace pairs when variable in
1478
    int             cnt;	/* Used to count brace pairs when variable in
1163
				 * in parens or braces */
1479
				 * in parens or braces */
1164
    char    	    *start;
1480
    char    	    *start;
1481
    char	     delim;
1165
    Boolean 	    dynamic;	/* TRUE if the variable is local and we're
1482
    Boolean 	    dynamic;	/* TRUE if the variable is local and we're
1166
				 * expanding it in a non-local context. This
1483
				 * expanding it in a non-local context. This
1167
				 * is done to support dynamic sources. The
1484
				 * is done to support dynamic sources. The
Lines 1420-1425 Link Here
1420
     *  	  	    	wildcarding form.
1737
     *  	  	    	wildcarding form.
1421
     *  	  :S<d><pat1><d><pat2><d>[g]
1738
     *  	  :S<d><pat1><d><pat2><d>[g]
1422
     *  	  	    	Substitute <pat2> for <pat1> in the value
1739
     *  	  	    	Substitute <pat2> for <pat1> in the value
1740
     *		  :C<d><pat1><d><pat2><d>[1g]
1741
     *				Substitute <pat2> for regex <pat1> in the value
1423
     *  	  :H	    	Substitute the head of each word
1742
     *  	  :H	    	Substitute the head of each word
1424
     *  	  :T	    	Substitute the tail of each word
1743
     *  	  :T	    	Substitute the tail of each word
1425
     *  	  :E	    	Substitute the extension (minus '.') of
1744
     *  	  :E	    	Substitute the extension (minus '.') of
Lines 1667-1672 Link Here
1667
		    free(pattern.rhs);
1986
		    free(pattern.rhs);
1668
		    break;
1987
		    break;
1669
		}
1988
		}
1989
#ifndef NO_REGEX
1990
		case 'C':
1991
		{
1992
		    VarREPattern    pattern;
1993
		    char	   *re;
1994
		    int		    error;
1995
1996
		    pattern.flags = 0;
1997
		    delim = tstr[1];
1998
		    tstr += 2;
1999
2000
		    cp = tstr;
2001
2002
		    if ((re = VarGetPattern(ctxt, err, &cp, delim, NULL,
2003
			NULL, NULL)) == NULL) {
2004
			/* was: goto cleanup */
2005
			*lengthPtr = cp - start + 1;
2006
			if (*freePtr)
2007
			    free(str);
2008
			if (delim != '\0')
2009
			    Error("Unclosed substitution for %s (%c missing)",
2010
				  v->name, delim);
2011
			return (var_Error);
2012
		    }
2013
2014
		    if ((pattern.replace = VarGetPattern(ctxt, err, &cp,
2015
			delim, NULL, NULL, NULL)) == NULL){
2016
			free(re);
2017
2018
			/* was: goto cleanup */
2019
			*lengthPtr = cp - start + 1;
2020
			if (*freePtr)
2021
			    free(str);
2022
			if (delim != '\0')
2023
			    Error("Unclosed substitution for %s (%c missing)",
2024
				  v->name, delim);
2025
			return (var_Error);
2026
		    }
2027
2028
		    for (;; cp++) {
2029
			switch (*cp) {
2030
			case 'g':
2031
			    pattern.flags |= VAR_SUB_GLOBAL;
2032
			    continue;
2033
			case '1':
2034
			    pattern.flags |= VAR_SUB_ONE;
2035
			    continue;
2036
			}
2037
			break;
2038
		    }
2039
2040
		    termc = *cp;
2041
2042
		    error = regcomp(&pattern.re, re, REG_EXTENDED);
2043
		    free(re);
2044
		    if (error)	{
2045
			*lengthPtr = cp - start + 1;
2046
			VarREError(error, &pattern.re, "RE substitution error");
2047
			free(pattern.replace);
2048
			return (var_Error);
2049
		    }
2050
2051
		    pattern.nsub = pattern.re.re_nsub + 1;
2052
		    if (pattern.nsub < 1)
2053
			pattern.nsub = 1;
2054
		    if (pattern.nsub > 10)
2055
			pattern.nsub = 10;
2056
		    pattern.matches = emalloc(pattern.nsub *
2057
					      sizeof(regmatch_t));
2058
		    newStr = VarModify(str, VarRESubstitute,
2059
				       (ClientData) &pattern);
2060
		    regfree(&pattern.re);
2061
		    free(pattern.replace);
2062
		    free(pattern.matches);
2063
		    break;
2064
		}
2065
#endif
1670
		case 'Q':
2066
		case 'Q':
1671
		    if (tstr[1] == endc || tstr[1] == ':') {
2067
		    if (tstr[1] == endc || tstr[1] == ':') {
1672
			newStr = VarQuote (str);
2068
			newStr = VarQuote (str);

Return to bug 21605