Lines 1-304
Link Here
|
1 |
Index: /icu/branches/maint/maint-3-8/source/i18n/regexcmp.cpp |
|
|
2 |
=================================================================== |
3 |
--- i18n/regexcmp.cpp (revision 21805) |
4 |
+++ i18n/regexcmp.cpp (revision 23292) |
5 |
@@ -3,5 +3,5 @@ |
6 |
// file: regexcmp.cpp |
7 |
// |
8 |
-// Copyright (C) 2002-2007 International Business Machines Corporation and others. |
9 |
+// Copyright (C) 2002-2008 International Business Machines Corporation and others. |
10 |
// All Rights Reserved. |
11 |
// |
12 |
@@ -1187,12 +1187,15 @@ |
13 |
// we fill the operand with the capture group number. At the end |
14 |
// of compilation, it will be changed to the variable's location. |
15 |
- U_ASSERT(groupNum > 0); |
16 |
- int32_t op; |
17 |
- if (fModeFlags & UREGEX_CASE_INSENSITIVE) { |
18 |
- op = URX_BUILD(URX_BACKREF_I, groupNum); |
19 |
+ if (groupNum < 1) { |
20 |
+ error(U_REGEX_INVALID_BACK_REF); |
21 |
} else { |
22 |
- op = URX_BUILD(URX_BACKREF, groupNum); |
23 |
- } |
24 |
- fRXPat->fCompiledPat->addElement(op, *fStatus); |
25 |
+ int32_t op; |
26 |
+ if (fModeFlags & UREGEX_CASE_INSENSITIVE) { |
27 |
+ op = URX_BUILD(URX_BACKREF_I, groupNum); |
28 |
+ } else { |
29 |
+ op = URX_BUILD(URX_BACKREF, groupNum); |
30 |
+ } |
31 |
+ fRXPat->fCompiledPat->addElement(op, *fStatus); |
32 |
+ } |
33 |
} |
34 |
break; |
35 |
Index: /icu/branches/maint/maint-3-8/source/i18n/rematch.cpp |
36 |
=================================================================== |
37 |
--- i18n/rematch.cpp (revision 21973) |
38 |
+++ i18n/rematch.cpp (revision 23292) |
39 |
@@ -1,5 +1,5 @@ |
40 |
/* |
41 |
************************************************************************** |
42 |
-* Copyright (C) 2002-2007 International Business Machines Corporation * |
43 |
+* Copyright (C) 2002-2008 International Business Machines Corporation * |
44 |
* and others. All rights reserved. * |
45 |
************************************************************************** |
46 |
@@ -30,4 +30,13 @@ |
47 |
|
48 |
U_NAMESPACE_BEGIN |
49 |
+ |
50 |
+// Limit the size of the back track stack, to avoid system failures caused |
51 |
+// by heap exhaustion. Units are in 32 bit words, not bytes. |
52 |
+// This value puts ICU's limits higher than most other regexp implementations, |
53 |
+// which use recursion rather than the heap, and take more storage per |
54 |
+// backtrack point. |
55 |
+// This constant is _temporary_. Proper API to control the value will be added. |
56 |
+// |
57 |
+static const int32_t BACKTRACK_STACK_CAPACITY = 8000000; |
58 |
|
59 |
//----------------------------------------------------------------------------- |
60 |
@@ -54,6 +63,7 @@ |
61 |
if (fStack == NULL || fData == NULL) { |
62 |
fDeferredStatus = U_MEMORY_ALLOCATION_ERROR; |
63 |
- } |
64 |
- |
65 |
+ } else { |
66 |
+ fStack->setMaxCapacity(BACKTRACK_STACK_CAPACITY); |
67 |
+ } |
68 |
reset(RegexStaticSets::gStaticSets->fEmptyString); |
69 |
} |
70 |
@@ -79,4 +89,6 @@ |
71 |
if (fStack == NULL || fData == NULL) { |
72 |
status = U_MEMORY_ALLOCATION_ERROR; |
73 |
+ } else { |
74 |
+ fStack->setMaxCapacity(BACKTRACK_STACK_CAPACITY); |
75 |
} |
76 |
reset(input); |
77 |
@@ -103,4 +115,6 @@ |
78 |
if (fStack == NULL || fData == NULL) { |
79 |
status = U_MEMORY_ALLOCATION_ERROR; |
80 |
+ } else { |
81 |
+ fStack->setMaxCapacity(BACKTRACK_STACK_CAPACITY); |
82 |
} |
83 |
reset(RegexStaticSets::gStaticSets->fEmptyString); |
84 |
@@ -1015,4 +1029,12 @@ |
85 |
// push storage for a new frame. |
86 |
int32_t *newFP = fStack->reserveBlock(frameSize, status); |
87 |
+ if (newFP == NULL) { |
88 |
+ // Stack expansion failed (heap allocation error or capacity limit reached). |
89 |
+ // We need to return a writable stack frame, so just return the |
90 |
+ // previous frame. The match operation will stop quickly |
91 |
+ // because of the error status, after which the frame will never |
92 |
+ // be looked at again. |
93 |
+ return fp; |
94 |
+ } |
95 |
fp = (REStackFrame *)(newFP - frameSize); // in case of realloc of stack. |
96 |
|
97 |
@@ -1030,6 +1052,6 @@ |
98 |
return (REStackFrame *)newFP; |
99 |
} |
100 |
- |
101 |
- |
102 |
+ |
103 |
+ |
104 |
//-------------------------------------------------------------------------------- |
105 |
// |
106 |
@@ -2262,4 +2284,5 @@ |
107 |
|
108 |
if (U_FAILURE(status)) { |
109 |
+ isMatch = FALSE; |
110 |
break; |
111 |
} |
112 |
Index: /icu/branches/maint/maint-3-8/source/test/intltest/regextst.h |
113 |
=================================================================== |
114 |
--- test/intltest/regextst.h (revision 22001) |
115 |
+++ test/intltest/regextst.h (revision 23292) |
116 |
@@ -1,5 +1,5 @@ |
117 |
/******************************************************************** |
118 |
* COPYRIGHT: |
119 |
- * Copyright (c) 2002-2007, International Business Machines Corporation and |
120 |
+ * Copyright (c) 2002-2008, International Business Machines Corporation and |
121 |
* others. All Rights Reserved. |
122 |
********************************************************************/ |
123 |
@@ -31,4 +31,5 @@ |
124 |
virtual void Errors(); |
125 |
virtual void PerlTests(); |
126 |
+ virtual void Bug6149(); |
127 |
|
128 |
// The following functions are internal to the regexp tests. |
129 |
Index: /icu/branches/maint/maint-3-8/source/test/intltest/regextst.cpp |
130 |
=================================================================== |
131 |
--- test/intltest/regextst.cpp (revision 22057) |
132 |
+++ test/intltest/regextst.cpp (revision 23292) |
133 |
@@ -1,5 +1,5 @@ |
134 |
/******************************************************************** |
135 |
* COPYRIGHT: |
136 |
- * Copyright (c) 2002-2007, International Business Machines Corporation and |
137 |
+ * Copyright (c) 2002-2008, International Business Machines Corporation and |
138 |
* others. All Rights Reserved. |
139 |
********************************************************************/ |
140 |
@@ -67,4 +67,8 @@ |
141 |
if (exec) PerlTests(); |
142 |
break; |
143 |
+ case 7: name = "Bug 6149"; |
144 |
+ if (exec) Bug6149(); |
145 |
+ break; |
146 |
+ |
147 |
|
148 |
|
149 |
@@ -1640,4 +1644,10 @@ |
150 |
// Ticket 5389 |
151 |
REGEX_ERR("*c", 1, 1, U_REGEX_RULE_SYNTAX); |
152 |
+ |
153 |
+ // Invalid Back Reference \0 |
154 |
+ // For ICU 3.8 and earlier |
155 |
+ // For ICU versions newer than 3.8, \0 introduces an octal escape. |
156 |
+ // |
157 |
+ REGEX_ERR("(ab)\\0", 1, 6, U_REGEX_INVALID_BACK_REF); |
158 |
|
159 |
} |
160 |
@@ -2123,4 +2133,24 @@ |
161 |
|
162 |
|
163 |
+//-------------------------------------------------------------- |
164 |
+// |
165 |
+// Bug6149 Verify limits to heap expansion for backtrack stack. |
166 |
+// Use this pattern, |
167 |
+// "(a?){1,}" |
168 |
+// The zero-length match will repeat forever. |
169 |
+// (That this goes into a loop is another bug) |
170 |
+// |
171 |
+//--------------------------------------------------------------- |
172 |
+void RegexTest::Bug6149() { |
173 |
+ UnicodeString pattern("(a?){1,}"); |
174 |
+ UnicodeString s("xyz"); |
175 |
+ uint32_t flags = 0; |
176 |
+ UErrorCode status = U_ZERO_ERROR; |
177 |
+ |
178 |
+ RegexMatcher matcher(pattern, s, flags, status); |
179 |
+ UBool result = false; |
180 |
+ REGEX_ASSERT_FAIL(result=matcher.matches(status), U_BUFFER_OVERFLOW_ERROR); |
181 |
+ REGEX_ASSERT(result == FALSE); |
182 |
+ } |
183 |
|
184 |
#endif /* !UCONFIG_NO_REGULAR_EXPRESSIONS */ |
185 |
Index: /icu/branches/maint/maint-3-8/source/common/uvectr32.cpp |
186 |
=================================================================== |
187 |
--- common/uvectr32.cpp (revision 12958) |
188 |
+++ common/uvectr32.cpp (revision 23292) |
189 |
@@ -1,5 +1,5 @@ |
190 |
/* |
191 |
****************************************************************************** |
192 |
-* Copyright (C) 1999-2003, International Business Machines Corporation and * |
193 |
+* Copyright (C) 1999-2008, International Business Machines Corporation and * |
194 |
* others. All Rights Reserved. * |
195 |
****************************************************************************** |
196 |
@@ -27,4 +27,5 @@ |
197 |
count(0), |
198 |
capacity(0), |
199 |
+ maxCapacity(0), |
200 |
elements(NULL) |
201 |
{ |
202 |
@@ -35,4 +36,5 @@ |
203 |
count(0), |
204 |
capacity(0), |
205 |
+ maxCapacity(0), |
206 |
elements(0) |
207 |
{ |
208 |
@@ -46,4 +48,7 @@ |
209 |
if (initialCapacity < 1) { |
210 |
initialCapacity = DEFUALT_CAPACITY; |
211 |
+ } |
212 |
+ if (maxCapacity>0 && maxCapacity<initialCapacity) { |
213 |
+ initialCapacity = maxCapacity; |
214 |
} |
215 |
elements = (int32_t *)uprv_malloc(sizeof(int32_t)*initialCapacity); |
216 |
@@ -190,19 +195,33 @@ |
217 |
if (capacity >= minimumCapacity) { |
218 |
return TRUE; |
219 |
- } else { |
220 |
- int32_t newCap = capacity * 2; |
221 |
- if (newCap < minimumCapacity) { |
222 |
- newCap = minimumCapacity; |
223 |
- } |
224 |
- int32_t* newElems = (int32_t *)uprv_malloc(sizeof(int32_t)*newCap); |
225 |
- if (newElems == 0) { |
226 |
- status = U_MEMORY_ALLOCATION_ERROR; |
227 |
- return FALSE; |
228 |
- } |
229 |
- uprv_memcpy(newElems, elements, sizeof(elements[0]) * count); |
230 |
- uprv_free(elements); |
231 |
- elements = newElems; |
232 |
- capacity = newCap; |
233 |
- return TRUE; |
234 |
+ } |
235 |
+ if (maxCapacity>0 && minimumCapacity>maxCapacity) { |
236 |
+ status = U_BUFFER_OVERFLOW_ERROR; |
237 |
+ return FALSE; |
238 |
+ } |
239 |
+ int32_t newCap = capacity * 2; |
240 |
+ if (newCap < minimumCapacity) { |
241 |
+ newCap = minimumCapacity; |
242 |
+ } |
243 |
+ if (maxCapacity > 0 && newCap > maxCapacity) { |
244 |
+ newCap = maxCapacity; |
245 |
+ } |
246 |
+ int32_t* newElems = (int32_t *)uprv_malloc(sizeof(int32_t)*newCap); |
247 |
+ if (newElems == 0) { |
248 |
+ status = U_MEMORY_ALLOCATION_ERROR; |
249 |
+ return FALSE; |
250 |
+ } |
251 |
+ uprv_memcpy(newElems, elements, sizeof(elements[0]) * count); |
252 |
+ uprv_free(elements); |
253 |
+ elements = newElems; |
254 |
+ capacity = newCap; |
255 |
+ return TRUE; |
256 |
+} |
257 |
+ |
258 |
+void UVector32::setMaxCapacity(int32_t limit) { |
259 |
+ U_ASSERT(limit >= 0); |
260 |
+ maxCapacity = limit; |
261 |
+ if (maxCapacity < 0) { |
262 |
+ maxCapacity = 0; |
263 |
} |
264 |
} |
265 |
Index: /icu/branches/maint/maint-3-8/source/common/uvectr32.h |
266 |
=================================================================== |
267 |
--- common/uvectr32.h (revision 19000) |
268 |
+++ common/uvectr32.h (revision 23292) |
269 |
@@ -1,5 +1,5 @@ |
270 |
/* |
271 |
********************************************************************** |
272 |
-* Copyright (C) 1999-2006, International Business Machines |
273 |
+* Copyright (C) 1999-2008, International Business Machines |
274 |
* Corporation and others. All Rights Reserved. |
275 |
********************************************************************** |
276 |
@@ -62,4 +62,6 @@ |
277 |
|
278 |
int32_t capacity; |
279 |
+ |
280 |
+ int32_t maxCapacity; // Limit beyond which capacity is not permitted to grow. |
281 |
|
282 |
int32_t* elements; |
283 |
@@ -161,4 +163,12 @@ |
284 |
*/ |
285 |
int32_t *getBuffer() const; |
286 |
+ |
287 |
+ /** |
288 |
+ * Set the maximum allowed buffer capacity for this vector/stack. |
289 |
+ * Default with no limit set is unlimited, go until malloc() fails. |
290 |
+ * A limit of zero means unlimited capacity. |
291 |
+ * Units are vector elements (32 bits each), not bytes. |
292 |
+ */ |
293 |
+ void setMaxCapacity(int32_t limit); |
294 |
|
295 |
/** |
296 |
@@ -222,5 +232,7 @@ |
297 |
|
298 |
inline int32_t *UVector32::reserveBlock(int32_t size, UErrorCode &status) { |
299 |
- ensureCapacity(count+size, status); |
300 |
+ if (ensureCapacity(count+size, status) == FALSE) { |
301 |
+ return NULL; |
302 |
+ } |
303 |
int32_t *rp = elements+count; |
304 |
count += size; |