Line 0
Link Here
|
|
|
1 |
--- common/util/file.go.orig 2018-11-21 17:52:58 UTC |
2 |
+++ common/util/file.go |
3 |
@@ -9,11 +9,68 @@ |
4 |
import ( |
5 |
"bufio" |
6 |
"io" |
7 |
- "net/url" |
8 |
"os" |
9 |
"path/filepath" |
10 |
+ "strconv" |
11 |
) |
12 |
|
13 |
// Error reports an error and the operation and URL that caused it.
type Error struct {
	Op  string // name of the operation that failed
	URL string // URL the operation was working on
	Err error  // underlying error; may be nil
}

// Error implements the error interface. The message has the form
// "<Op> <URL>: <cause>". A nil Err is rendered as "<nil>" rather than
// causing a nil-pointer panic.
func (e *Error) Error() string {
	cause := "<nil>"
	if e.Err != nil {
		cause = e.Err.Error()
	}
	return e.Op + " " + e.URL + ": " + cause
}

// Unwrap returns the underlying error so that callers walking an error
// chain can reach the original cause.
func (e *Error) Unwrap() error { return e.Err }
21 |
+ |
22 |
// ishex reports whether c is an ASCII hexadecimal digit
// (0-9, a-f or A-F).
func ishex(c byte) bool {
	if c >= '0' && c <= '9' {
		return true
	}
	if c >= 'a' && c <= 'f' {
		return true
	}
	return c >= 'A' && c <= 'F'
}
33 |
+ |
34 |
// unhex returns the numeric value (0-15) of the ASCII hexadecimal digit c.
// Any byte that is not a hexadecimal digit yields 0.
func unhex(c byte) byte {
	if c >= '0' && c <= '9' {
		return c - '0'
	}
	if c >= 'a' && c <= 'f' {
		return 10 + (c - 'a')
	}
	if c >= 'A' && c <= 'F' {
		return 10 + (c - 'A')
	}
	return 0
}
45 |
+ |
46 |
// encoding selects which part of a URL is being escaped or unescaped.
type encoding int

// The modes are numbered consecutively starting at 1.
const (
	encodePath           encoding = iota + 1 // a path as a whole
	encodePathSegment                        // a single path segment
	encodeHost                               // host, including an optional :port
	encodeZone                               // zone identifier
	encodeUserPassword                       // userinfo
	encodeQueryComponent                     // query component
	encodeFragment                           // fragment
)
57 |
+ |
58 |
// EscapeError is the error type for a malformed percent-escape; its value
// is the offending text.
type EscapeError string

// Error implements the error interface, quoting the offending text.
func (e EscapeError) Error() string {
	quoted := strconv.Quote(string(e))
	return "invalid URL escape " + quoted
}
63 |
+ |
64 |
// InvalidHostError is the error type for a character that is not allowed
// in a host name; its value is the offending text.
type InvalidHostError string

// Error implements the error interface, quoting the offending text.
func (e InvalidHostError) Error() string {
	quoted := strconv.Quote(string(e))
	return "invalid character " + quoted + " in host name"
}
69 |
+ |
70 |
// GetFieldsFromFile fetches the first line from the contents of the file |
71 |
// at "path" |
72 |
func GetFieldsFromFile(path string) ([]string, error) { |
73 |
@@ -42,11 +99,11 @@ |
74 |
} |
75 |
|
76 |
func EscapeCollectionName(collName string) string { |
77 |
- return url.PathEscape(collName) |
78 |
+ return PathEscape(collName) |
79 |
} |
80 |
|
81 |
func UnescapeCollectionName(escapedCollName string) (string, error) { |
82 |
- return url.PathUnescape(escapedCollName) |
83 |
+ return PathUnescape(escapedCollName) |
84 |
} |
85 |
|
86 |
type WrappedReadCloser struct { |
87 |
@@ -76,3 +133,238 @@ |
88 |
} |
89 |
return innerErr |
90 |
} |
91 |
+ |
92 |
+// Return true if the specified character should be escaped when |
93 |
+// appearing in a URL string, according to RFC 3986. |
94 |
+// |
95 |
+// Please be informed that for now shouldEscape does not check all |
96 |
+// reserved characters correctly. See golang.org/issue/5684. |
97 |
+func shouldEscape(c byte, mode encoding) bool { |
98 |
+ // §2.3 Unreserved characters (alphanum) |
99 |
+ if 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z' || '0' <= c && c <= '9' { |
100 |
+ return false |
101 |
+ } |
102 |
+ |
103 |
+ if mode == encodeHost || mode == encodeZone { |
104 |
+ // §3.2.2 Host allows |
105 |
+ // sub-delims = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "=" |
106 |
+ // as part of reg-name. |
107 |
+ // We add : because we include :port as part of host. |
108 |
+ // We add [ ] because we include [ipv6]:port as part of host. |
109 |
+ // We add < > because they're the only characters left that |
110 |
+ // we could possibly allow, and Parse will reject them if we |
111 |
+ // escape them (because hosts can't use %-encoding for |
112 |
+ // ASCII bytes). |
113 |
+ switch c { |
114 |
+ case '!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '=', ':', '[', ']', '<', '>', '"': |
115 |
+ return false |
116 |
+ } |
117 |
+ } |
118 |
+ |
119 |
+ switch c { |
120 |
+ case '-', '_', '.', '~': // §2.3 Unreserved characters (mark) |
121 |
+ return false |
122 |
+ |
123 |
+ case '$', '&', '+', ',', '/', ':', ';', '=', '?', '@': // §2.2 Reserved characters (reserved) |
124 |
+ // Different sections of the URL allow a few of |
125 |
+ // the reserved characters to appear unescaped. |
126 |
+ switch mode { |
127 |
+ case encodePath: // §3.3 |
128 |
+ // The RFC allows : @ & = + $ but saves / ; , for assigning |
129 |
+ // meaning to individual path segments. This package |
130 |
+ // only manipulates the path as a whole, so we allow those |
131 |
+ // last three as well. That leaves only ? to escape. |
132 |
+ return c == '?' |
133 |
+ |
134 |
+ case encodePathSegment: // §3.3 |
135 |
+ // The RFC allows : @ & = + $ but saves / ; , for assigning |
136 |
+ // meaning to individual path segments. |
137 |
+ return c == '/' || c == ';' || c == ',' || c == '?' |
138 |
+ |
139 |
+ case encodeUserPassword: // §3.2.1 |
140 |
+ // The RFC allows ';', ':', '&', '=', '+', '$', and ',' in |
141 |
+ // userinfo, so we must escape only '@', '/', and '?'. |
142 |
+ // The parsing of userinfo treats ':' as special so we must escape |
143 |
+ // that too. |
144 |
+ return c == '@' || c == '/' || c == '?' || c == ':' |
145 |
+ |
146 |
+ case encodeQueryComponent: // §3.4 |
147 |
+ // The RFC reserves (so we must escape) everything. |
148 |
+ return true |
149 |
+ |
150 |
+ case encodeFragment: // §4.1 |
151 |
+ // The RFC text is silent but the grammar allows |
152 |
+ // everything, so escape nothing. |
153 |
+ return false |
154 |
+ } |
155 |
+ } |
156 |
+ |
157 |
+ if mode == encodeFragment { |
158 |
+ // RFC 3986 §2.2 allows not escaping sub-delims. A subset of sub-delims are |
159 |
+ // included in reserved from RFC 2396 §2.2. The remaining sub-delims do not |
160 |
+ // need to be escaped. To minimize potential breakage, we apply two restrictions: |
161 |
+ // (1) we always escape sub-delims outside of the fragment, and (2) we always |
162 |
+ // escape single quote to avoid breaking callers that had previously assumed that |
163 |
+ // single quotes would be escaped. See issue #19917. |
164 |
+ switch c { |
165 |
+ case '!', '(', ')', '*': |
166 |
+ return false |
167 |
+ } |
168 |
+ } |
169 |
+ |
170 |
+ // Everything else must be escaped. |
171 |
+ return true |
172 |
+} |
173 |
+ |
174 |
+// PathUnescape does the inverse transformation of PathEscape, |
175 |
+// converting each 3-byte encoded substring of the form "%AB" into the |
176 |
+// hex-decoded byte 0xAB. It returns an error if any % is not followed |
177 |
+// by two hexadecimal digits. |
178 |
+// |
179 |
+// PathUnescape is identical to QueryUnescape except that it does not |
180 |
+// unescape '+' to ' ' (space). |
181 |
+func PathUnescape(s string) (string, error) { |
182 |
+ return unescape(s, encodePathSegment) |
183 |
+} |
184 |
+ |
185 |
+// unescape unescapes a string; the mode specifies |
186 |
+// which section of the URL string is being unescaped. |
187 |
+func unescape(s string, mode encoding) (string, error) { |
188 |
+ // Count %, check that they're well-formed. |
189 |
+ n := 0 |
190 |
+ hasPlus := false |
191 |
+ for i := 0; i < len(s); { |
192 |
+ switch s[i] { |
193 |
+ case '%': |
194 |
+ n++ |
195 |
+ if i+2 >= len(s) || !ishex(s[i+1]) || !ishex(s[i+2]) { |
196 |
+ s = s[i:] |
197 |
+ if len(s) > 3 { |
198 |
+ s = s[:3] |
199 |
+ } |
200 |
+ return "", EscapeError(s) |
201 |
+ } |
202 |
+ // Per https://tools.ietf.org/html/rfc3986#page-21 |
203 |
+ // in the host component %-encoding can only be used |
204 |
+ // for non-ASCII bytes. |
205 |
+ // But https://tools.ietf.org/html/rfc6874#section-2 |
206 |
+ // introduces %25 being allowed to escape a percent sign |
207 |
+ // in IPv6 scoped-address literals. Yay. |
208 |
+ if mode == encodeHost && unhex(s[i+1]) < 8 && s[i:i+3] != "%25" { |
209 |
+ return "", EscapeError(s[i : i+3]) |
210 |
+ } |
211 |
+ if mode == encodeZone { |
212 |
+ // RFC 6874 says basically "anything goes" for zone identifiers |
213 |
+ // and that even non-ASCII can be redundantly escaped, |
214 |
+ // but it seems prudent to restrict %-escaped bytes here to those |
215 |
+ // that are valid host name bytes in their unescaped form. |
216 |
+ // That is, you can use escaping in the zone identifier but not |
217 |
+ // to introduce bytes you couldn't just write directly. |
218 |
+ // But Windows puts spaces here! Yay. |
219 |
+ v := unhex(s[i+1])<<4 | unhex(s[i+2]) |
220 |
+ if s[i:i+3] != "%25" && v != ' ' && shouldEscape(v, encodeHost) { |
221 |
+ return "", EscapeError(s[i : i+3]) |
222 |
+ } |
223 |
+ } |
224 |
+ i += 3 |
225 |
+ case '+': |
226 |
+ hasPlus = mode == encodeQueryComponent |
227 |
+ i++ |
228 |
+ default: |
229 |
+ if (mode == encodeHost || mode == encodeZone) && s[i] < 0x80 && shouldEscape(s[i], mode) { |
230 |
+ return "", InvalidHostError(s[i : i+1]) |
231 |
+ } |
232 |
+ i++ |
233 |
+ } |
234 |
+ } |
235 |
+ |
236 |
+ if n == 0 && !hasPlus { |
237 |
+ return s, nil |
238 |
+ } |
239 |
+ |
240 |
+ t := make([]byte, len(s)-2*n) |
241 |
+ j := 0 |
242 |
+ for i := 0; i < len(s); { |
243 |
+ switch s[i] { |
244 |
+ case '%': |
245 |
+ t[j] = unhex(s[i+1])<<4 | unhex(s[i+2]) |
246 |
+ j++ |
247 |
+ i += 3 |
248 |
+ case '+': |
249 |
+ if mode == encodeQueryComponent { |
250 |
+ t[j] = ' ' |
251 |
+ } else { |
252 |
+ t[j] = '+' |
253 |
+ } |
254 |
+ j++ |
255 |
+ i++ |
256 |
+ default: |
257 |
+ t[j] = s[i] |
258 |
+ j++ |
259 |
+ i++ |
260 |
+ } |
261 |
+ } |
262 |
+ return string(t), nil |
263 |
+} |
264 |
+ |
265 |
+// PathEscape escapes the string so it can be safely placed |
266 |
+// inside a URL path segment. |
267 |
+func PathEscape(s string) string { |
268 |
+ return escape(s, encodePathSegment) |
269 |
+} |
270 |
+ |
271 |
+func escape(s string, mode encoding) string { |
272 |
+ spaceCount, hexCount := 0, 0 |
273 |
+ for i := 0; i < len(s); i++ { |
274 |
+ c := s[i] |
275 |
+ if shouldEscape(c, mode) { |
276 |
+ if c == ' ' && mode == encodeQueryComponent { |
277 |
+ spaceCount++ |
278 |
+ } else { |
279 |
+ hexCount++ |
280 |
+ } |
281 |
+ } |
282 |
+ } |
283 |
+ |
284 |
+ if spaceCount == 0 && hexCount == 0 { |
285 |
+ return s |
286 |
+ } |
287 |
+ |
288 |
+ var buf [64]byte |
289 |
+ var t []byte |
290 |
+ |
291 |
+ required := len(s) + 2*hexCount |
292 |
+ if required <= len(buf) { |
293 |
+ t = buf[:required] |
294 |
+ } else { |
295 |
+ t = make([]byte, required) |
296 |
+ } |
297 |
+ |
298 |
+ if hexCount == 0 { |
299 |
+ copy(t, s) |
300 |
+ for i := 0; i < len(s); i++ { |
301 |
+ if s[i] == ' ' { |
302 |
+ t[i] = '+' |
303 |
+ } |
304 |
+ } |
305 |
+ return string(t) |
306 |
+ } |
307 |
+ |
308 |
+ j := 0 |
309 |
+ for i := 0; i < len(s); i++ { |
310 |
+ switch c := s[i]; { |
311 |
+ case c == ' ' && mode == encodeQueryComponent: |
312 |
+ t[j] = '+' |
313 |
+ j++ |
314 |
+ case shouldEscape(c, mode): |
315 |
+ t[j] = '%' |
316 |
+ t[j+1] = "0123456789ABCDEF"[c>>4] |
317 |
+ t[j+2] = "0123456789ABCDEF"[c&15] |
318 |
+ j += 3 |
319 |
+ default: |
320 |
+ t[j] = s[i] |
321 |
+ j++ |
322 |
+ } |
323 |
+ } |
324 |
+ return string(t) |
325 |
+} |