OLD | NEW |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "net/http/http_content_disposition.h" | 5 #include "net/http/http_content_disposition.h" |
6 | 6 |
7 #include "base/base64.h" | 7 #include "base/base64.h" |
8 #include "base/i18n/icu_string_conversions.h" | 8 #include "base/i18n/icu_string_conversions.h" |
9 #include "base/logging.h" | 9 #include "base/logging.h" |
10 #include "base/string_util.h" | 10 #include "base/string_util.h" |
(...skipping 77 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
88 ucnv_close(converter); | 88 ucnv_close(converter); |
89 if (U_FAILURE(err)) | 89 if (U_FAILURE(err)) |
90 return false; | 90 return false; |
91 output->resize(output_length); | 91 output->resize(output_length); |
92 return true; | 92 return true; |
93 } | 93 } |
94 | 94 |
95 bool DecodeWord(const std::string& encoded_word, | 95 bool DecodeWord(const std::string& encoded_word, |
96 const std::string& referrer_charset, | 96 const std::string& referrer_charset, |
97 bool* is_rfc2047, | 97 bool* is_rfc2047, |
98 std::string* output) { | 98 std::string* output, |
| 99 int* parse_result_flags) { |
99 *is_rfc2047 = false; | 100 *is_rfc2047 = false; |
100 output->clear(); | 101 output->clear(); |
101 if (encoded_word.empty()) | 102 if (encoded_word.empty()) |
102 return true; | 103 return true; |
103 | 104 |
104 if (!IsStringASCII(encoded_word)) { | 105 if (!IsStringASCII(encoded_word)) { |
105 // Try UTF-8, referrer_charset and the native OS default charset in turn. | 106 // Try UTF-8, referrer_charset and the native OS default charset in turn. |
106 if (IsStringUTF8(encoded_word)) { | 107 if (IsStringUTF8(encoded_word)) { |
107 *output = encoded_word; | 108 *output = encoded_word; |
108 } else { | 109 } else { |
109 string16 utf16_output; | 110 string16 utf16_output; |
110 if (!referrer_charset.empty() && | 111 if (!referrer_charset.empty() && |
111 base::CodepageToUTF16(encoded_word, referrer_charset.c_str(), | 112 base::CodepageToUTF16(encoded_word, referrer_charset.c_str(), |
112 base::OnStringConversionError::FAIL, | 113 base::OnStringConversionError::FAIL, |
113 &utf16_output)) { | 114 &utf16_output)) { |
114 *output = UTF16ToUTF8(utf16_output); | 115 *output = UTF16ToUTF8(utf16_output); |
115 } else { | 116 } else { |
116 *output = WideToUTF8(base::SysNativeMBToWide(encoded_word)); | 117 *output = WideToUTF8(base::SysNativeMBToWide(encoded_word)); |
117 } | 118 } |
118 } | 119 } |
119 | 120 |
| 121 *parse_result_flags |= net::HttpContentDisposition::HAS_NON_ASCII_STRINGS; |
120 return true; | 122 return true; |
121 } | 123 } |
122 | 124 |
123 // RFC 2047 : one of encoding methods supported by Firefox and relatively | 125 // RFC 2047 : one of encoding methods supported by Firefox and relatively |
124 // widely used by web servers. | 126 // widely used by web servers. |
125 // =?charset?<E>?<encoded string>?= where '<E>' is either 'B' or 'Q'. | 127 // =?charset?<E>?<encoded string>?= where '<E>' is either 'B' or 'Q'. |
126 // We don't care about the length restriction (72 bytes) because | 128 // We don't care about the length restriction (72 bytes) because |
127 // many web servers generate encoded words longer than the limit. | 129 // many web servers generate encoded words longer than the limit. |
128 std::string tmp; | 130 std::string decoded_word; |
129 *is_rfc2047 = true; | 131 *is_rfc2047 = true; |
130 int part_index = 0; | 132 int part_index = 0; |
131 std::string charset; | 133 std::string charset; |
132 StringTokenizer t(encoded_word, "?"); | 134 StringTokenizer t(encoded_word, "?"); |
133 RFC2047EncodingType enc_type = Q_ENCODING; | 135 RFC2047EncodingType enc_type = Q_ENCODING; |
134 while (*is_rfc2047 && t.GetNext()) { | 136 while (*is_rfc2047 && t.GetNext()) { |
135 std::string part = t.token(); | 137 std::string part = t.token(); |
136 switch (part_index) { | 138 switch (part_index) { |
137 case 0: | 139 case 0: |
138 if (part != "=") { | 140 if (part != "=") { |
(...skipping 12 matching lines...) Expand all Loading... |
151 part.find_first_of("bBqQ") == std::string::npos) { | 153 part.find_first_of("bBqQ") == std::string::npos) { |
152 *is_rfc2047 = false; | 154 *is_rfc2047 = false; |
153 break; | 155 break; |
154 } | 156 } |
155 if (part[0] == 'b' || part[0] == 'B') { | 157 if (part[0] == 'b' || part[0] == 'B') { |
156 enc_type = B_ENCODING; | 158 enc_type = B_ENCODING; |
157 } | 159 } |
158 ++part_index; | 160 ++part_index; |
159 break; | 161 break; |
160 case 3: | 162 case 3: |
161 *is_rfc2047 = DecodeBQEncoding(part, enc_type, charset, &tmp); | 163 *is_rfc2047 = DecodeBQEncoding(part, enc_type, charset, &decoded_word); |
162 if (!*is_rfc2047) { | 164 if (!*is_rfc2047) { |
163 // Last minute failure. Invalid B/Q encoding. Rather than | 165 // Last minute failure. Invalid B/Q encoding. Rather than |
164 // passing it through, return now. | 166 // passing it through, return now. |
165 return false; | 167 return false; |
166 } | 168 } |
167 ++part_index; | 169 ++part_index; |
168 break; | 170 break; |
169 case 4: | 171 case 4: |
170 if (part != "=") { | 172 if (part != "=") { |
171 // Another last minute failure ! | 173 // Another last minute failure ! |
172 // Likely to be a case of two encoded-words in a row or | 174 // Likely to be a case of two encoded-words in a row or |
173 // an encoded word followed by a non-encoded word. We can be | 175 // an encoded word followed by a non-encoded word. We can be |
174 // generous, but it does not help much in terms of compatibility, | 176 // generous, but it does not help much in terms of compatibility, |
175 // I believe. Return immediately. | 177 // I believe. Return immediately. |
176 *is_rfc2047 = false; | 178 *is_rfc2047 = false; |
177 return false; | 179 return false; |
178 } | 180 } |
179 ++part_index; | 181 ++part_index; |
180 break; | 182 break; |
181 default: | 183 default: |
182 *is_rfc2047 = false; | 184 *is_rfc2047 = false; |
183 return false; | 185 return false; |
184 } | 186 } |
185 } | 187 } |
186 | 188 |
187 if (*is_rfc2047) { | 189 if (*is_rfc2047) { |
188 if (*(encoded_word.end() - 1) == '=') { | 190 if (*(encoded_word.end() - 1) == '=') { |
189 output->swap(tmp); | 191 output->swap(decoded_word); |
| 192 *parse_result_flags |= |
| 193 net::HttpContentDisposition::HAS_RFC2047_ENCODED_STRINGS; |
190 return true; | 194 return true; |
191 } | 195 } |
192 // encoded_word ending prematurely with '?' or extra '?' | 196 // encoded_word ending prematurely with '?' or extra '?' |
193 *is_rfc2047 = false; | 197 *is_rfc2047 = false; |
194 return false; | 198 return false; |
195 } | 199 } |
196 | 200 |
197 // We're not handling 'especial' characters quoted with '\', but | 201 // We're not handling 'especial' characters quoted with '\', but |
198 // it should be Ok because we're not an email client but a | 202 // it should be Ok because we're not an email client but a |
199 // web browser. | 203 // web browser. |
200 | 204 |
201 // What IE6/7 does: %-escaped UTF-8. | 205 // What IE6/7 does: %-escaped UTF-8. |
202 tmp = net::UnescapeURLComponent(encoded_word, net::UnescapeRule::SPACES); | 206 decoded_word = net::UnescapeURLComponent(encoded_word, |
203 if (IsStringUTF8(tmp)) { | 207 net::UnescapeRule::SPACES); |
204 output->swap(tmp); | 208 if (decoded_word != encoded_word) |
| 209 *parse_result_flags |= |
| 210 net::HttpContentDisposition::HAS_PERCENT_ENCODED_STRINGS; |
| 211 if (IsStringUTF8(decoded_word)) { |
| 212 output->swap(decoded_word); |
205 return true; | 213 return true; |
206 // We can try either the OS default charset or 'origin charset' here, | 214 // We can try either the OS default charset or 'origin charset' here, |
207 // As far as I can tell, IE does not support it. However, I've seen | 215 // As far as I can tell, IE does not support it. However, I've seen |
208 // web servers emit %-escaped string in a legacy encoding (usually | 216 // web servers emit %-escaped string in a legacy encoding (usually |
209 // origin charset). | 217 // origin charset). |
210 // TODO(jungshik) : Test IE further and consider adding a fallback here. | 218 // TODO(jungshik) : Test IE further and consider adding a fallback here. |
211 } | 219 } |
212 return false; | 220 return false; |
213 } | 221 } |
214 | 222 |
215 // Decodes the value of a 'filename' or 'name' parameter given as |input|. The | 223 // Decodes the value of a 'filename' or 'name' parameter given as |input|. The |
216 // value is supposed to be of the form: | 224 // value is supposed to be of the form: |
217 // | 225 // |
218 // value = token | quoted-string | 226 // value = token | quoted-string |
219 // | 227 // |
220 // However we currently also allow RFC 2047 encoding and non-ASCII | 228 // However we currently also allow RFC 2047 encoding and non-ASCII |
221 // strings. Non-ASCII strings are interpreted based on |referrer_charset|. | 229 // strings. Non-ASCII strings are interpreted based on |referrer_charset|. |
222 bool DecodeFilenameValue(const std::string& input, | 230 bool DecodeFilenameValue(const std::string& input, |
223 const std::string& referrer_charset, | 231 const std::string& referrer_charset, |
224 std::string* output) { | 232 std::string* output, |
225 std::string tmp; | 233 int* parse_result_flags) { |
| 234 int current_parse_result_flags = 0; |
| 235 std::string decoded_value; |
| 236 bool is_previous_token_rfc2047 = true; |
| 237 |
226 // Tokenize with whitespace characters. | 238 // Tokenize with whitespace characters. |
227 StringTokenizer t(input, " \t\n\r"); | 239 StringTokenizer t(input, " \t\n\r"); |
228 t.set_options(StringTokenizer::RETURN_DELIMS); | 240 t.set_options(StringTokenizer::RETURN_DELIMS); |
229 bool is_previous_token_rfc2047 = true; | |
230 while (t.GetNext()) { | 241 while (t.GetNext()) { |
231 if (t.token_is_delim()) { | 242 if (t.token_is_delim()) { |
232 // If the previous non-delimiter token is not RFC2047-encoded, | 243 // If the previous non-delimiter token is not RFC2047-encoded, |
233 // put in a space in its place. Otherwise, skip over it. | 244 // put in a space in its place. Otherwise, skip over it. |
234 if (!is_previous_token_rfc2047) { | 245 if (!is_previous_token_rfc2047) |
235 tmp.push_back(' '); | 246 decoded_value.push_back(' '); |
236 } | |
237 continue; | 247 continue; |
238 } | 248 } |
239 // We don't support a single multibyte character split into | 249 // We don't support a single multibyte character split into |
240 // adjacent encoded words. Some broken mail clients emit headers | 250 // adjacent encoded words. Some broken mail clients emit headers |
241 // with that problem, but most web servers usually encode a filename | 251 // with that problem, but most web servers usually encode a filename |
242 // in a single encoded-word. Firefox/Thunderbird do not support | 252 // in a single encoded-word. Firefox/Thunderbird do not support |
243 // it, either. | 253 // it, either. |
244 std::string decoded; | 254 std::string decoded; |
245 if (!DecodeWord(t.token(), referrer_charset, &is_previous_token_rfc2047, | 255 if (!DecodeWord(t.token(), referrer_charset, &is_previous_token_rfc2047, |
246 &decoded)) | 256 &decoded, &current_parse_result_flags)) |
247 return false; | 257 return false; |
248 tmp.append(decoded); | 258 decoded_value.append(decoded); |
249 } | 259 } |
250 output->swap(tmp); | 260 output->swap(decoded_value); |
| 261 if (parse_result_flags && !output->empty()) |
| 262 *parse_result_flags |= current_parse_result_flags; |
251 return true; | 263 return true; |
252 } | 264 } |
253 | 265 |
254 // Parses the charset and value-chars out of an ext-value string. | 266 // Parses the charset and value-chars out of an ext-value string. |
255 // | 267 // |
256 // ext-value = charset "'" [ language ] "'" value-chars | 268 // ext-value = charset "'" [ language ] "'" value-chars |
257 bool ParseExtValueComponents(const std::string& input, | 269 bool ParseExtValueComponents(const std::string& input, |
258 std::string* charset, | 270 std::string* charset, |
259 std::string* value_chars) { | 271 std::string* value_chars) { |
260 StringTokenizer t(input, "'"); | 272 StringTokenizer t(input, "'"); |
(...skipping 71 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
332 | 344 |
333 return base::ConvertToUtf8AndNormalize(unescaped, charset, decoded); | 345 return base::ConvertToUtf8AndNormalize(unescaped, charset, decoded); |
334 } | 346 } |
335 | 347 |
336 } // namespace | 348 } // namespace |
337 | 349 |
338 namespace net { | 350 namespace net { |
339 | 351 |
340 HttpContentDisposition::HttpContentDisposition( | 352 HttpContentDisposition::HttpContentDisposition( |
341 const std::string& header, const std::string& referrer_charset) | 353 const std::string& header, const std::string& referrer_charset) |
342 : type_(INLINE) { | 354 : type_(INLINE), |
| 355 parse_result_flags_(INVALID) { |
343 Parse(header, referrer_charset); | 356 Parse(header, referrer_charset); |
344 } | 357 } |
345 | 358 |
346 HttpContentDisposition::~HttpContentDisposition() { | 359 HttpContentDisposition::~HttpContentDisposition() { |
347 } | 360 } |
348 | 361 |
349 std::string::const_iterator HttpContentDisposition::ConsumeDispositionType( | 362 std::string::const_iterator HttpContentDisposition::ConsumeDispositionType( |
350 std::string::const_iterator begin, std::string::const_iterator end) { | 363 std::string::const_iterator begin, std::string::const_iterator end) { |
351 DCHECK(type_ == INLINE); | 364 DCHECK(type_ == INLINE); |
352 std::string::const_iterator delimiter = std::find(begin, end, ';'); | 365 std::string::const_iterator delimiter = std::find(begin, end, ';'); |
353 | 366 |
354 std::string::const_iterator type_begin = begin; | 367 std::string::const_iterator type_begin = begin; |
355 std::string::const_iterator type_end = delimiter; | 368 std::string::const_iterator type_end = delimiter; |
356 HttpUtil::TrimLWS(&type_begin, &type_end); | 369 HttpUtil::TrimLWS(&type_begin, &type_end); |
357 | 370 |
358 // If the disposition-type isn't a valid token then the | 371 // If the disposition-type isn't a valid token then the |
359 // Content-Disposition header is malformed, and we treat the first bytes as | 372 // Content-Disposition header is malformed, and we treat the first bytes as |
360 // a parameter rather than a disposition-type. | 373 // a parameter rather than a disposition-type. |
361 if (!HttpUtil::IsToken(type_begin, type_end)) | 374 if (!HttpUtil::IsToken(type_begin, type_end)) |
362 return begin; | 375 return begin; |
363 | 376 |
| 377 parse_result_flags_ |= HAS_DISPOSITION_TYPE; |
| 378 |
364 DCHECK(std::find(type_begin, type_end, '=') == type_end); | 379 DCHECK(std::find(type_begin, type_end, '=') == type_end); |
365 | 380 |
366 if (!LowerCaseEqualsASCII(type_begin, type_end, "inline")) | 381 if (LowerCaseEqualsASCII(type_begin, type_end, "inline")) { |
| 382 type_ = INLINE; |
| 383 } else if (LowerCaseEqualsASCII(type_begin, type_end, "attachment")) { |
367 type_ = ATTACHMENT; | 384 type_ = ATTACHMENT; |
| 385 } else { |
| 386 parse_result_flags_ |= HAS_UNKNOWN_DISPOSITION_TYPE; |
| 387 type_ = ATTACHMENT; |
| 388 } |
368 return delimiter; | 389 return delimiter; |
369 } | 390 } |
370 | 391 |
371 // http://tools.ietf.org/html/rfc6266 | 392 // http://tools.ietf.org/html/rfc6266 |
372 // | 393 // |
373 // content-disposition = "Content-Disposition" ":" | 394 // content-disposition = "Content-Disposition" ":" |
374 // disposition-type *( ";" disposition-parm ) | 395 // disposition-type *( ";" disposition-parm ) |
375 // | 396 // |
376 // disposition-type = "inline" | "attachment" | disp-ext-type | 397 // disposition-type = "inline" | "attachment" | disp-ext-type |
377 // ; case-insensitive | 398 // ; case-insensitive |
(...skipping 19 matching lines...) Expand all Loading... |
397 | 418 |
398 std::string name; | 419 std::string name; |
399 std::string filename; | 420 std::string filename; |
400 std::string ext_filename; | 421 std::string ext_filename; |
401 | 422 |
402 HttpUtil::NameValuePairsIterator iter(pos, end, ';'); | 423 HttpUtil::NameValuePairsIterator iter(pos, end, ';'); |
403 while (iter.GetNext()) { | 424 while (iter.GetNext()) { |
404 if (filename.empty() && LowerCaseEqualsASCII(iter.name_begin(), | 425 if (filename.empty() && LowerCaseEqualsASCII(iter.name_begin(), |
405 iter.name_end(), | 426 iter.name_end(), |
406 "filename")) { | 427 "filename")) { |
407 DecodeFilenameValue(iter.value(), referrer_charset, &filename); | 428 DecodeFilenameValue(iter.value(), referrer_charset, &filename, |
| 429 &parse_result_flags_); |
| 430 if (!filename.empty()) |
| 431 parse_result_flags_ |= HAS_FILENAME; |
408 } else if (name.empty() && LowerCaseEqualsASCII(iter.name_begin(), | 432 } else if (name.empty() && LowerCaseEqualsASCII(iter.name_begin(), |
409 iter.name_end(), | 433 iter.name_end(), |
410 "name")) { | 434 "name")) { |
411 DecodeFilenameValue(iter.value(), referrer_charset, &name); | 435 DecodeFilenameValue(iter.value(), referrer_charset, &name, NULL); |
| 436 if (!name.empty()) |
| 437 parse_result_flags_ |= HAS_NAME; |
412 } else if (ext_filename.empty() && LowerCaseEqualsASCII(iter.name_begin(), | 438 } else if (ext_filename.empty() && LowerCaseEqualsASCII(iter.name_begin(), |
413 iter.name_end(), | 439 iter.name_end(), |
414 "filename*")) { | 440 "filename*")) { |
415 DecodeExtValue(iter.raw_value(), &ext_filename); | 441 DecodeExtValue(iter.raw_value(), &ext_filename); |
| 442 if (!ext_filename.empty()) |
| 443 parse_result_flags_ |= HAS_EXT_FILENAME; |
416 } | 444 } |
417 } | 445 } |
418 | 446 |
419 if (!ext_filename.empty()) | 447 if (!ext_filename.empty()) |
420 filename_ = ext_filename; | 448 filename_ = ext_filename; |
421 else if (!filename.empty()) | 449 else if (!filename.empty()) |
422 filename_ = filename; | 450 filename_ = filename; |
423 else | 451 else |
424 filename_ = name; | 452 filename_ = name; |
425 } | 453 } |
426 | 454 |
427 } // namespace net | 455 } // namespace net |
OLD | NEW |