| OLD | NEW |
| 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 // Derived from: | 5 // Derived from: |
| 6 // mozilla/netwerk/protocol/http/src/nsHttpChunkedDecoder.cpp | 6 // mozilla/netwerk/protocol/http/src/nsHttpChunkedDecoder.cpp |
| 7 // The license block is: | 7 // The license block is: |
| 8 /* ***** BEGIN LICENSE BLOCK ***** | 8 /* ***** BEGIN LICENSE BLOCK ***** |
| 9 * Version: MPL 1.1/GPL 2.0/LGPL 2.1 | 9 * Version: MPL 1.1/GPL 2.0/LGPL 2.1 |
| 10 * | 10 * |
| (...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 45 #include "net/http/http_chunked_decoder.h" | 45 #include "net/http/http_chunked_decoder.h" |
| 46 | 46 |
| 47 #include "base/logging.h" | 47 #include "base/logging.h" |
| 48 #include "base/string_number_conversions.h" | 48 #include "base/string_number_conversions.h" |
| 49 #include "base/string_piece.h" | 49 #include "base/string_piece.h" |
| 50 #include "base/string_util.h" | 50 #include "base/string_util.h" |
| 51 #include "net/base/net_errors.h" | 51 #include "net/base/net_errors.h" |
| 52 | 52 |
| 53 namespace net { | 53 namespace net { |
| 54 | 54 |
// Upper bound on the length of a partial chunk-size line accumulated in
// line_buf_ while waiting for its terminating LF. ScanForChunkRemaining()
// fails with ERR_INVALID_CHUNKED_ENCODING when appending more data would push
// the buffered line past this limit.
//
// Absurdly long size to avoid imposing a constraint on chunked encoding
// extensions.
const size_t HttpChunkedDecoder::kMaxLineBufLen = 16384;
| 58 |
// Constructs a decoder with every counter set to zero and every state flag
// cleared.
HttpChunkedDecoder::HttpChunkedDecoder()
    : chunk_remaining_(0),
      chunk_terminator_remaining_(false),
      reached_last_chunk_(false),
      reached_eof_(false),
      bytes_after_eof_(0) {
}
| 62 | 66 |
| 63 int HttpChunkedDecoder::FilterBuf(char* buf, int buf_len) { | 67 int HttpChunkedDecoder::FilterBuf(char* buf, int buf_len) { |
| 64 int result = 0; | 68 int result = 0; |
| (...skipping 23 matching lines...) Expand all Loading... |
| 88 | 92 |
| 89 buf_len -= bytes_consumed; | 93 buf_len -= bytes_consumed; |
| 90 if (buf_len) | 94 if (buf_len) |
| 91 memmove(buf, buf + bytes_consumed, buf_len); | 95 memmove(buf, buf + bytes_consumed, buf_len); |
| 92 } | 96 } |
| 93 | 97 |
| 94 return result; | 98 return result; |
| 95 } | 99 } |
| 96 | 100 |
| 97 int HttpChunkedDecoder::ScanForChunkRemaining(const char* buf, int buf_len) { | 101 int HttpChunkedDecoder::ScanForChunkRemaining(const char* buf, int buf_len) { |
| 98 DCHECK(chunk_remaining_ == 0); | 102 DCHECK_EQ(0, chunk_remaining_); |
| 99 DCHECK(buf_len > 0); | 103 DCHECK_GT(buf_len, 0); |
| 100 | 104 |
| 101 int bytes_consumed = 0; | 105 int bytes_consumed = 0; |
| 102 | 106 |
| 103 size_t index_of_lf = base::StringPiece(buf, buf_len).find('\n'); | 107 size_t index_of_lf = base::StringPiece(buf, buf_len).find('\n'); |
| 104 if (index_of_lf != base::StringPiece::npos) { | 108 if (index_of_lf != base::StringPiece::npos) { |
| 105 buf_len = static_cast<int>(index_of_lf); | 109 buf_len = static_cast<int>(index_of_lf); |
| 106 if (buf_len && buf[buf_len - 1] == '\r') // Eliminate a preceding CR. | 110 if (buf_len && buf[buf_len - 1] == '\r') // Eliminate a preceding CR. |
| 107 buf_len--; | 111 buf_len--; |
| 108 bytes_consumed = static_cast<int>(index_of_lf) + 1; | 112 bytes_consumed = static_cast<int>(index_of_lf) + 1; |
| 109 | 113 |
| (...skipping 35 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 145 } | 149 } |
| 146 line_buf_.clear(); | 150 line_buf_.clear(); |
| 147 } else { | 151 } else { |
| 148 // Save the partial line; wait for more data. | 152 // Save the partial line; wait for more data. |
| 149 bytes_consumed = buf_len; | 153 bytes_consumed = buf_len; |
| 150 | 154 |
| 151 // Ignore a trailing CR | 155 // Ignore a trailing CR |
| 152 if (buf[buf_len - 1] == '\r') | 156 if (buf[buf_len - 1] == '\r') |
| 153 buf_len--; | 157 buf_len--; |
| 154 | 158 |
| 159 if (line_buf_.length() + buf_len > kMaxLineBufLen) { |
| 160 DLOG(ERROR) << "Chunked line length too long"; |
| 161 return ERR_INVALID_CHUNKED_ENCODING; |
| 162 } |
| 163 |
| 155 line_buf_.append(buf, buf_len); | 164 line_buf_.append(buf, buf_len); |
| 156 } | 165 } |
| 157 return bytes_consumed; | 166 return bytes_consumed; |
| 158 } | 167 } |
| 159 | 168 |
| 160 | 169 |
| 161 // While the HTTP 1.1 specification defines chunk-size as 1*HEX | 170 // While the HTTP 1.1 specification defines chunk-size as 1*HEX |
| 162 // some sites rely on more lenient parsing. | 171 // some sites rely on more lenient parsing. |
| 163 // http://www.yahoo.com/, for example, pads chunk-size with trailing spaces | 172 // http://www.yahoo.com/, for example, pads chunk-size with trailing spaces |
| 164 // (0x20) to be 7 characters long, such as "819b ". | 173 // (0x20) to be 7 characters long, such as "819b ". |
| 165 // | 174 // |
| 166 // A comparison of browsers running on WindowsXP shows that | 175 // A comparison of browsers running on WindowsXP shows that |
| 167 // they will parse the following inputs (egrep syntax): | 176 // they will parse the following inputs (egrep syntax): |
| 168 // | 177 // |
| 169 // Let \X be the character class for a hex digit: [0-9a-fA-F] | 178 // Let \X be the character class for a hex digit: [0-9a-fA-F] |
| 170 // | 179 // |
| 171 // RFC 2616: ^\X+$ | 180 // RFC 2616: ^\X+$ |
| 172 // IE7: ^\X+[^\X]*$ | 181 // IE7: ^\X+[^\X]*$ |
| 173 // Safari 3.1: ^[\t\r ]*\X+[\t ]*$ | 182 // Safari 3.1: ^[\t\r ]*\X+[\t ]*$ |
| 174 // Firefox 3: ^[\t\f\v\r ]*[+]?(0x)?\X+[^\X]*$ | 183 // Firefox 3: ^[\t\f\v\r ]*[+]?(0x)?\X+[^\X]*$ |
| 175 // Opera 9.51: ^[\t\f\v ]*[+]?(0x)?\X+[^\X]*$ | 184 // Opera 9.51: ^[\t\f\v ]*[+]?(0x)?\X+[^\X]*$ |
| 176 // | 185 // |
| 177 // Our strategy is to be as strict as possible, while not breaking | 186 // Our strategy is to be as strict as possible, while not breaking |
| 178 // known sites. | 187 // known sites. |
| 179 // | 188 // |
| 180 // Us: ^\X+[ ]*$ | 189 // Us: ^\X+[ ]*$ |
| 181 bool HttpChunkedDecoder::ParseChunkSize(const char* start, int len, int* out) { | 190 bool HttpChunkedDecoder::ParseChunkSize(const char* start, int len, int* out) { |
| 182 DCHECK(len >= 0); | 191 DCHECK_GE(len, 0); |
| 183 | 192 |
| 184 // Strip trailing spaces | 193 // Strip trailing spaces |
| 185 while (len && start[len - 1] == ' ') | 194 while (len && start[len - 1] == ' ') |
| 186 len--; | 195 len--; |
| 187 | 196 |
| 188 // Be more restrictive than HexStringToInt; | 197 // Be more restrictive than HexStringToInt; |
| 189 // don't allow inputs with leading "-", "+", "0x", "0X" | 198 // don't allow inputs with leading "-", "+", "0x", "0X" |
| 190 base::StringPiece chunk_size(start, len); | 199 base::StringPiece chunk_size(start, len); |
| 191 if (chunk_size.find_first_not_of("0123456789abcdefABCDEF") | 200 if (chunk_size.find_first_not_of("0123456789abcdefABCDEF") |
| 192 != base::StringPiece::npos) { | 201 != base::StringPiece::npos) { |
| 193 return false; | 202 return false; |
| 194 } | 203 } |
| 195 | 204 |
| 196 int parsed_number; | 205 int parsed_number; |
| 197 bool ok = base::HexStringToInt(chunk_size, &parsed_number); | 206 bool ok = base::HexStringToInt(chunk_size, &parsed_number); |
| 198 if (ok && parsed_number >= 0) { | 207 if (ok && parsed_number >= 0) { |
| 199 *out = parsed_number; | 208 *out = parsed_number; |
| 200 return true; | 209 return true; |
| 201 } | 210 } |
| 202 return false; | 211 return false; |
| 203 } | 212 } |
| 204 | 213 |
| 205 } // namespace net | 214 } // namespace net |
| OLD | NEW |