OLD | NEW |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "sync/internal_api/public/base/unique_position.h" | 5 #include "components/sync/base/unique_position.h" |
6 | 6 |
7 #include <stddef.h> | 7 #include <stddef.h> |
8 #include <stdint.h> | 8 #include <stdint.h> |
9 | 9 |
10 #include <algorithm> | 10 #include <algorithm> |
11 #include <limits> | 11 #include <limits> |
12 | 12 |
13 #include "base/logging.h" | 13 #include "base/logging.h" |
14 #include "base/rand_util.h" | 14 #include "base/rand_util.h" |
15 #include "base/stl_util.h" | 15 #include "base/stl_util.h" |
16 #include "base/strings/string_number_conversions.h" | 16 #include "base/strings/string_number_conversions.h" |
17 #include "sync/protocol/unique_position.pb.h" | 17 #include "components/sync/protocol/unique_position.pb.h" |
18 #include "third_party/zlib/zlib.h" | 18 #include "third_party/zlib/zlib.h" |
19 | 19 |
20 namespace syncer { | 20 namespace syncer { |
21 | 21 |
22 const size_t UniquePosition::kSuffixLength = 28; | 22 const size_t UniquePosition::kSuffixLength = 28; |
23 const size_t UniquePosition::kCompressBytesThreshold = 128; | 23 const size_t UniquePosition::kCompressBytesThreshold = 128; |
24 | 24 |
25 // static. | 25 // static. |
26 bool UniquePosition::IsValidSuffix(const std::string& suffix) { | 26 bool UniquePosition::IsValidSuffix(const std::string& suffix) { |
27 // The suffix must be exactly the specified length, otherwise unique suffixes | 27 // The suffix must be exactly the specified length, otherwise unique suffixes |
28 // are not sufficient to guarantee unique positions (because prefix + suffix | 28 // are not sufficient to guarantee unique positions (because prefix + suffix |
29 // == p + refixsuffix). | 29 // == p + refixsuffix). |
30 return suffix.length() == kSuffixLength | 30 return suffix.length() == kSuffixLength && suffix[kSuffixLength - 1] != 0; |
31 && suffix[kSuffixLength-1] != 0; | |
32 } | 31 } |
33 | 32 |
34 // static. | 33 // static. |
35 bool UniquePosition::IsValidBytes(const std::string& bytes) { | 34 bool UniquePosition::IsValidBytes(const std::string& bytes) { |
36 // The first condition ensures that our suffix uniqueness is sufficient to | 35 // The first condition ensures that our suffix uniqueness is sufficient to |
37 // guarantee position uniqueness. Otherwise, it's possible the end of some | 36 // guarantee position uniqueness. Otherwise, it's possible the end of some |
38 // prefix + some short suffix == some long suffix. | 37 // prefix + some short suffix == some long suffix. |
39 // The second condition ensures that FindSmallerWithSuffix can always return a | 38 // The second condition ensures that FindSmallerWithSuffix can always return a |
40 // result. | 39 // result. |
41 return bytes.length() >= kSuffixLength | 40 return bytes.length() >= kSuffixLength && bytes[bytes.length() - 1] != 0; |
42 && bytes[bytes.length()-1] != 0; | |
43 } | 41 } |
44 | 42 |
45 // static. | 43 // static. |
46 std::string UniquePosition::RandomSuffix() { | 44 std::string UniquePosition::RandomSuffix() { |
47 // Users random data for all but the last byte. The last byte must not be | 45 // Users random data for all but the last byte. The last byte must not be |
48 // zero. We arbitrarily set it to 0x7f. | 46 // zero. We arbitrarily set it to 0x7f. |
49 return base::RandBytesAsString(kSuffixLength - 1) + "\x7f"; | 47 return base::RandBytesAsString(kSuffixLength - 1) + "\x7f"; |
50 } | 48 } |
51 | 49 |
52 // static. | 50 // static. |
(...skipping 17 matching lines...) Expand all Loading... |
70 int result = uncompress( | 68 int result = uncompress( |
71 reinterpret_cast<Bytef*>(string_as_array(&un_gzipped)), | 69 reinterpret_cast<Bytef*>(string_as_array(&un_gzipped)), |
72 &uncompressed_len, | 70 &uncompressed_len, |
73 reinterpret_cast<const Bytef*>(proto.compressed_value().data()), | 71 reinterpret_cast<const Bytef*>(proto.compressed_value().data()), |
74 proto.compressed_value().size()); | 72 proto.compressed_value().size()); |
75 if (result != Z_OK) { | 73 if (result != Z_OK) { |
76 DLOG(ERROR) << "Unzip failed " << result; | 74 DLOG(ERROR) << "Unzip failed " << result; |
77 return UniquePosition::CreateInvalid(); | 75 return UniquePosition::CreateInvalid(); |
78 } | 76 } |
79 if (uncompressed_len != proto.uncompressed_length()) { | 77 if (uncompressed_len != proto.uncompressed_length()) { |
80 DLOG(ERROR) | 78 DLOG(ERROR) << "Uncompressed length " << uncompressed_len |
81 << "Uncompressed length " << uncompressed_len | 79 << " did not match specified length " |
82 << " did not match specified length " << proto.uncompressed_length(); | 80 << proto.uncompressed_length(); |
83 return UniquePosition::CreateInvalid(); | 81 return UniquePosition::CreateInvalid(); |
84 } | 82 } |
85 return UniquePosition(Compress(un_gzipped)); | 83 return UniquePosition(Compress(un_gzipped)); |
86 } else { | 84 } else { |
87 return UniquePosition::CreateInvalid(); | 85 return UniquePosition::CreateInvalid(); |
88 } | 86 } |
89 } | 87 } |
90 | 88 |
91 // static. | 89 // static. |
92 UniquePosition UniquePosition::FromInt64(int64_t x, const std::string& suffix) { | 90 UniquePosition UniquePosition::FromInt64(int64_t x, const std::string& suffix) { |
93 uint64_t y = static_cast<uint64_t>(x); | 91 uint64_t y = static_cast<uint64_t>(x); |
94 y ^= 0x8000000000000000ULL; // Make it non-negative. | 92 y ^= 0x8000000000000000ULL; // Make it non-negative. |
95 std::string bytes(8, 0); | 93 std::string bytes(8, 0); |
96 for (int i = 7; i >= 0; --i) { | 94 for (int i = 7; i >= 0; --i) { |
97 bytes[i] = static_cast<uint8_t>(y); | 95 bytes[i] = static_cast<uint8_t>(y); |
98 y >>= 8; | 96 y >>= 8; |
99 } | 97 } |
100 return UniquePosition(bytes + suffix, suffix); | 98 return UniquePosition(bytes + suffix, suffix); |
101 } | 99 } |
102 | 100 |
103 // static. | 101 // static. |
104 UniquePosition UniquePosition::InitialPosition( | 102 UniquePosition UniquePosition::InitialPosition(const std::string& suffix) { |
105 const std::string& suffix) { | |
106 DCHECK(IsValidSuffix(suffix)); | 103 DCHECK(IsValidSuffix(suffix)); |
107 return UniquePosition(suffix, suffix); | 104 return UniquePosition(suffix, suffix); |
108 } | 105 } |
109 | 106 |
110 // static. | 107 // static. |
111 UniquePosition UniquePosition::Before( | 108 UniquePosition UniquePosition::Before(const UniquePosition& x, |
112 const UniquePosition& x, | 109 const std::string& suffix) { |
113 const std::string& suffix) { | |
114 DCHECK(IsValidSuffix(suffix)); | 110 DCHECK(IsValidSuffix(suffix)); |
115 DCHECK(x.IsValid()); | 111 DCHECK(x.IsValid()); |
116 const std::string& before = FindSmallerWithSuffix( | 112 const std::string& before = |
117 Uncompress(x.compressed_), suffix); | 113 FindSmallerWithSuffix(Uncompress(x.compressed_), suffix); |
118 return UniquePosition(before + suffix, suffix); | 114 return UniquePosition(before + suffix, suffix); |
119 } | 115 } |
120 | 116 |
121 // static. | 117 // static. |
122 UniquePosition UniquePosition::After( | 118 UniquePosition UniquePosition::After(const UniquePosition& x, |
123 const UniquePosition& x, | 119 const std::string& suffix) { |
124 const std::string& suffix) { | |
125 DCHECK(IsValidSuffix(suffix)); | 120 DCHECK(IsValidSuffix(suffix)); |
126 DCHECK(x.IsValid()); | 121 DCHECK(x.IsValid()); |
127 const std::string& after = FindGreaterWithSuffix( | 122 const std::string& after = |
128 Uncompress(x.compressed_), suffix); | 123 FindGreaterWithSuffix(Uncompress(x.compressed_), suffix); |
129 return UniquePosition(after + suffix, suffix); | 124 return UniquePosition(after + suffix, suffix); |
130 } | 125 } |
131 | 126 |
132 // static. | 127 // static. |
133 UniquePosition UniquePosition::Between( | 128 UniquePosition UniquePosition::Between(const UniquePosition& before, |
134 const UniquePosition& before, | 129 const UniquePosition& after, |
135 const UniquePosition& after, | 130 const std::string& suffix) { |
136 const std::string& suffix) { | |
137 DCHECK(before.IsValid()); | 131 DCHECK(before.IsValid()); |
138 DCHECK(after.IsValid()); | 132 DCHECK(after.IsValid()); |
139 DCHECK(before.LessThan(after)); | 133 DCHECK(before.LessThan(after)); |
140 DCHECK(IsValidSuffix(suffix)); | 134 DCHECK(IsValidSuffix(suffix)); |
141 const std::string& mid = FindBetweenWithSuffix( | 135 const std::string& mid = FindBetweenWithSuffix( |
142 Uncompress(before.compressed_), | 136 Uncompress(before.compressed_), Uncompress(after.compressed_), suffix); |
143 Uncompress(after.compressed_), | |
144 suffix); | |
145 return UniquePosition(mid + suffix, suffix); | 137 return UniquePosition(mid + suffix, suffix); |
146 } | 138 } |
147 | 139 |
148 UniquePosition::UniquePosition() : is_valid_(false) {} | 140 UniquePosition::UniquePosition() : is_valid_(false) {} |
149 | 141 |
150 bool UniquePosition::LessThan(const UniquePosition& other) const { | 142 bool UniquePosition::LessThan(const UniquePosition& other) const { |
151 DCHECK(this->IsValid()); | 143 DCHECK(this->IsValid()); |
152 DCHECK(other.IsValid()); | 144 DCHECK(other.IsValid()); |
153 | 145 |
154 return compressed_ < other.compressed_; | 146 return compressed_ < other.compressed_; |
(...skipping 62 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
217 debug_string.append(", compressed: " + compressed_string); | 209 debug_string.append(", compressed: " + compressed_string); |
218 return debug_string; | 210 return debug_string; |
219 } | 211 } |
220 | 212 |
221 std::string UniquePosition::GetSuffixForTest() const { | 213 std::string UniquePosition::GetSuffixForTest() const { |
222 const std::string bytes = Uncompress(compressed_); | 214 const std::string bytes = Uncompress(compressed_); |
223 const size_t prefix_len = bytes.length() - kSuffixLength; | 215 const size_t prefix_len = bytes.length() - kSuffixLength; |
224 return bytes.substr(prefix_len, std::string::npos); | 216 return bytes.substr(prefix_len, std::string::npos); |
225 } | 217 } |
226 | 218 |
227 std::string UniquePosition::FindSmallerWithSuffix( | 219 std::string UniquePosition::FindSmallerWithSuffix(const std::string& reference, |
228 const std::string& reference, | 220 const std::string& suffix) { |
229 const std::string& suffix) { | |
230 size_t ref_zeroes = reference.find_first_not_of('\0'); | 221 size_t ref_zeroes = reference.find_first_not_of('\0'); |
231 size_t suffix_zeroes = suffix.find_first_not_of('\0'); | 222 size_t suffix_zeroes = suffix.find_first_not_of('\0'); |
232 | 223 |
233 // Neither of our inputs are allowed to have trailing zeroes, so the following | 224 // Neither of our inputs are allowed to have trailing zeroes, so the following |
234 // must be true. | 225 // must be true. |
235 DCHECK_NE(ref_zeroes, std::string::npos); | 226 DCHECK_NE(ref_zeroes, std::string::npos); |
236 DCHECK_NE(suffix_zeroes, std::string::npos); | 227 DCHECK_NE(suffix_zeroes, std::string::npos); |
237 | 228 |
238 if (suffix_zeroes > ref_zeroes) { | 229 if (suffix_zeroes > ref_zeroes) { |
239 // Implies suffix < ref. | 230 // Implies suffix < ref. |
(...skipping 10 matching lines...) Expand all Loading... |
250 return std::string(ref_zeroes - suffix_zeroes + 1, '\0'); | 241 return std::string(ref_zeroes - suffix_zeroes + 1, '\0'); |
251 } else { | 242 } else { |
252 // Prepend zeroes to match those in the |reference|, then something smaller | 243 // Prepend zeroes to match those in the |reference|, then something smaller |
253 // than the first non-zero digit in |reference|. | 244 // than the first non-zero digit in |reference|. |
254 char lt_digit = static_cast<uint8_t>(reference[ref_zeroes]) / 2; | 245 char lt_digit = static_cast<uint8_t>(reference[ref_zeroes]) / 2; |
255 return std::string(ref_zeroes, '\0') + lt_digit; | 246 return std::string(ref_zeroes, '\0') + lt_digit; |
256 } | 247 } |
257 } | 248 } |
258 | 249 |
259 // static | 250 // static |
260 std::string UniquePosition::FindGreaterWithSuffix( | 251 std::string UniquePosition::FindGreaterWithSuffix(const std::string& reference, |
261 const std::string& reference, | 252 const std::string& suffix) { |
262 const std::string& suffix) { | |
263 size_t ref_FFs = | 253 size_t ref_FFs = |
264 reference.find_first_not_of(std::numeric_limits<uint8_t>::max()); | 254 reference.find_first_not_of(std::numeric_limits<uint8_t>::max()); |
265 size_t suffix_FFs = | 255 size_t suffix_FFs = |
266 suffix.find_first_not_of(std::numeric_limits<uint8_t>::max()); | 256 suffix.find_first_not_of(std::numeric_limits<uint8_t>::max()); |
267 | 257 |
268 if (ref_FFs == std::string::npos) { | 258 if (ref_FFs == std::string::npos) { |
269 ref_FFs = reference.length(); | 259 ref_FFs = reference.length(); |
270 } | 260 } |
271 if (suffix_FFs == std::string::npos) { | 261 if (suffix_FFs == std::string::npos) { |
272 suffix_FFs = suffix.length(); | 262 suffix_FFs = suffix.length(); |
(...skipping 19 matching lines...) Expand all Loading... |
292 // than the first non-FF digit in |reference|. | 282 // than the first non-FF digit in |reference|. |
293 char gt_digit = static_cast<uint8_t>(reference[ref_FFs]) + | 283 char gt_digit = static_cast<uint8_t>(reference[ref_FFs]) + |
294 (std::numeric_limits<uint8_t>::max() - | 284 (std::numeric_limits<uint8_t>::max() - |
295 static_cast<uint8_t>(reference[ref_FFs]) + 1) / | 285 static_cast<uint8_t>(reference[ref_FFs]) + 1) / |
296 2; | 286 2; |
297 return std::string(ref_FFs, std::numeric_limits<uint8_t>::max()) + gt_digit; | 287 return std::string(ref_FFs, std::numeric_limits<uint8_t>::max()) + gt_digit; |
298 } | 288 } |
299 } | 289 } |
300 | 290 |
301 // static | 291 // static |
302 std::string UniquePosition::FindBetweenWithSuffix( | 292 std::string UniquePosition::FindBetweenWithSuffix(const std::string& before, |
303 const std::string& before, | 293 const std::string& after, |
304 const std::string& after, | 294 const std::string& suffix) { |
305 const std::string& suffix) { | |
306 DCHECK(IsValidSuffix(suffix)); | 295 DCHECK(IsValidSuffix(suffix)); |
307 DCHECK_NE(before, after); | 296 DCHECK_NE(before, after); |
308 DCHECK_LT(before, after); | 297 DCHECK_LT(before, after); |
309 | 298 |
310 std::string mid; | 299 std::string mid; |
311 | 300 |
312 // Sometimes our suffix puts us where we want to be. | 301 // Sometimes our suffix puts us where we want to be. |
313 if (before < suffix && suffix < after) { | 302 if (before < suffix && suffix < after) { |
314 return std::string(); | 303 return std::string(); |
315 } | 304 } |
316 | 305 |
317 size_t i = 0; | 306 size_t i = 0; |
318 for ( ; i < std::min(before.length(), after.length()); ++i) { | 307 for (; i < std::min(before.length(), after.length()); ++i) { |
319 uint8_t a_digit = before[i]; | 308 uint8_t a_digit = before[i]; |
320 uint8_t b_digit = after[i]; | 309 uint8_t b_digit = after[i]; |
321 | 310 |
322 if (b_digit - a_digit >= 2) { | 311 if (b_digit - a_digit >= 2) { |
323 mid.push_back(a_digit + (b_digit - a_digit)/2); | 312 mid.push_back(a_digit + (b_digit - a_digit) / 2); |
324 return mid; | 313 return mid; |
325 } else if (a_digit == b_digit) { | 314 } else if (a_digit == b_digit) { |
326 mid.push_back(a_digit); | 315 mid.push_back(a_digit); |
327 | 316 |
328 // Both strings are equal so far. Will appending the suffix at this point | 317 // Both strings are equal so far. Will appending the suffix at this point |
329 // give us the comparison we're looking for? | 318 // give us the comparison we're looking for? |
330 if (before.substr(i+1) < suffix && suffix < after.substr(i+1)) { | 319 if (before.substr(i + 1) < suffix && suffix < after.substr(i + 1)) { |
331 return mid; | 320 return mid; |
332 } | 321 } |
333 } else { | 322 } else { |
334 DCHECK_EQ(b_digit - a_digit, 1); // Implied by above if branches. | 323 DCHECK_EQ(b_digit - a_digit, 1); // Implied by above if branches. |
335 | 324 |
336 // The two options are off by one digit. The choice of whether to round | 325 // The two options are off by one digit. The choice of whether to round |
337 // up or down here will have consequences on what we do with the remaining | 326 // up or down here will have consequences on what we do with the remaining |
338 // digits. Exploring both options is an optimization and is not required | 327 // digits. Exploring both options is an optimization and is not required |
339 // for the correctness of this algorithm. | 328 // for the correctness of this algorithm. |
340 | 329 |
341 // Option A: Round down the current digit. This makes our |mid| < | 330 // Option A: Round down the current digit. This makes our |mid| < |
342 // |after|, no matter what we append afterwards. We then focus on | 331 // |after|, no matter what we append afterwards. We then focus on |
343 // appending digits until |mid| > |before|. | 332 // appending digits until |mid| > |before|. |
344 std::string mid_a = mid; | 333 std::string mid_a = mid; |
345 mid_a.push_back(a_digit); | 334 mid_a.push_back(a_digit); |
346 mid_a.append(FindGreaterWithSuffix(before.substr(i+1), suffix)); | 335 mid_a.append(FindGreaterWithSuffix(before.substr(i + 1), suffix)); |
347 | 336 |
348 // Option B: Round up the current digit. This makes our |mid| > |before|, | 337 // Option B: Round up the current digit. This makes our |mid| > |before|, |
349 // no matter what we append afterwards. We then focus on appending digits | 338 // no matter what we append afterwards. We then focus on appending digits |
350 // until |mid| < |after|. Note that this option may not be viable if the | 339 // until |mid| < |after|. Note that this option may not be viable if the |
351 // current digit is the last one in |after|, so we skip the option in that | 340 // current digit is the last one in |after|, so we skip the option in that |
352 // case. | 341 // case. |
353 if (after.length() > i+1) { | 342 if (after.length() > i + 1) { |
354 std::string mid_b = mid; | 343 std::string mid_b = mid; |
355 mid_b.push_back(b_digit); | 344 mid_b.push_back(b_digit); |
356 mid_b.append(FindSmallerWithSuffix(after.substr(i+1), suffix)); | 345 mid_b.append(FindSmallerWithSuffix(after.substr(i + 1), suffix)); |
357 | 346 |
358 // Does this give us a shorter position value? If so, use it. | 347 // Does this give us a shorter position value? If so, use it. |
359 if (mid_b.length() < mid_a.length()) { | 348 if (mid_b.length() < mid_a.length()) { |
360 return mid_b; | 349 return mid_b; |
361 } | 350 } |
362 } | 351 } |
363 return mid_a; | 352 return mid_a; |
364 } | 353 } |
365 } | 354 } |
366 | 355 |
367 // If we haven't found a midpoint yet, the following must be true: | 356 // If we haven't found a midpoint yet, the following must be true: |
368 DCHECK_EQ(before.substr(0, i), after.substr(0, i)); | 357 DCHECK_EQ(before.substr(0, i), after.substr(0, i)); |
369 DCHECK_EQ(before, mid); | 358 DCHECK_EQ(before, mid); |
370 DCHECK_LT(before.length(), after.length()); | 359 DCHECK_LT(before.length(), after.length()); |
371 | 360 |
372 // We know that we'll need to append at least one more byte to |mid| in the | 361 // We know that we'll need to append at least one more byte to |mid| in the |
373 // process of making it < |after|. Appending any digit, regardless of the | 362 // process of making it < |after|. Appending any digit, regardless of the |
374 // value, will make |before| < |mid|. Therefore, the following will get us a | 363 // value, will make |before| < |mid|. Therefore, the following will get us a |
375 // valid position. | 364 // valid position. |
376 | 365 |
377 mid.append(FindSmallerWithSuffix(after.substr(i), suffix)); | 366 mid.append(FindSmallerWithSuffix(after.substr(i), suffix)); |
378 return mid; | 367 return mid; |
379 } | 368 } |
380 | 369 |
381 UniquePosition::UniquePosition(const std::string& internal_rep) | 370 UniquePosition::UniquePosition(const std::string& internal_rep) |
382 : compressed_(internal_rep), | 371 : compressed_(internal_rep), |
383 is_valid_(IsValidBytes(Uncompress(internal_rep))) { | 372 is_valid_(IsValidBytes(Uncompress(internal_rep))) {} |
384 } | |
385 | 373 |
386 UniquePosition::UniquePosition( | 374 UniquePosition::UniquePosition(const std::string& uncompressed, |
387 const std::string& uncompressed, | 375 const std::string& suffix) |
388 const std::string& suffix) | 376 : compressed_(Compress(uncompressed)), |
389 : compressed_(Compress(uncompressed)), | 377 is_valid_(IsValidBytes(uncompressed)) { |
390 is_valid_(IsValidBytes(uncompressed)) { | |
391 DCHECK(uncompressed.rfind(suffix) + kSuffixLength == uncompressed.length()); | 378 DCHECK(uncompressed.rfind(suffix) + kSuffixLength == uncompressed.length()); |
392 DCHECK(IsValidSuffix(suffix)); | 379 DCHECK(IsValidSuffix(suffix)); |
393 DCHECK(IsValid()); | 380 DCHECK(IsValid()); |
394 } | 381 } |
395 | 382 |
396 // On custom compression: | 383 // On custom compression: |
397 // | 384 // |
398 // Let C(x) be the compression function and U(x) be the uncompression function. | 385 // Let C(x) be the compression function and U(x) be the uncompression function. |
399 // | 386 // |
400 // This compression scheme has a few special properties. For one, it is | 387 // This compression scheme has a few special properties. For one, it is |
(...skipping 110 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
511 } else { | 498 } else { |
512 length = encoded_length; | 499 length = encoded_length; |
513 } | 500 } |
514 | 501 |
515 return length; | 502 return length; |
516 } | 503 } |
517 | 504 |
518 // A series of four identical chars at the beginning of a block indicates | 505 // A series of four identical chars at the beginning of a block indicates |
519 // the beginning of a repeated character block. | 506 // the beginning of a repeated character block. |
520 static bool IsRepeatedCharPrefix(const std::string& chars, size_t start_index) { | 507 static bool IsRepeatedCharPrefix(const std::string& chars, size_t start_index) { |
521 return chars[start_index] == chars[start_index+1] | 508 return chars[start_index] == chars[start_index + 1] && |
522 && chars[start_index] == chars[start_index+2] | 509 chars[start_index] == chars[start_index + 2] && |
523 && chars[start_index] == chars[start_index+3]; | 510 chars[start_index] == chars[start_index + 3]; |
524 } | 511 } |
525 | 512 |
526 } // namespace | 513 } // namespace |
527 | 514 |
528 // static | 515 // static |
529 // Wraps the CompressImpl function with a bunch of DCHECKs. | 516 // Wraps the CompressImpl function with a bunch of DCHECKs. |
530 std::string UniquePosition::Compress(const std::string& str) { | 517 std::string UniquePosition::Compress(const std::string& str) { |
531 DCHECK(IsValidBytes(str)); | 518 DCHECK(IsValidBytes(str)); |
532 std::string compressed = CompressImpl(str); | 519 std::string compressed = CompressImpl(str); |
533 DCHECK(IsValidCompressed(compressed)); | 520 DCHECK(IsValidCompressed(compressed)); |
534 DCHECK_EQ(str, Uncompress(compressed)); | 521 DCHECK_EQ(str, Uncompress(compressed)); |
535 return compressed; | 522 return compressed; |
536 } | 523 } |
537 | 524 |
538 // static | 525 // static |
539 // Performs the order preserving run length compression of a given input string. | 526 // Performs the order preserving run length compression of a given input string. |
540 std::string UniquePosition::CompressImpl(const std::string& str) { | 527 std::string UniquePosition::CompressImpl(const std::string& str) { |
541 std::string output; | 528 std::string output; |
542 | 529 |
543 // The compressed length will usually be at least as long as the suffix (28), | 530 // The compressed length will usually be at least as long as the suffix (28), |
544 // since the suffix bytes are mostly random. Most are a few bytes longer; a | 531 // since the suffix bytes are mostly random. Most are a few bytes longer; a |
545 // small few are tens of bytes longer. Some early tests indicated that | 532 // small few are tens of bytes longer. Some early tests indicated that |
546 // roughly 99% had length 40 or smaller. We guess that pre-sizing for 48 is a | 533 // roughly 99% had length 40 or smaller. We guess that pre-sizing for 48 is a |
547 // good trade-off, but that has not been confirmed through profiling. | 534 // good trade-off, but that has not been confirmed through profiling. |
548 output.reserve(48); | 535 output.reserve(48); |
549 | 536 |
550 // Each loop iteration will consume 8, or N bytes, where N >= 4 and is the | 537 // Each loop iteration will consume 8, or N bytes, where N >= 4 and is the |
551 // length of a string of identical digits starting at i. | 538 // length of a string of identical digits starting at i. |
552 for (size_t i = 0; i < str.length(); ) { | 539 for (size_t i = 0; i < str.length();) { |
553 if (i + 4 <= str.length() && IsRepeatedCharPrefix(str, i)) { | 540 if (i + 4 <= str.length() && IsRepeatedCharPrefix(str, i)) { |
554 // Four identical bytes in a row at this position means that we must start | 541 // Four identical bytes in a row at this position means that we must start |
555 // a repeated character block. Begin by outputting those four bytes. | 542 // a repeated character block. Begin by outputting those four bytes. |
556 output.append(str, i, 4); | 543 output.append(str, i, 4); |
557 | 544 |
558 // Determine the size of the run. | 545 // Determine the size of the run. |
559 const char rep_digit = str[i]; | 546 const char rep_digit = str[i]; |
560 const size_t runs_until = str.find_first_not_of(rep_digit, i+4); | 547 const size_t runs_until = str.find_first_not_of(rep_digit, i + 4); |
561 | 548 |
562 // Handle the 'runs until end' special case specially. | 549 // Handle the 'runs until end' special case specially. |
563 size_t run_length; | 550 size_t run_length; |
564 bool encode_high; // True if the next byte is greater than |rep_digit|. | 551 bool encode_high; // True if the next byte is greater than |rep_digit|. |
565 if (runs_until == std::string::npos) { | 552 if (runs_until == std::string::npos) { |
566 run_length = str.length() - i; | 553 run_length = str.length() - i; |
567 encode_high = false; | 554 encode_high = false; |
568 } else { | 555 } else { |
569 run_length = runs_until - i; | 556 run_length = runs_until - i; |
570 encode_high = static_cast<uint8_t>(str[runs_until]) > | 557 encode_high = static_cast<uint8_t>(str[runs_until]) > |
(...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
611 | 598 |
612 bool UniquePosition::IsValidCompressed(const std::string& str) { | 599 bool UniquePosition::IsValidCompressed(const std::string& str) { |
613 for (size_t i = 0; i + 8 <= str.length(); i += 8) { | 600 for (size_t i = 0; i + 8 <= str.length(); i += 8) { |
614 if (IsRepeatedCharPrefix(str, i)) { | 601 if (IsRepeatedCharPrefix(str, i)) { |
615 uint32_t count = ReadEncodedRunLength(str, i + 4); | 602 uint32_t count = ReadEncodedRunLength(str, i + 4); |
616 if (count < 4) { | 603 if (count < 4) { |
617 // A repeated character block should at least represent the four | 604 // A repeated character block should at least represent the four |
618 // characters that started it. | 605 // characters that started it. |
619 return false; | 606 return false; |
620 } | 607 } |
621 if (str[i] == str[i+4]) { | 608 if (str[i] == str[i + 4]) { |
622 // Does the next digit after a count match the repeated character? Then | 609 // Does the next digit after a count match the repeated character? Then |
623 // this is not the highest possible count. | 610 // this is not the highest possible count. |
624 return false; | 611 return false; |
625 } | 612 } |
626 } | 613 } |
627 } | 614 } |
628 // We don't bother looking for the existence or checking the validity of | 615 // We don't bother looking for the existence or checking the validity of |
629 // any partial blocks. There's no way they could be invalid anyway. | 616 // any partial blocks. There's no way they could be invalid anyway. |
630 return true; | 617 return true; |
631 } | 618 } |
632 | 619 |
633 } // namespace syncer | 620 } // namespace syncer |
OLD | NEW |