Chromium Code Reviews| Index: courgette/third_party/bsdiff/bsdiff_search.h |
| diff --git a/courgette/third_party/bsdiff/bsdiff_search.h b/courgette/third_party/bsdiff/bsdiff_search.h |
| new file mode 100644 |
| index 0000000000000000000000000000000000000000..38d05306c991a7fabe416a3eb7ef79061000989d |
| --- /dev/null |
| +++ b/courgette/third_party/bsdiff/bsdiff_search.h |
| @@ -0,0 +1,99 @@ |
| +// Copyright 2003, 2004 Colin Percival |
| +// All rights reserved |
| +// |
| +// Redistribution and use in source and binary forms, with or without |
| +// modification, are permitted providing that the following conditions |
| +// are met: |
| +// 1. Redistributions of source code must retain the above copyright |
| +// notice, this list of conditions and the following disclaimer. |
| +// 2. Redistributions in binary form must reproduce the above copyright |
| +// notice, this list of conditions and the following disclaimer in the |
| +// documentation and/or other materials provided with the distribution. |
| +// |
| +// For the terms under which this work may be distributed, please see |
| +// the adjoining file "LICENSE". |
| +// |
| +// bsdiff_search.h -- Suffix array search. |
| +// |
| +// ChangeLog: |
| +// 2005-05-05 - Use the modified header struct from bspatch.h; use 32-bit |
| +// values throughout. |
| +// --Benjamin Smedberg <benjamin@smedbergs.us> |
| +// 2015-08-03 - Change search() to template to allow PagedArray usage. |
| +// --Samuel Huang <huangs@chromium.org> |
| +// 2015-08-19 - Optimized search(). |
| +// --Samuel Huang <huangs@chromium.org> |
| +// 2016-06-02 - Move matchlen() and search() to a new file; comment; format. |
| +// --Samuel Huang <huangs@chromium.org> |
| + |
| +// Copyright 2016 The Chromium Authors. All rights reserved. |
| +// Use of this source code is governed by a BSD-style license that can be |
| +// found in the LICENSE file. |
| + |
| +#include <algorithm> |
| +#include <cstring> |
| + |
| +namespace bsdiff { |
| + |
| +// Similar to ::memcmp(), but returns match length instead. |
| +inline int matchlen(const unsigned char *srcbuf, const unsigned char *keybuf, |
| + int size) { |
| + for (int i = 0; i < size; ++i) |
| + if (srcbuf[i] != keybuf[i]) |
| + return i; |
| + return size; |
| +} |
| + |
| +// Finds a suffix in |srcbuf| that has the longest common prefix with |keybuf|, |
| +// aided by suffix array |I| of |srcbuf|. Returns the length of the match, and |
| +// writes the position within |srcbuf| to |pos|. |
| +template <class T> |
| +int search(T I, const unsigned char *srcbuf, int srcize, |
| + const unsigned char *keybuf, int keysize, int *pos) { |
| + // We use binary search here, but the desired position is not the usual |
|
huangs
2016/06/06 04:53:35
In retrospect this is ridiculously verbose. Will
|
| + // position from std::lower_bound()-like binary search. |
| + // For example, given sorted suffixes of |srcbuf|: |
| + // SAAA... |
| + // SNNN... |
| + // TAAA... |
| + // Applying std::lower_bound() binary search results in: |
| + // |keybuf| = "SNBB" => "SNNN...". |
| + // |keybuf| = "SNZZ" => "TAAA...". |
| + // But the desired result for both cases is "SNNN...", with "SN" as the |
| + // longest common prefix. To solve this: |
| + // Case 1: |keysize| == 0: We'd return 0, although |pos| is arbitrary. |
| + // Case 2: |srcsize| == 0: We'd return 0, and |pos| = 0. |
| + // Case 3: |srcsize| > 0 and |keysize| > 0: We'd assert rank-|lo| suffix |
| + // of |srcbuf| is always strictly lexicographically less than |keybuf|. This |
| + // holds for |lo| = 0, which corresponds to "". The search terminates when |
| + // hi == lo + 1. At this point, suffixes at rank |lo| and rank |hi| both |
| + // may be the desired match, so we check both. In case of tie, there's NO |
| + // guarantee that we take the solution with minimum rank. |
| + // Finding the minimum rank solution is not needed, and it would create more |
| + // complexity. For example, with suffixes "MA...", "MB...", "MC...", "ME...", |
| + // searching look for "MD" would yield |lo| -> "MC...", and the minimum rank |
| + // solution "MA...", would be rather far away. |
| + // |
| + // TODO(huangs): Optimize this: match length at |t| is >= match length at |
| + // |lo| and at |hi|. If we track this then we can skip comparisons! |
| + int lo = 0; |
| + int hi = srcize; |
| + while (hi - lo > 1) { |
| + int t = (lo + hi) / 2; |
| + if (::memcmp(srcbuf + I[t], keybuf, std::min(srcize - I[t], keysize)) < 0) |
| + lo = t; |
| + else |
| + hi = t; |
| + } |
| + |
| + int x = matchlen(srcbuf + I[lo], keybuf, std::min(srcize - I[lo], keysize)); |
| + int y = matchlen(srcbuf + I[hi], keybuf, std::min(srcize - I[hi], keysize)); |
| + if (x > y) { |
| + *pos = I[lo]; |
| + return x; |
| + } |
| + *pos = I[hi]; |
| + return y; |
| +} |
| + |
| +} // namespace bsdiff |