OLD | NEW |
| (Empty) |
1 // Copyright 2015 The Chromium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 // Package funnybase provides a varint-like signed integer encoding scheme which | |
6 // has the property that the encoded integers can be compared with unordered | |
7 // byte comparisons and they will sort correctly. | |
8 // | |
9 // It's less efficient on average than varint for small numbers (it has | |
10 // a minimum encoded size of 2 bytes), but is more efficient for large numbers | |
11 // (it has a maximum encoded size of 9 bytes for a 64 bit int). | |
12 // | |
13 // The scheme works like: | |
14 // * given an 2's compliment value V | |
15 // * extract the sign (S) and magnitude (M) of V | |
16 // * Find the position of the highest bit (P), minus 1. | |
17 // * write (bits): | |
18 // * SPPPPPPP MMMMMMMM MM000000 | |
19 // * S is 1 | |
20 // * P's are the log2(M)-1 | |
21 // * M's are the magnitude of V | |
22 // * 0's are padding | |
23 // * Additionally, if the number is negative, invert the bits of all the bytes | |
24 // (e.g. XOR 0xFF). This makes the sign bit S 0 for negative numbers, and | |
25 // makes the ordering of the numbers correct when compared bytewise. | |
26 package funnybase | |
27 | |
28 import ( | |
29 "bytes" | |
30 "errors" | |
31 "io" | |
32 "math" | |
33 ) | |
34 | |
35 // MaxFunnyBaseLenN is the maximum length of a funnybase-encoded N-bit integer. | |
36 const ( | |
37 MaxFunnyBaseLen16 = 3 | |
38 MaxFunnyBaseLen32 = 5 | |
39 MaxFunnyBaseLen64 = 9 | |
40 ) | |
41 | |
42 // ErrOutOfBounds is panic'd when using Put/PutUint with a buffer that's | |
43 // not large enough to hold the value being encoded. | |
44 var ErrOutOfBounds = errors.New("funnybase: buffer was too small") | |
45 | |
46 // ErrOverflow is returned when reading an number which is too large for the | |
47 // destination type (either uint64 or int64) | |
48 var ErrOverflow = errors.New("funnybase: varint overflows") | |
49 | |
50 // ErrUnderflow is returned when reading an number which is too small for the | |
51 // destination type (either uint64 or int64) | |
52 var ErrUnderflow = errors.New("funnybase: uvarint underflows") | |
53 | |
54 var paddingMasks = [...]uint64{ | |
55 0xFFFFFFFF00000000, | |
56 0xFFFF0000, | |
57 0xFF00, | |
58 0xF0, | |
59 0xC, | |
60 0x2, | |
61 0x1, | |
62 } | |
63 | |
64 // Calculate the log2 of the unsigned value v. | |
65 // | |
66 // This is used to find the position of the highest-set bit in v. | |
67 // | |
68 // from https://graphics.stanford.edu/~seander/bithacks.html#IntegerLog | |
69 // 32 bit implementation extended to 64 bits | |
70 func uint64Log2(v uint64) uint { | |
71 log := uint(0) | |
72 for i, m := range paddingMasks { | |
73 if v&m != 0 { | |
74 shift := uint(1<<uint(len(paddingMasks)-2)) >> uint(i) | |
75 v >>= shift | |
76 log |= shift | |
77 } | |
78 } | |
79 return log + 1 | |
80 } | |
81 | |
82 // Put encodes an int64 into buf and returns the number of bytes written. | |
83 // If the buffer's length is too small, Put will panic. | |
84 func Put(buf []byte, val int64) uint { | |
85 r, err := write(bytes.NewBuffer(buf[:0]), val, uint(len(buf))) | |
86 if err != nil { | |
87 panic(err) | |
88 } | |
89 return r | |
90 } | |
91 | |
92 // Write val as a funnybase to the ByteWriter. Will only return an error if | |
93 // the underlying ByteWriter returns an error. | |
94 func Write(w io.ByteWriter, val int64) error { | |
95 _, err := write(w, val, MaxFunnyBaseLen64) | |
96 return err | |
97 } | |
98 | |
99 func write(w io.ByteWriter, val int64, byteLimit uint) (uint, error) { | |
100 var inv byte | |
101 if val < 0 { | |
102 inv = 0xff | |
103 } | |
104 mag := uint64(val) | |
105 if inv != 0 { | |
106 mag = -mag | |
107 } | |
108 return writeSignMag(w, mag, inv, byteLimit) | |
109 } | |
110 | |
111 // PutUint encodes an int64 into buf and returns the number of bytes written. | |
112 // If the buffer's length is too small, Put will panic. | |
113 func PutUint(buf []byte, mag uint64) uint { | |
114 r, err := writeSignMag(bytes.NewBuffer(buf[:0]), mag, 0, uint(len(buf))) | |
115 if err != nil { | |
116 panic(err) | |
117 } | |
118 return r | |
119 } | |
120 | |
121 // WriteUint writes mag to the ByteWriter. Will only return an error if the | |
122 // underlying ByteWriter returns an error. | |
123 func WriteUint(w io.ByteWriter, mag uint64) error { | |
124 _, err := writeSignMag(w, mag, 0, MaxFunnyBaseLen64) | |
125 return err | |
126 } | |
127 | |
128 // Get decodes a funnybase-encoded number from a byte slice. Returns | |
129 // the decoded value as well as the number of bytes read. If an error | |
130 // occurs, the value is 0, and n will be: | |
131 // | |
132 // n == 0: buf too small | |
133 // n == -1: value overflows int64 | |
134 // n == -2: value underflows int64 | |
135 func Get(b []byte) (r int64, n int) { | |
136 buf := bytes.NewBuffer(b) | |
137 r, err := Read(buf) | |
138 switch err { | |
139 case ErrOverflow: | |
140 return 0, -1 | |
141 case ErrUnderflow: | |
142 return 0, -2 | |
143 case io.EOF: | |
144 return 0, 0 | |
145 } | |
146 n = len(b) - buf.Len() | |
147 return | |
148 } | |
149 | |
150 // GetUint decodes a funnybase-encoded number from a byte slice. Returns | |
151 // the decoded value as well as the number of bytes read. If an error | |
152 // occurs, the value is 0, and n will be: | |
153 // | |
154 // n == 0: buf too small | |
155 // n == -1: value overflows uint64 | |
156 // n == -2: value underflows uint64 | |
157 func GetUint(b []byte) (mag uint64, n int) { | |
158 buf := bytes.NewBuffer(b) | |
159 mag, err := ReadUint(buf) | |
160 switch err { | |
161 case ErrOverflow: | |
162 return 0, -1 | |
163 case ErrUnderflow: | |
164 return 0, -2 | |
165 case io.EOF: | |
166 return 0, 0 | |
167 } | |
168 n = len(b) - buf.Len() | |
169 return | |
170 } | |
171 | |
172 // Read decodes a funnybase-encoded number from a ByteReader. It | |
173 // returns err{Over,Under}flow if the number is out of bounds. It may also | |
174 // return an error if the ByteReader returns an error. | |
175 func Read(r io.ByteReader) (int64, error) { | |
176 pos, sigs, mag, err := readSignMag(r) | |
177 if err != nil { | |
178 return 0, err | |
179 } | |
180 if pos { | |
181 if sigs > 63 { | |
182 return 0, ErrOverflow | |
183 } | |
184 return int64(mag), nil | |
185 } | |
186 if mag > uint64(-math.MinInt64) { | |
187 return 0, ErrUnderflow | |
188 } | |
189 return int64(-mag), nil | |
190 } | |
191 | |
192 // ReadUint decodes a funnybase-encoded positive number from a ByteReader. It | |
193 // returns err{Over,Under}flow if the number is out of bounds. It may also | |
194 // return an error if the ByteReader returns an error. | |
195 func ReadUint(r io.ByteReader) (uint64, error) { | |
196 pos, _, mag, err := readSignMag(r) | |
197 if err != nil { | |
198 return 0, err | |
199 } | |
200 if !pos { | |
201 return 0, ErrUnderflow | |
202 } | |
203 return mag, err | |
204 } | |
205 | |
206 func writeSignMag(w io.ByteWriter, mag uint64, inv byte, byteLimit uint) (i uint
, err error) { | |
207 sigs := uint64Log2(mag) | |
208 | |
209 wb := func(b byte) error { | |
210 i++ | |
211 if i > byteLimit { | |
212 return ErrOutOfBounds | |
213 } | |
214 return w.WriteByte(b) | |
215 } | |
216 | |
217 if err = wb(byte(0x80|(sigs-1)) ^ inv); err != nil { | |
218 return | |
219 } | |
220 | |
221 for sigs > 8 { | |
222 sigs -= 8 | |
223 | |
224 if err = wb(byte(mag>>sigs) ^ inv); err != nil { | |
225 return | |
226 } | |
227 } | |
228 if sigs != 0 { | |
229 if err = wb(byte(mag<<(8-sigs)) ^ inv); err != nil { | |
230 return | |
231 } | |
232 } | |
233 | |
234 return | |
235 } | |
236 | |
237 func readSignMag(r io.ByteReader) (positive bool, sigs uint, mag uint64, err err
or) { | |
238 var inv byte | |
239 | |
240 b0, err := r.ReadByte() | |
241 if err != nil { | |
242 return | |
243 } | |
244 positive = true | |
245 if b0&0x80 == 0 { | |
246 positive = false | |
247 inv = 0xff | |
248 } | |
249 | |
250 sigs = uint((b0^inv)&0x7f) + 1 | |
251 if sigs > 64 { | |
252 err = ErrOverflow | |
253 return | |
254 } | |
255 | |
256 n := int((sigs+7)>>3) + 1 | |
257 | |
258 var b byte | |
259 shift := uint(64 - 8) | |
260 for i := 1; i < n; i++ { | |
261 b, err = r.ReadByte() | |
262 if err != nil { | |
263 return | |
264 } | |
265 mag |= uint64(b^inv) << shift | |
266 shift -= 8 | |
267 } | |
268 mag >>= 64 - sigs | |
269 | |
270 return | |
271 } | |
OLD | NEW |