| OLD | NEW |
| (Empty) |
| 1 // Copyright 2015 The Chromium Authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 | |
| 5 // Package funnybase provides a varint-like signed integer encoding scheme which | |
| 6 // has the property that the encoded integers can be compared with unordered | |
| 7 // byte comparisons and they will sort correctly. | |
| 8 // | |
| 9 // It's less efficient on average than varint for small numbers (it has | |
| 10 // a minimum encoded size of 2 bytes), but is more efficient for large numbers | |
| 11 // (it has a maximum encoded size of 9 bytes for a 64 bit int). | |
| 12 // | |
| 13 // The scheme works like: | |
| 14 // * given an 2's compliment value V | |
| 15 // * extract the sign (S) and magnitude (M) of V | |
| 16 // * Find the position of the highest bit (P), minus 1. | |
| 17 // * write (bits): | |
| 18 // * SPPPPPPP MMMMMMMM MM000000 | |
| 19 // * S is 1 | |
| 20 // * P's are the log2(M)-1 | |
| 21 // * M's are the magnitude of V | |
| 22 // * 0's are padding | |
| 23 // * Additionally, if the number is negative, invert the bits of all the bytes | |
| 24 // (e.g. XOR 0xFF). This makes the sign bit S 0 for negative numbers, and | |
| 25 // makes the ordering of the numbers correct when compared bytewise. | |
| 26 package funnybase | |
| 27 | |
| 28 import ( | |
| 29 "bytes" | |
| 30 "errors" | |
| 31 "io" | |
| 32 "math" | |
| 33 ) | |
| 34 | |
| 35 // MaxFunnyBaseLenN is the maximum length of a funnybase-encoded N-bit integer. | |
| 36 const ( | |
| 37 MaxFunnyBaseLen16 = 3 | |
| 38 MaxFunnyBaseLen32 = 5 | |
| 39 MaxFunnyBaseLen64 = 9 | |
| 40 ) | |
| 41 | |
| 42 // ErrOutOfBounds is panic'd when using Put/PutUint with a buffer that's | |
| 43 // not large enough to hold the value being encoded. | |
| 44 var ErrOutOfBounds = errors.New("funnybase: buffer was too small") | |
| 45 | |
| 46 // ErrOverflow is returned when reading an number which is too large for the | |
| 47 // destination type (either uint64 or int64) | |
| 48 var ErrOverflow = errors.New("funnybase: varint overflows") | |
| 49 | |
| 50 // ErrUnderflow is returned when reading an number which is too small for the | |
| 51 // destination type (either uint64 or int64) | |
| 52 var ErrUnderflow = errors.New("funnybase: uvarint underflows") | |
| 53 | |
| 54 var paddingMasks = [...]uint64{ | |
| 55 0xFFFFFFFF00000000, | |
| 56 0xFFFF0000, | |
| 57 0xFF00, | |
| 58 0xF0, | |
| 59 0xC, | |
| 60 0x2, | |
| 61 0x1, | |
| 62 } | |
| 63 | |
| 64 // Calculate the log2 of the unsigned value v. | |
| 65 // | |
| 66 // This is used to find the position of the highest-set bit in v. | |
| 67 // | |
| 68 // from https://graphics.stanford.edu/~seander/bithacks.html#IntegerLog | |
| 69 // 32 bit implementation extended to 64 bits | |
| 70 func uint64Log2(v uint64) uint { | |
| 71 log := uint(0) | |
| 72 for i, m := range paddingMasks { | |
| 73 if v&m != 0 { | |
| 74 shift := uint(1<<uint(len(paddingMasks)-2)) >> uint(i) | |
| 75 v >>= shift | |
| 76 log |= shift | |
| 77 } | |
| 78 } | |
| 79 return log + 1 | |
| 80 } | |
| 81 | |
| 82 // Put encodes an int64 into buf and returns the number of bytes written. | |
| 83 // If the buffer's length is too small, Put will panic. | |
| 84 func Put(buf []byte, val int64) uint { | |
| 85 r, err := write(bytes.NewBuffer(buf[:0]), val, uint(len(buf))) | |
| 86 if err != nil { | |
| 87 panic(err) | |
| 88 } | |
| 89 return r | |
| 90 } | |
| 91 | |
| 92 // Write val as a funnybase to the ByteWriter. Will only return an error if | |
| 93 // the underlying ByteWriter returns an error. | |
| 94 func Write(w io.ByteWriter, val int64) error { | |
| 95 _, err := write(w, val, MaxFunnyBaseLen64) | |
| 96 return err | |
| 97 } | |
| 98 | |
| 99 func write(w io.ByteWriter, val int64, byteLimit uint) (uint, error) { | |
| 100 var inv byte | |
| 101 if val < 0 { | |
| 102 inv = 0xff | |
| 103 } | |
| 104 mag := uint64(val) | |
| 105 if inv != 0 { | |
| 106 mag = -mag | |
| 107 } | |
| 108 return writeSignMag(w, mag, inv, byteLimit) | |
| 109 } | |
| 110 | |
| 111 // PutUint encodes an int64 into buf and returns the number of bytes written. | |
| 112 // If the buffer's length is too small, Put will panic. | |
| 113 func PutUint(buf []byte, mag uint64) uint { | |
| 114 r, err := writeSignMag(bytes.NewBuffer(buf[:0]), mag, 0, uint(len(buf))) | |
| 115 if err != nil { | |
| 116 panic(err) | |
| 117 } | |
| 118 return r | |
| 119 } | |
| 120 | |
| 121 // WriteUint writes mag to the ByteWriter. Will only return an error if the | |
| 122 // underlying ByteWriter returns an error. | |
| 123 func WriteUint(w io.ByteWriter, mag uint64) error { | |
| 124 _, err := writeSignMag(w, mag, 0, MaxFunnyBaseLen64) | |
| 125 return err | |
| 126 } | |
| 127 | |
| 128 // Get decodes a funnybase-encoded number from a byte slice. Returns | |
| 129 // the decoded value as well as the number of bytes read. If an error | |
| 130 // occurs, the value is 0, and n will be: | |
| 131 // | |
| 132 // n == 0: buf too small | |
| 133 // n == -1: value overflows int64 | |
| 134 // n == -2: value underflows int64 | |
| 135 func Get(b []byte) (r int64, n int) { | |
| 136 buf := bytes.NewBuffer(b) | |
| 137 r, err := Read(buf) | |
| 138 switch err { | |
| 139 case ErrOverflow: | |
| 140 return 0, -1 | |
| 141 case ErrUnderflow: | |
| 142 return 0, -2 | |
| 143 case io.EOF: | |
| 144 return 0, 0 | |
| 145 } | |
| 146 n = len(b) - buf.Len() | |
| 147 return | |
| 148 } | |
| 149 | |
| 150 // GetUint decodes a funnybase-encoded number from a byte slice. Returns | |
| 151 // the decoded value as well as the number of bytes read. If an error | |
| 152 // occurs, the value is 0, and n will be: | |
| 153 // | |
| 154 // n == 0: buf too small | |
| 155 // n == -1: value overflows uint64 | |
| 156 // n == -2: value underflows uint64 | |
| 157 func GetUint(b []byte) (mag uint64, n int) { | |
| 158 buf := bytes.NewBuffer(b) | |
| 159 mag, err := ReadUint(buf) | |
| 160 switch err { | |
| 161 case ErrOverflow: | |
| 162 return 0, -1 | |
| 163 case ErrUnderflow: | |
| 164 return 0, -2 | |
| 165 case io.EOF: | |
| 166 return 0, 0 | |
| 167 } | |
| 168 n = len(b) - buf.Len() | |
| 169 return | |
| 170 } | |
| 171 | |
| 172 // Read decodes a funnybase-encoded number from a ByteReader. It | |
| 173 // returns err{Over,Under}flow if the number is out of bounds. It may also | |
| 174 // return an error if the ByteReader returns an error. | |
| 175 func Read(r io.ByteReader) (int64, error) { | |
| 176 pos, sigs, mag, err := readSignMag(r) | |
| 177 if err != nil { | |
| 178 return 0, err | |
| 179 } | |
| 180 if pos { | |
| 181 if sigs > 63 { | |
| 182 return 0, ErrOverflow | |
| 183 } | |
| 184 return int64(mag), nil | |
| 185 } | |
| 186 if mag > uint64(-math.MinInt64) { | |
| 187 return 0, ErrUnderflow | |
| 188 } | |
| 189 return int64(-mag), nil | |
| 190 } | |
| 191 | |
| 192 // ReadUint decodes a funnybase-encoded positive number from a ByteReader. It | |
| 193 // returns err{Over,Under}flow if the number is out of bounds. It may also | |
| 194 // return an error if the ByteReader returns an error. | |
| 195 func ReadUint(r io.ByteReader) (uint64, error) { | |
| 196 pos, _, mag, err := readSignMag(r) | |
| 197 if err != nil { | |
| 198 return 0, err | |
| 199 } | |
| 200 if !pos { | |
| 201 return 0, ErrUnderflow | |
| 202 } | |
| 203 return mag, err | |
| 204 } | |
| 205 | |
| 206 func writeSignMag(w io.ByteWriter, mag uint64, inv byte, byteLimit uint) (i uint
, err error) { | |
| 207 sigs := uint64Log2(mag) | |
| 208 | |
| 209 wb := func(b byte) error { | |
| 210 i++ | |
| 211 if i > byteLimit { | |
| 212 return ErrOutOfBounds | |
| 213 } | |
| 214 return w.WriteByte(b) | |
| 215 } | |
| 216 | |
| 217 if err = wb(byte(0x80|(sigs-1)) ^ inv); err != nil { | |
| 218 return | |
| 219 } | |
| 220 | |
| 221 for sigs > 8 { | |
| 222 sigs -= 8 | |
| 223 | |
| 224 if err = wb(byte(mag>>sigs) ^ inv); err != nil { | |
| 225 return | |
| 226 } | |
| 227 } | |
| 228 if sigs != 0 { | |
| 229 if err = wb(byte(mag<<(8-sigs)) ^ inv); err != nil { | |
| 230 return | |
| 231 } | |
| 232 } | |
| 233 | |
| 234 return | |
| 235 } | |
| 236 | |
| 237 func readSignMag(r io.ByteReader) (positive bool, sigs uint, mag uint64, err err
or) { | |
| 238 var inv byte | |
| 239 | |
| 240 b0, err := r.ReadByte() | |
| 241 if err != nil { | |
| 242 return | |
| 243 } | |
| 244 positive = true | |
| 245 if b0&0x80 == 0 { | |
| 246 positive = false | |
| 247 inv = 0xff | |
| 248 } | |
| 249 | |
| 250 sigs = uint((b0^inv)&0x7f) + 1 | |
| 251 if sigs > 64 { | |
| 252 err = ErrOverflow | |
| 253 return | |
| 254 } | |
| 255 | |
| 256 n := int((sigs+7)>>3) + 1 | |
| 257 | |
| 258 var b byte | |
| 259 shift := uint(64 - 8) | |
| 260 for i := 1; i < n; i++ { | |
| 261 b, err = r.ReadByte() | |
| 262 if err != nil { | |
| 263 return | |
| 264 } | |
| 265 mag |= uint64(b^inv) << shift | |
| 266 shift -= 8 | |
| 267 } | |
| 268 mag >>= 64 - sigs | |
| 269 | |
| 270 return | |
| 271 } | |
| OLD | NEW |