src/grisu3.cc - Issue 619005: Fast algorithm for double->string conversion.

Side by Side Diff: src/grisu3.cc

Issue 619005: Fast algorithm for double->string conversion. (Closed) Base URL: http://v8.googlecode.com/svn/branches/bleeding_edge/

Patch Set: '' Created 10 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
(Empty)
	1 // Copyright 2010 the V8 project authors. All rights reserved.

	2 // Redistribution and use in source and binary forms, with or without

	3 // modification, are permitted provided that the following conditions are

	4 // met:

	5 //

	6 // * Redistributions of source code must retain the above copyright

	7 // notice, this list of conditions and the following disclaimer.

	8 // * Redistributions in binary form must reproduce the above

	9 // copyright notice, this list of conditions and the following

	10 // disclaimer in the documentation and/or other materials provided

	11 // with the distribution.

	12 // * Neither the name of Google Inc. nor the names of its

	13 // contributors may be used to endorse or promote products derived

	14 // from this software without specific prior written permission.

	15 //

	16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS

	17 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT

	18 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR

	19 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT

	20 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,

	21 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT

	22 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,

	23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY

	24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT

	25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE

	26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

	27

	28 #include "v8.h"

	29

	30 #include "grisu3.h"

	31 #include "cached_powers.h"

	32 #include "diy_fp.h"

	33 #include "double.h"

	34

	35 namespace v8 {

	36 namespace internal {

	37

	38 template <int alpha = -60, int gamma = -32>

	39 class Grisu3 {

	40 public:

	41 // Provides a decimal representation of v.

	42 // Returns true if it succeeds, otherwise the result can not be trusted.

	43 // There will be *length digits inside the buffer (not null-terminated).

	44 // If the function returns true then v == (double) (buffer * 10^K).

	45 // The digits in the buffer are the shortest representation possible: no

	46 // 0.099999999999 instead of 0.1.

	47 // The last digit will be closest to the actual v. That is, even if several

	48 // digits might correctly yield 'v' when read again, the closest will be

	49 // computed.

	50 static bool grisu3(double v, char* buffer, int* length, int* K);

	51

	52 private:

	53 static bool RoundWeed(char* buffer, int len, uint64_t wp_W, uint64_t Delta,

	54 uint64_t rest, uint64_t ten_kappa, uint64_t ulp);

	55 static bool DigitGen(DiyFp low, DiyFp w, DiyFp high,

	56 char* buffer, int* len, int* kappa);

	57 static bool DigitGen_m60_m32(DiyFp low, DiyFp w, DiyFp high,

	58 char* buffer, int* length, int* kappa);

	59 };

	60

	61

	62 template<int alpha, int gamma>

	63 bool Grisu3<alpha, gamma>::grisu3(double v, char* buffer, int* length, int* K) {

	64 DiyFp w = Double(v).AsNormalizedDiyFp();

	65 // m_minus and m_plus are the boundaries between w and its neighbors. Any

	66 // number x such that m_minus < x < m_plus will round to v when read as

	67 // double. When m_minus == x or m_plus == y then the rounding direction

	68 // depends on v. Grisu3 does not need to deal with this case, as its precision

	69 // is not sufficient for this case anyways.

	70 DiyFp m_minus, m_plus;

	71 Double(v).NormalizedBoundaries(&m_minus, &m_plus);

	72 ASSERT(m_plus.e() == w.e());

	73 DiyFp ten_mk; // Cached power of ten: 10^-k

	74 int mk; // -k

	75 GetCachedPower(w.e() + DiyFp::kSignificandSize, alpha, gamma, &mk, &ten_mk);

	76 ASSERT(alpha <= w.e() + ten_mk.e() + DiyFp::kSignificandSize &&

	77 gamma >= w.e() + ten_mk.e() + DiyFp::kSignificandSize);

	78 // Note that ten_mk is only an approximation of 10^-k. A DiyFp only contains a

	79 // 64 bit significand and ten_mk is thus only precise up to 64 bits.

	80

	81 // The DiyFp::Times procedure rounds its result, and ten_mk is approximated

	82 // too. The variable scaled_w (as well as scaled_m_minus/plus) are now off

	83 // by a small amount.

	84 // In fact: scaled_w - w*10^k < 1ulp (unit in the last place) of scaled_w.

	85 // In other words: let f = scaled_w.f() and e = scaled_w.e(), then

	86 // (f-1) * 2^e < w10^k < (f+1) 2^e

	87 DiyFp scaled_w = DiyFp::Times(w, ten_mk);

	88 ASSERT(scaled_w.e() == m_plus.e() + ten_mk.e() + DiyFp::kSignificandSize);

	89 // In theory it would be possible to avoid some recomputations by computing

	90 // the difference between w and m_minus/plus (a power of 2) and to compute

	91 // scaled_m_minus/plus by subtracting/adding from scaled_w. However the

	92 // code becomes much less readable and the speed enhancements are not

	93 // terrible.

	94 DiyFp scaled_m_minus = DiyFp::Times(m_minus, ten_mk);

	95 DiyFp scaled_m_plus = DiyFp::Times(m_plus, ten_mk);

	96

	97 // DigitGen will generate the digits of scaled_w. Therefore we have

	98 // v == (double) (scaled_w * 10^-mk).

	99 // Set K == -mk and pass it to DigitGen. If scaled_w is a comma-number it will

	100 // be updated.

	101 int kappa;

	102 bool result =

	103 DigitGen(scaled_m_minus, scaled_w, scaled_m_plus, buffer, length, &kappa);

	104 *K = -mk + kappa;

	105 return result;

	106 }

	107

	108 // Generates the digits of input number w.

	109 // w is a floating-point number (DiyFp), consisting of a significand and an

	110 // exponent. Its exponent is bounded by alpha and gamma. Typically alpha >= -63

	111 // and gamma <= 3.

	112 // Returns false if it fails, in which case the generated digits in the buffer

	113 // should not be used.

	114 // Preconditions:

	115 // * low, w and high are correct up to 1 ulp (unit in the last place). That

	116 // is, their error must be less that a unit of their last digits.

	117 // * low.e() == w.e() == high.e()

	118 // * low < w < high, and taking into account their error: low~ <= high~

	119 // * alpha <= w.e() <= gamma

	120 // Postconditions: returns false if procedure fails.

	121 // otherwise:

	122 // * buffer is not null-terminated, but len contains the number of digits.

	123 // * buffer contains the shortest possible decimal digit-sequence

	124 // such that LOW < buffer * 10^kappa < HIGH, where LOW and HIGH are the

	125 // correct values of low and high (without their error).

	126 // * if more than one decimal representation gives the minimal number of

	127 // decimal digits then the one closest to W (where W is the correct value

	128 // of w) is chosen.

	129 // Remark: this procedure takes into account the imprecission of its input
	fschneider 2010/02/22 16:25:24 Typo: -> precision Typo: -> precision Florian Loitsch 2010/02/23 09:05:10 Done. Show quoted text On 2010/02/22 16:25:24, fschneider wrote: > Typo: -> precision Done.
	130 // numbers. If the precision is not enough to guarantee all the postconditions

	131 // then false is returned. This usually happens rarely (~0.5%).

	132 template<int alpha, int gamma>

	133 bool Grisu3<alpha, gamma>::DigitGen(DiyFp low, DiyFp w, DiyFp high,

	134 char* buffer, int* len, int* kappa) {

	135 ASSERT(low.e() == w.e() && w.e() == high.e());

	136 ASSERT(low.f() + 1 <= high.f() - 1);

	137 ASSERT(alpha <= w.e() && w.e() <= gamma);

	138 // The following tests use alpha and gamma to avoid unnecessary dynamic tests.

	139 if ((alpha >= -60 && gamma <= -32) \|\| // -60 <= w.e() <= -32

	140 (alpha <= -32 && gamma >= -60 && // Alpha/gamma overlaps -60/-32 region.

	141 -60 <= w.e() && w.e() <= -32)) {

	142 return DigitGen_m60_m32(low, w, high, buffer, len, kappa);

	143 } else {

	144 // A simple adaption of the special case -60/-32 would allow greater ranges

	145 // of alpha/gamma and thus reduce the number of precomputed cached powers of

	146 // ten.

	147 UNIMPLEMENTED();

	148 return false;

	149 }

	150 }

	151

	152 static const uint32_t kTen4 = 10000;

	153 static const uint32_t kTen5 = 100000;

	154 static const uint32_t kTen6 = 1000000;

	155 static const uint32_t kTen7 = 10000000;

	156 static const uint32_t kTen8 = 100000000;

	157 static const uint32_t kTen9 = 1000000000;

	158

	159 // Returns the biggest power of ten that is <= than the given number. We

	160 // furthermore receive the maximum number of bits 'number' has.

	161 // If number_bits == 0 then 0^-1 is returned

	162 // The number of bits must be <= 32.

	163 static void BiggestPowerTen(uint32_t number, int number_bits, uint32_t* power, i nt* exponent) {

	164 switch (number_bits) {

	165 case 30:

	166 case 31:

	167 case 32:
	Lasse Reichstein 2010/02/22 11:31:36 For readability, could you reverse the order of th For readability, could you reverse the order of the cases in each group, so they come in decreasing order Florian Loitsch 2010/02/22 15:52:53 Done. Show quoted text On 2010/02/22 11:31:36, Lasse Reichstein wrote: > For readability, could you reverse the order of the cases in each group, so they > come in decreasing order Done. floitsch 2012/04/10 15:54:34 Done. Show quoted text On 2010/02/22 11:31:36, Lasse Reichstein wrote: > For readability, could you reverse the order of the cases in each group, so they > come in decreasing order Done.
	168 if (kTen9 <= number) {

	169 *power = kTen9;

	170 *exponent = 9;

	171 break;

	172 } // else fallthrough

	173 case 27:

	174 case 28:

	175 case 29:

	176 if (kTen8 <= number) {

	177 *power = kTen8;

	178 *exponent = 8;

	179 break;

	180 } // else fallthrough

	181 case 24:

	182 case 25:

	183 case 26:

	184 if (kTen7 <= number) {

	185 *power = kTen7;

	186 *exponent = 7;

	187 break;

	188 } // else fallthrough

	189 case 20:

	190 case 21:

	191 case 22:

	192 case 23:

	193 if (kTen6 <= number) {

	194 *power = kTen6;

	195 *exponent = 6;

	196 break;

	197 } // else fallthrough

	198 case 17:

	199 case 18:

	200 case 19:

	201 if (kTen5 <= number) {

	202 *power = kTen5;

	203 *exponent = 5;

	204 break;

	205 } // else fallthrough

	206 case 14:

	207 case 15:

	208 case 16:

	209 if (kTen4 <= number) {

	210 *power = kTen4;

	211 *exponent = 4;

	212 break;

	213 } // else fallthrough

	214 case 10:

	215 case 11:

	216 case 12:

	217 case 13:

	218 if (1000 <= number) {

	219 *power = 1000;

	220 *exponent = 3;

	221 break;

	222 } // else fallthrough

	223 case 7:

	224 case 8:

	225 case 9:

	226 if (100 <= number) {

	227 *power = 100;

	228 *exponent = 2;

	229 break;

	230 } // else fallthrough

	231 case 4:

	232 case 5:

	233 case 6:

	234 if (10 <= number) {

	235 *power = 10;

	236 *exponent = 1;

	237 break;

	238 } // else fallthrough

	239 case 1:

	240 case 2:

	241 case 3:

	242 if (1 <= number) {

	243 *power = 1;

	244 *exponent = 0;

	245 break;

	246 } // else fallthrough

	247 case 0:

	248 *power = 0;

	249 *exponent = -1;

	250 break;

	251 default:

	252 // Following assignments are here to silence compiler warnings.

	253 *power = 0;

	254 *exponent = 0;

	255 UNREACHABLE();

	256 }

	257 }

	258

	259

	260 // Same comments as for DigitGen but with additional precondition:

	261 // -60 <= w.e() <= -32

	262 //

	263 // Say, for the sake of example, that

	264 // w.e() == -48, and w.f() == 0x1234567890abcdef

	265 // w's value can be computed by w.f() * 2^w.e()

	266 // We can obtain w's integral by simply shifting w.f() by -w.e().

	267 // -> w's integral is 0x1234

	268 // w's fractional part is therefore 0x567890abcdef.

	269 // Printing w's integral part is easy (simply print 0x1234 in decimal).

	270 // In order to print its fraction we repeatedly multiply the fraction by 10 and

	271 // get each digit. Example the first digit after the comma would be computed by

	272 // (0x567890abcdef * 10) >> 48. -> 3

	273 // The whole thing becomes slightly more complicated because we want to stop

	274 // once we have enough digits. That is, once the digits inside the buffer

	275 // represent 'w' we can stop. Everything inside the interval low - high

	276 // represents w. However we have to pay attention to low, high and w's

	277 // imprecision.

	278 template<int alpha, int gamma>

	279 bool Grisu3<alpha, gamma>::DigitGen_m60_m32(

	280 DiyFp low, DiyFp w, DiyFp high, char* buffer, int* length, int* kappa) {

	281 // low, w and high are imprecise, but by less than one ulp (unit in the last

	282 // place).

	283 // If we remove (resp. add) 1 ulp from low (resp. high) we are certain that

	284 // the new numbers are outside of the interval we want the final

	285 // representation to lie in.

	286 // Inversely adding (resp. removing) 1 ulp from low (resp. high) would yield

	287 // numbers that are certain to lie in the interval. We will use this fact

	288 // later on.

	289 // We will now start by generating the digits within the uncertain

	290 // interval. Later we will weed out representations that lie outside the safe

	291 // interval and thus _might_ lie outside the correct interval.

	292 uint64_t unit = 1;

	293 DiyFp too_low = DiyFp(low.f() - unit, low.e());

	294 DiyFp too_high = DiyFp(high.f() + unit, high.e());

	295 // too_low and too_high are guaranteed to lie outside the interval we want the

	296 // generated number in.

	297 DiyFp unsafe_interval = DiyFp::Minus(too_high, too_low);

	298 // We now cut the input number into two parts: the integrals and the

	299 // fractionals. We will not write any decimal separator though, but adapt

	300 // kappa instead.

	301 // Reminder: we are currently computing the digits (stored inside the buffer)

	302 // such that: too_low < buffer * 10^kappa < too_high

	303 // We use too_high for the digit_generation and stop as soon as possible.

	304 // If we stop early we effectively round down.

	305 DiyFp one = DiyFp(static_cast<uint64_t>(1) << -w.e(), w.e());

	306 uint32_t integrals = too_high.f() >> -one.e(); // Division by one.

	307 uint64_t fractionals = too_high.f() & (one.f() - 1); // Modulo by one.

	308 uint32_t divider;

	309 int divider_exponent;

	310 BiggestPowerTen(integrals, DiyFp::kSignificandSize - (-one.e()),

	311 &divider, &divider_exponent);

	312 *kappa = divider_exponent + 1;

	313 *length = 0;

	314 // Loop invariant: buffer = too_high / 10^kappa (integer division)

	315 // The invariant holds for the first iteration: kappa has been initialized

	316 // with the divider exponent + 1. And the divider is the biggest power of ten

	317 // that fits into the bits that had been reserved for the integrals.

	318 while (*kappa > 0) {

	319 int digit = integrals / divider;

	320 buffer[*length] = '0' + digit;

	321 (*length)++;

	322 integrals %= divider;

	323 (*kappa)--;

	324 // Note that kappa now equals the exponent of the divider and that the

	325 // invariant thus holds again.

	326 uint64_t rest =

	327 (static_cast<uint64_t>(integrals) << -one.e()) + fractionals;

	328 // Invariant: too_high = buffer * 10^kappa + DiyFp(rest, one.e())

	329 // Reminder: unsafe_interval.e() == one.e()

	330 if (rest < unsafe_interval.f()) {

	331 // Rounding down (by not emitting the remaining digits) yields a number

	332 // that lies within the unsafe interval.

	333 return RoundWeed(buffer, *length, DiyFp::Minus(too_high, w).f(),

	334 unsafe_interval.f(), rest,

	335 static_cast<uint64_t>(divider) << -one.e(), unit);

	336 }

	337 divider /= 10;

	338 }

	339 // The integrals have been generated. We are at the point of the decimal

	340 // separator. In the following loop we simply multiply the remaining digits by

	341 // 10 and divide by one. We just need to pay attention to multiply associated

	342 // data (like the interval or 'unit'), too.

	343 // Instead of multiplying by 10 we multiply by 5 (cheaper operation) and

	344 // increase its (imaginary) exponent. At the same time we decrease the

	345 // divider's (one's) exponent and shift its significand.

	346 // Basically, if fractionals was a DiyFp (with fractionals.e == one.e):

	347 // fractionals.f *= 10;

	348 // fractionals.f >>= 1; fractionals.e++; // value remains unchanged.

	349 // one.f >>= 1; one.e++; // value remains unchanged.

	350 // and we have again fractionals.e == one.e which allows us to divide

	351 // fractionals.f() by one.f()

	352 // We simply combine the *= 10 and the >>= 1.

	353 while (true) {

	354 fractionals *= 5;

	355 unit *= 5;

	356 unsafe_interval.set_f(unsafe_interval.f() * 5);

	357 unsafe_interval.set_e(unsafe_interval.e() + 1); // Will be optimized out.

	358 one.set_f(one.f() >> 1);

	359 one.set_e(one.e() + 1);

	360 int digit = fractionals >> -one.e(); // Integer division by one.

	361 buffer[*length] = '0' + digit;

	362 (*length)++;

	363 fractionals &= one.f() - 1; // Modulo by one.

	364 (*kappa)--;

	365 if (fractionals < unsafe_interval.f()) {

	366 return RoundWeed(buffer, length, DiyFp::Minus(too_high, w).f() unit,

	367 unsafe_interval.f(), fractionals, one.f(), unit);

	368 }

	369 }

	370 }

	371

	372

	373 // Rounds the given generated digits in the buffer and weeds out generated

	374 // digits that are not in the safe interval, or where we cannot find a rounded

	375 // representation.

	376 // Input: * buffer containing the digits of too_high / 10^kappa

	377 // * the buffer's length

	378 // * distance_too_high_w == (too_high - w).f() * unit

	379 // * unsafe_interval == (too_high - too_low).f() * unit

	380 // * rest = (too_high - buffer * 10^kappa).f() * unit

	381 // * ten_kappa = 10^kappa * unit

	382 // * unit = the common multiplier

	383 // Output: returns true on success.

	384 // Modifies the generated digits in the buffer to approach (round towards) w.

	385 template<int alpha, int gamma>

	386 bool Grisu3<alpha, gamma>::RoundWeed(

	387 char* buffer, int length, uint64_t distance_too_high_w,

	388 uint64_t unsafe_interval, uint64_t rest, uint64_t ten_kappa,

	389 uint64_t unit) {

	390 uint64_t small_distance = distance_too_high_w - unit;

	391 uint64_t big_distance = distance_too_high_w + unit;

	392 // Let w- = too_high - big_distance, and

	393 // w+ = too_high - small_distance.

	394 // Note: w- < w < w+

	395 //

	396 // The real w (* unit) must lie somewhere inside the interval

	397 // ]w-; w+[ (also often written as (w-; w+))

	398

	399 // Basically the buffer currently contains a number in the unsafe interval

	400 // ]too_low; too_high[ with too_low < w < too_high

	401 //

	402 // By generating the digits of too_high we got the biggest last digit.

	403 // In the case that w+ < buffer < too_high we try to decrement the buffer.

	404 // This way the buffer approaches (rounds towards) w.

	405 // There are 3 conditions that stop the decrementation process:

	406 // 1) the buffer is already below w+

	407 // 2) decrementing the buffer would make it leave the unsafe interval

	408 // 3) decrementing the buffer would yield a number below w+ and farther away

	409 // than the current number. In other words:

	410 // (buffer{-1} < w+) && w+ - buffer{-1} > buffer - w+

	411 // Instead of using the buffer directly we use its distance to too_high.

	412 // Conceptually rest ~= too_high - buffer

	413 while (rest < small_distance && // condition 1

	414 unsafe_interval - rest >= ten_kappa && // condition 2

	415 (rest + ten_kappa < small_distance \|\| // buffer{-1} > w+

	416 small_distance - rest >= rest + ten_kappa - small_distance)) {

	417 buffer[length - 1]--;

	418 rest += ten_kappa;

	419 }

	420

	421 // We have approached w+ as much as possible. We now test if approaching w-

	422 // would require changing the buffer. If yes, then we have two possible

	423 // representations close to w, but we cannot decide which one is closer.

	424 if (rest < big_distance &&

	425 unsafe_interval - rest >= ten_kappa &&

	426 (rest + ten_kappa < big_distance \|\|

	427 big_distance - rest > rest + ten_kappa - big_distance)) {

	428 return false;

	429 }

	430

	431 // Weeding test.

	432 // The safe interval is [too_low + 2 ulp; too_high - 2 ulp]

	433 // Since too_low = too_high - unsafe_interval this is equivalent too

	434 // [too_high - unsafe_interval + 4 ulp; too_high - 2 ulp]

	435 // Conceptually we have: rest ~= too_high - buffer

	436 return (2 * unit <= rest) && (rest <= unsafe_interval - 4 * unit);

	437 }

	438

	439

	440 bool grisu3(double v, char* buffer, int* sign, int* length, int* decimal_point) {

	441 if (v < 0) {

	442 v = -v;

	443 *sign = 1;

	444 } else {

	445 *sign = 0;

	446 }

	447 int K;
	Lasse Reichstein 2010/02/22 11:31:36 Lower case variable names (and preferably not sing Lower case variable names (and preferably not single-letter names). Florian Loitsch 2010/02/22 15:52:53 Done. Show quoted text On 2010/02/22 11:31:36, Lasse Reichstein wrote: > Lower case variable names (and preferably not single-letter names). Done. floitsch 2012/04/10 15:54:34 Done. Show quoted text On 2010/02/22 11:31:36, Lasse Reichstein wrote: > Lower case variable names (and preferably not single-letter names). Done.
	448 bool result = Grisu3<-60, -32>::grisu3(v, buffer, length, &K);

	449 decimal_point = length + K;

	450 buffer[*length] = '\0';

	451 return result;

	452 }

	453

	454 } } // namespace v8::internal

OLD	NEW

« src/globals.h ('K') | « src/grisu3.h ('k') | src/powers_ten.h » ('j') | src/powers_ten.h » ('J')