base/debug/format.cc - Issue 18656004: Added a new SafeSPrintf() function that implements snprintf() in an async-safe-fashion

Side by Side Diff: base/debug/format.cc

Issue 18656004: Added a new SafeSPrintf() function that implements snprintf() in an async-safe-fashion (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: Addressed Jeffrey's comments Created 7 years, 4 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
(Empty)
	1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.

	2 // Use of this source code is governed by a BSD-style license that can be

	3 // found in the LICENSE file.

	4 //

	5 // Author: markus@chromium.org

	6

	7 #include <limits>

	8

	9 #include "base/debug/format.h"

	10

	11 #if !defined(NDEBUG)

	12 // In debug builds, we use RAW_CHECK() to print useful error messages, if

	13 // Format() is called with broken arguments.

	14 // As our contract promises that Format() can be called from any crazy
	jln (very slow on Chromium) 2013/08/01 00:03:15 nit: remove crazy
	15 // run-time context, it is not actually safe to call logging functions from it;

	16 // and we only ever do so for debug builds and hope for the best.

	17 // We should _never_ call any logging function other than RAW_CHECK(), and

	18 // we should _never_ include any logging code that is active in production

	19 // builds.

	20 // In other words; please do not remove the #ifdef around this #include.

	21 // Instead, in production builds we opt for returning a degraded result,

	22 // whenever an error is encountered.

	23 // E.g. The broken function call

	24 // Format("errno = %d (%x)", errno, strerror(errno))

	25 // will print something like

	26 // errno = 13 (%x)

	27 // instead of

	28 // errno = 13 (Access denied)

	29 // In most of the anticipated use cases, that's probably the preferred

	30 // behavior.

	31 #include "base/logging.h"

	32 #define RAW_DCHECK RAW_CHECK

	33 #else

	34 #define RAW_DCHECK(x) do { if (x) { } } while (0)
	jln (very slow on Chromium) 2013/08/01 00:03:15 Do you want to just add this to base/logging.h? It's about time we have a RAW_DCHECK. Also, DCHECK is not exactly CHECK if !NDEBUG; we compile certain release builds with DCHECKs enabled to improve coverage.
	35 #endif

	36

	37

	38 namespace base {

	39 namespace debug {

	40

	41 // The code in this file is extremely careful to be async-signal-safe.

	42 //

	43 // Most obviously, we avoid calling any code that could dynamically allocate

	44 // memory. Doing so would almost certainly result in bugs and dead-locks.

	45 // We also avoid calling any other STL functions that could have unintended

	46 // side-effects involving memory allocation or access to other shared

	47 // resources.

	48 //

	49 // But on top of that, we also avoid calling other library functions, as many

	50 // of them have the side-effect of calling getenv() (in order to deal with

	51 // localization) or accessing errno. The latter sounds benign, but there are

	52 // several execution contexts where it isn't even possible to safely read let

	53 // alone write errno.

	54 //

	55 // The stated design goal of the Format() function is that it can be called

	56 // from any context that can safely call C or C++ code (i.e. anything that

	57 // doesn't require assembly code).

	58 //

	59 // For a brief overview of some but not all of the issues with async-signal-

	60 // safety, refer to:

	61 // http://pubs.opengroup.org/onlinepubs/009695399/functions/xsh_chap02_04.html

	62

	63 namespace {

	64
	jln (very slow on Chromium) 2013/08/01 00:03:15 Please document this function and its |parameters|.
	65 inline bool IncrementCount(size_t* count, size_t inc = 1) {
	jln (very slow on Chromium) 2013/08/01 00:03:15 Please, avoid default parameters. Just have a Incr Please, avoid default parameters. Just have a IncrementCountGeneric() to implement the general case.
	66 // "inc" is either 1 or a "padding" value. Padding is clamped at run-time to

	67 // at most SSIZE_MAX. So, we know that "inc" is always in the range

	68 // 1..SSIZE_MAX.

	69 // This allows us to compute "SSIZE_MAX - inc" without incurring any

	70 // integer overflows.

	71 RAW_DCHECK((size_t)inc <= (size_t)std::numeric_limits<ssize_t>::max());

	72 if (*count > std::numeric_limits<ssize_t>::max() - inc) {

	73 *count = std::numeric_limits<ssize_t>::max();

	74 return false;

	75 } else {

	76 *count += inc;

	77 return true;

	78 }

	79 }

	80

	81 inline bool Out(char* buf, size_t sz, size_t* count, char ch) {
	jln (very slow on Chromium) 2013/08/01 00:03:15 Please document this function and its \|parameters\| Please document this function and its \|parameters\|
	82 if (*count + 1 < sz) {
	jln (very slow on Chromium) 2013/08/01 00:03:15 if (sz >= 1 && count < sz - 1) if (sz >= 1 && count < sz - 1)
	83 buf[*count] = ch;

	84 IncrementCount(count);

	85 return true;

	86 }

	87 IncrementCount(count);

	88 return false;

	89 }

	90

	91 inline void Pad(char* buf, size_t sz, size_t* count, char pad, size_t padding,
	jln (very slow on Chromium) 2013/08/01 00:03:15 Please document this function and its \|parameters\| Please document this function and its \|parameters\|
	92 size_t len, char** ptr) {

	93 char dst = ptr;

	94 for (; padding > len; --padding)

	95 if (Out(buf, sz, count, pad))

	96 ++dst;

	97 else {

	98 if (--padding)

	99 IncrementCount(count, padding-len);

	100 break;

	101 }

	102 *ptr = dst;

	103 }

	104

	105 // POSIX doesn't define any async-signal-safe function for converting

	106 // an integer to ASCII. Define our own version.

	107 //

	108 // This also gives us the ability to make the function a little more powerful

	109 // and have it deal with padding, with truncation, and with predicting the

	110 // length of the untruncated output.

	111 //

	112 // IToASCII() converts an (optionally signed) integer to ASCII. It never

	113 // writes more than "sz" bytes. Output will be truncated as needed, and a NUL

	114 // character is appended, unless "sz" is zero. It returns the number of non-NUL

	115 // bytes that would be output if no truncation had happened.

	116 //

	117 // It supports bases 2 through 16. Padding can be done with either '0' zeros

	118 // or ' ' spaces.

	119 size_t IToASCII(bool sign, bool upcase, int64_t i, char* buf, size_t sz,

	120 int base, size_t padding, char pad) {

	121 // Sanity check for the "base".

	122 if (base < 2 \|\| base > 16 \|\| (sign && base != 10)) {

	123 if (static_cast<ssize_t>(sz) >= 1)

	124 buf[0] = '\000';

	125 return 0;

	126 }

	127

	128 // Handle negative numbers, if requested by caller.

	129 size_t count = 0;

	130 size_t n = 1;

	131 char* start = buf;

	132 int minint = 0;

	133 bool needs_minus = false;

	134 uint64_t num;

	135 if (sign && i < 0) {

	136 // If we aren't inserting padding, or if we are padding with '0' zeros,

	137 // we should insert the minus character now. It makes it easier to

	138 // correctly deal with truncated padded numbers.

	139 // On the other hand, if we are padding with ' ' spaces, we have to

	140 // delay outputting the minus character until later.

	141 if (padding <= 2 \|\| pad == '0') {

	142 ++count;

	143

	144 // Make sure we can write the '-' character.

	145 if (++n > sz) {

	146 if (sz > 0)

	147 *start = '\000';

	148 } else

	149 *start++ = '-';

	150

	151 // Adjust padding, since we just output one character already.

	152 if (padding)

	153 --padding;

	154 } else

	155 needs_minus = true;

	156

	157 // Turn our number positive.

	158 if (i == std::numeric_limits<int64_t>::min()) {

	159 // The most negative integer needs special treatment.

	160 minint = 1;

	161 num = -(i + 1);

	162 } else {

	163 // "Normal" negative numbers are easy.

	164 num = -i;

	165 }

	166 } else

	167 num = i;

	168

	169 // Loop until we have converted the entire number. Output at least one

	170 // character (i.e. '0').

	171 char* ptr = start;

	172 bool started = false;

	173 do {

	174 // Sanity check. If padding is used to fill the entire address space,

	175 // don't allow more than SSIZE_MAX bytes.

	176 if (++count == static_cast<size_t>(std::numeric_limits<ssize_t>::max())) {

	177 RAW_DCHECK(count <

	178 static_cast<size_t>(std::numeric_limits<ssize_t>::max()));

	179 break;

	180 }

	181

	182 // Make sure there is still enough space left in our output buffer.

	183 if (n == sz) {

	184 if (ptr > start) {

	185 // It is rare that we need to output a partial number. But if asked

	186 // to do so, we will still make sure we output the correct number of

	187 // leading digits.

	188 // Since we are generating the digits in reverse order, we actually

	189 // have to discard digits in the order that we have already emitted

	190 // them. This is essentially equivalent to:

	191 // memmove(start, start+1, --ptr - start)

	192 --ptr;

	193 for (char* move = start; move < ptr; ++move)

	194 *move = move[1];

	195 } else

	196 goto cannot_write_anything_but_nul;
	jln (very slow on Chromium) 2013/08/01 00:03:15 Any way to split this to a subfunction ? Any way to split this to a subfunction ?
	197 } else

	198 ++n;

	199

	200 // Output the next digit and (if necessary) compensate for the lowest-

	201 // most negative integer needing special treatment. This works because,

	202 // no matter the bit width of the integer, the lowest-most decimal

	203 // integer always ends in 2, 4, 6, or 8.

	204 if (n <= sz) {

	205 if (!num && started)

	206 if (needs_minus) {

	207 *ptr++ = '-';

	208 needs_minus = false;

	209 } else

	210 *ptr++ = pad;

	211 else {

	212 started = true;

	213 *ptr++ = (upcase ? "0123456789ABCDEF" : "0123456789abcdef")

	214 [num%base+minint];

	215 }

	216 }

	217

	218 cannot_write_anything_but_nul:

	219 minint = 0;

	220 num /= base;

	221

	222 // Add padding, if requested.

	223 if (padding > 0) {

	224 --padding;

	225

	226 // Performance optimization for when we are asked to output

	227 // excessive padding, but our output buffer is limited in size.

	228 // Even if we output a 128bit number in binary, we would never

	229 // write more than 130 characters. So, anything beyond this limit

	230 // and we can compute the result arithmetically.

	231 if (count > n && count - n > 130) {

	232 IncrementCount(&count, padding);

	233 padding = 0;

	234 }

	235 }

	236 } while (num \|\| padding \|\| needs_minus);

	237

	238 // Terminate the output with a NUL character.

	239 if (sz > 0)

	240 *ptr = '\000';

	241

	242 // Conversion to ASCII actually resulted in the digits being in reverse

	243 // order. We can't easily generate them in forward order, as we can't tell

	244 // the number of characters needed until we are done converting.

	245 // So, now, we reverse the string (except for the possible '-' sign).

	246 while (--ptr > start) {

	247 char ch = *ptr;

	248 ptr = start;

	249 *start++ = ch;

	250 }

	251 return count;

	252 }

	253

	254 } // anonymous namespace

	255

	256 ssize_t internal::FormatN(char* buf, size_t sz, const char* fmt,

	257 const Arg* args, const size_t max_args) {

	258 // Make sure we can write at least one NUL byte.

	259 if (static_cast<ssize_t>(sz) < 1)

	260 return -1;

	261

	262 // Iterate over format string and interpret '%' arguments as they are

	263 // encountered.

	264 char* ptr = buf;

	265 size_t padding;

	266 char pad;

	267 size_t count = 0;

	268 for (unsigned int cur_arg = 0;

	269 *fmt &&

	270 count != static_cast<size_t>(std::numeric_limits<ssize_t>::max()); ) {

	271 if (*fmt++ == '%') {

	272 padding = 0;

	273 pad = ' ';

	274 char ch = *fmt++;

	275 format_character_found:

	276 switch (ch) {

	277 case '0': case '1': case '2': case '3': case '4':

	278 case '5': case '6': case '7': case '8': case '9':

	279 // Found a width parameter. Convert to an integer value and store in

	280 // "padding". If the leading digit is a zero, change the padding

	281 // character from a space ' ' to a zero '0'.

	282 pad = ch == '0' ? '0' : ' ';

	283 for (;;) {

	284 const size_t max_padding = std::numeric_limits<ssize_t>::max();

	285 if (padding > max_padding/10 \|\|

	286 10*padding > max_padding - (ch - '0')) {

	287 RAW_DCHECK(padding <= max_padding/10 &&

	288 10*padding <= max_padding - (ch - '0'));

	289 // Integer overflow detected. Skip the rest of the width until

	290 // we find the format character, then do the normal error handling.

	291 while ((ch = *fmt++) >= '0' && ch <= '9') {

	292 }

	293 goto fail_to_expand;

	294 }

	295 padding = 10*padding + ch - '0';

	296 ch = *fmt++;

	297 if (ch < '0' \|\| ch > '9') {

	298 // Reached the end of the width parameter. This is where the format

	299 // character is found.

	300 goto format_character_found;

	301 }

	302 }

	303 break;

	304 case 'c': { // Output an ASCII character.

	305 // Check that there are arguments left to be inserted.

	306 if (cur_arg >= max_args) {

	307 RAW_DCHECK(cur_arg < max_args);

	308 goto fail_to_expand;

	309 }

	310

	311 // Check that the argument has the expected type.

	312 const Arg& arg = args[cur_arg++];

	313 if (arg.type_ != Arg::INT &&

	314 arg.type_ != Arg::UINT) {

	315 RAW_DCHECK(arg.type_ == Arg::INT \|\|

	316 arg.type_ == Arg::UINT);

	317 goto fail_to_expand;

	318 }

	319

	320 // Apply padding, if needed.

	321 Pad(buf, sz, &count, ' ', padding, 1, &ptr);

	322

	323 // Convert the argument to an ASCII character and output it.

	324 char ch = static_cast<char>(arg.i_);

	325 if (!ch)

	326 goto end_of_output_buffer;

	327 if (Out(buf, sz, &count, ch))

	328 ++ptr;

	329 break; }

	330 case 'd': { // Output a signed or unsigned integer-like value.

	331 // Check that there are arguments left to be inserted.

	332 if (cur_arg >= max_args) {

	333 RAW_DCHECK(cur_arg < max_args);

	334 goto fail_to_expand;

	335 }

	336

	337 // Check that the argument has the expected type.

	338 const Arg& arg = args[cur_arg++];

	339 if (arg.type_ != Arg::INT &&

	340 arg.type_ != Arg::UINT) {

	341 RAW_DCHECK(arg.type_ == Arg::INT \|\|

	342 arg.type_ == Arg::UINT);

	343 goto fail_to_expand;

	344 }

	345

	346 // Our implementation of IToASCII() can handle all widths of data types

	347 // and can print both signed and unsigned values.

	348 IncrementCount(&count,

	349 IToASCII(arg.type_ == Arg::INT, false, arg.i_,

	350 ptr, sz - (ptr - buf), 10, padding, pad));

	351

	352 // Advance "ptr" to the end of the string that was just emitted.

	353 if (sz - (ptr - buf))

	354 while (*ptr)

	355 ++ptr;

	356 break; }

	357 case 'x': // Output an unsigned hexadecimal value.

	358 case 'X':

	359 case 'p': { // Output a pointer value.

	360 // Check that there are arguments left to be inserted.

	361 if (cur_arg >= max_args) {

	362 RAW_DCHECK(cur_arg < max_args);

	363 goto fail_to_expand;

	364 }

	365

	366 const Arg& arg = args[cur_arg++];

	367 int64_t i;

	368 switch (ch) {

	369 case 'x': // Hexadecimal values are available for integer-like args.

	370 case 'X':

	371 // Check that the argument has the expected type.

	372 if (arg.type_ != Arg::INT &&

	373 arg.type_ != Arg::UINT) {

	374 RAW_DCHECK(arg.type_ == Arg::INT \|\|

	375 arg.type_ == Arg::UINT);

	376 goto fail_to_expand;

	377 }

	378 i = arg.i_;

	379

	380 // The Arg() constructor automatically performed sign expansion on

	381 // signed parameters. This is great when outputting a %d decimal

	382 // number, but can result in unexpected leading 0xFF bytes when

	383 // outputting a %c hexadecimal number. Mask bits, if necessary.

	384 // We have to do this here, instead of in the Arg() constructor, as

	385 // the Arg() constructor cannot tell whether we will output a %d

	386 // or a %x. Only the latter should experience masking.

	387 if (arg.width_ < sizeof(int64_t))

	388 i &= (1LL << (8*arg.width_)) - 1;

	389 break;

	390 default:

	391 // Pointer values require an actual pointer or a string.

	392 if (arg.type_ == Arg::POINTER)

	393 i = reinterpret_cast<uintptr_t>(arg.ptr_);

	394 else if (arg.type_ == Arg::STRING)

	395 i = reinterpret_cast<uintptr_t>(arg.s_);

	396 else if (arg.type_ == Arg::INT && arg.width_ == sizeof(void *) &&

	397 arg.i_ == 0) // Allow C++'s version of NULL

	398 i = 0;

	399 else {

	400 RAW_DCHECK(arg.type_ == Arg::POINTER \|\|

	401 arg.type_ == Arg::STRING);

	402 goto fail_to_expand;

	403 }

	404

	405 // Pointers always include the "0x" prefix. This affects padding.

	406 if (padding) {

	407 if (pad == ' ') {

	408 // Predict the number of hex digits (including "0x" prefix) that

	409 // will be output for this address when it is converted to ASCII.

	410 size_t chars = 2;

	411 uint64_t j = i;

	412 do {

	413 ++chars;

	414 j >>= 4;

	415 } while (j);

	416

	417 // Output the necessary number of space characters to perform

	418 // padding. We can't rely on IToASCII() to do that for us, as it

	419 // would incorrectly add padding _after_ the "0x" prefix.

	420 Pad(buf, sz, &count, pad, padding, chars, &ptr);

	421

	422 // Inform IToASCII() that it no longer needs to handle the

	423 // padding.

	424 padding = 0;

	425 } else {

	426 // Adjust for the two-character "0x" prefix.

	427 padding = padding >= 2 ? padding - 2 : 0;

	428 }

	429 }

	430

	431 // Insert "0x" prefix, if there is still sufficient space in the

	432 // output buffer.

	433 if (Out(buf, sz, &count, '0'))

	434 ++ptr;

	435 if (Out(buf, sz, &count, 'x'))

	436 ++ptr;

	437 break;

	438 }

	439

	440 // No matter what data type this value originated from, print it as

	441 // a regular hexadecimal number.

	442 IncrementCount(&count,

	443 IToASCII(false, ch != 'x', i, ptr, sz - (ptr - buf),

	444 16, padding, pad));

	445

	446 // Advance "ptr" to the end of the string that was just emitted.

	447 if (sz - (ptr - buf))

	448 while (*ptr)

	449 ++ptr;

	450 break; }

	451 case 's': {

	452 // Check that there are arguments left to be inserted.

	453 if (cur_arg >= max_args) {

	454 RAW_DCHECK(cur_arg < max_args);

	455 goto fail_to_expand;

	456 }

	457

	458 // Check that the argument has the expected type.

	459 const Arg& arg = args[cur_arg++];

	460 const char *s;

	461 if (arg.type_ == Arg::STRING)

	462 s = arg.s_ ? arg.s_ : "<NULL>";

	463 else if (arg.type_ == Arg::INT && arg.width_ == sizeof(void *) &&

	464 arg.i_ == 0) // Allow C++'s version of NULL

	465 s = "<NULL>";

	466 else {

	467 RAW_DCHECK(arg.type_ == Arg::STRING);

	468 goto fail_to_expand;

	469 }

	470

	471 // Apply padding, if needed. This requires us to first check the

	472 // length of the string that we are outputting.

	473 if (padding) {

	474 size_t len = 0;

	475 for (const char* src = s; *src++; )

	476 ++len;

	477 Pad(buf, sz, &count, ' ', padding, len, &ptr);

	478 }

	479

	480 // Printing a string involves nothing more than copying it into the

	481 // output buffer and making sure we don't output more bytes than

	482 // available space.

	483 for (const char* src = s; *src; )

	484 if (Out(buf, sz, &count, *src++))

	485 ++ptr;

	486 break; }

	487 case '%':

	488 // Quoted percent '%' character.

	489 goto copy_verbatim;

	490 fail_to_expand:

	491 // C++ gives us tools to do type checking -- something that snprintf()

	492 // could never really do. So, whenever we see arguments that don't

	493 // match up with the format string, we refuse to output them. But

	494 // since we have to be extremely conservative about being async-

	495 // signal-safe, we are limited in the type of error handling that we

	496 // can do in production builds (in debug builds we can use RAW_DCHECK()

	497 // and hope for the best). So, all we do is pass the format string

	498 // unchanged. That should eventually get the user's attention; and in

	499 // the meantime, it hopefully doesn't lose too much data.

	500 default:

	501 // Unknown or unsupported format character. Just copy verbatim to

	502 // output.

	503 if (Out(buf, sz, &count, '%'))

	504 ++ptr;

	505 if (!ch)

	506 goto end_of_format_string;

	507 if (Out(buf, sz, &count, ch))

	508 ++ptr;

	509 break;

	510 }

	511 } else {

	512 copy_verbatim:

	513 if (Out(buf, sz, &count, fmt[-1]))

	514 ++ptr;

	515 }

	516 }

	517 end_of_format_string:

	518 end_of_output_buffer:

	519 *ptr = '\000';

	520 IncrementCount(&count);

	521 return static_cast<ssize_t>(count)-1;

	522 }

	523

	524 ssize_t FormatN(char* buf, size_t N, const char* fmt) {

	525 // Make sure we can write at least one NUL byte.

	526 ssize_t n = static_cast<ssize_t>(N);

	527 if (n < 1)

	528 return -1;

	529 size_t count = 0;

	530

	531 // In the slow-path, we deal with errors by copying the contents of

	532 // "fmt" unexpanded. This means, if there are no arguments passed, the

	533 // Format() function always degenerates to version of strncpy() that

	534 // de-duplicates '%' characters.

	535 char* dst = buf;

	536 const char* src = fmt;

	537 for (; *src; ++src) {

	538 char ch = *src;

	539 if (!IncrementCount(&count) && n > 1) {

	540 --dst;

	541 break;

	542 }

	543 if (n > 1) {

	544 --n;

	545 *dst++ = ch;

	546 }

	547 if (ch == '%' && src[1] == '%')

	548 ++src;

	549 }

	550 IncrementCount(&count);

	551 *dst = '\000';

	552 return static_cast<ssize_t>(count)-1;

	553 }

	554

	555 } // namespace debug

	556 } // namespace base

OLD	NEW

« base/debug/format.h ('K') | « base/debug/format.h ('k') | base/debug/format_unittest.cc » ('j') | base/debug/stack_trace_unittest.cc » ('J')