base/debug/format.cc - Issue 18656004: Added a new SafeSPrintf() function that implements snprintf() in an async-safe-fashion

Unified Diff: base/debug/format.cc

Issue 18656004: Added a new SafeSPrintf() function that implements snprintf() in an async-safe-fashion (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: Addressed Jeffrey's comments Created 7 years, 5 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: base/debug/format.cc

diff --git a/base/debug/format.cc b/base/debug/format.cc

new file mode 100644

index 0000000000000000000000000000000000000000..bfd790795c2add19be70bee5a564dd2f8395873a

--- /dev/null

+++ b/base/debug/format.cc

@@ -0,0 +1,556 @@

+// Use of this source code is governed by a BSD-style license that can be

+// found in the LICENSE file.

+//

+// Author: markus@chromium.org

+#include <limits>

+#include "base/debug/format.h"

+#if !defined(NDEBUG)

+// In debug builds, we use RAW_CHECK() to print useful error messages, if

+// Format() is called with broken arguments.

+// As our contract promises that Format() can be called from any crazy

jln (very slow on Chromium) 2013/08/01 00:03:15 nit: remove crazy

+// run-time context, it is not actually safe to call logging functions from it;

+// and we only ever do so for debug builds and hope for the best.

+// We should _never_ call any logging function other than RAW_CHECK(), and

+// we should _never_ include any logging code that is active in production

+// builds.

+// In other words, please do not remove the #ifdef around this #include.

+// Instead, in production builds we opt for returning a degraded result,

+// whenever an error is encountered.

+// E.g. The broken function call

+// Format("errno = %d (%x)", errno, strerror(errno))

+// will print something like

+// errno = 13 (%x)

+// instead of

+// errno = 13 (Access denied)

+// In most of the anticipated use cases, that's probably the preferred

+// behavior.

+#include "base/logging.h"

+#define RAW_DCHECK RAW_CHECK

+#else

+#define RAW_DCHECK(x) do { if (x) { } } while (0)

jln (very slow on Chromium) 2013/08/01 00:03:15 Do you want to just add this to base/logging.h ?

+#endif

+namespace base {

+namespace debug {

+// The code in this file is extremely careful to be async-signal-safe.

+//

+// Most obviously, we avoid calling any code that could dynamically allocate

+// memory. Doing so would almost certainly result in bugs and dead-locks.

+// We also avoid calling any other STL functions that could have unintended

+// side-effects involving memory allocation or access to other shared

+// resources.

+//

+// But on top of that, we also avoid calling other library functions, as many

+// of them have the side-effect of calling getenv() (in order to deal with

+// localization) or accessing errno. The latter sounds benign, but there are

+// several execution contexts where it isn't even possible to safely read, let

+// alone write, errno.

+//

+// The stated design goal of the Format() function is that it can be called

+// from any context that can safely call C or C++ code (i.e. anything that

+// doesn't require assembly code).

+//

+// For a brief overview of some but not all of the issues with async-signal-

+// safety, refer to:

+// http://pubs.opengroup.org/onlinepubs/009695399/functions/xsh_chap02_04.html

+namespace {

jln (very slow on Chromium) 2013/08/01 00:03:15 Please document this function and its |parameters|

+inline bool IncrementCount(size_t* count, size_t inc = 1) {

jln (very slow on Chromium) 2013/08/01 00:03:15 Please, avoid default parameters. Just have a Incr

+ // "inc" is either 1 or a "padding" value. Padding is clamped at run-time to

+ // at most SSIZE_MAX. So, we know that "inc" is always in the range

+ // 1..SSIZE_MAX.

+ // This allows us to compute "SSIZE_MAX - inc" without incurring any

+ // integer overflows.

+ RAW_DCHECK((size_t)inc <= (size_t)std::numeric_limits<ssize_t>::max());

+ if (*count > std::numeric_limits<ssize_t>::max() - inc) {

+ *count = std::numeric_limits<ssize_t>::max();

+ return false;

+ } else {

+ *count += inc;

+ return true;

+ }

+inline bool Out(char* buf, size_t sz, size_t* count, char ch) {

jln (very slow on Chromium) 2013/08/01 00:03:15 Please document this function and its |parameters|

+ if (*count + 1 < sz) {

jln (very slow on Chromium) 2013/08/01 00:03:15 if (sz >= 1 && count < sz - 1)

+ buf[*count] = ch;

+ IncrementCount(count);

+ return true;

+ }

+ IncrementCount(count);

+ return false;

+inline void Pad(char* buf, size_t sz, size_t* count, char pad, size_t padding,

jln (very slow on Chromium) 2013/08/01 00:03:15 Please document this function and its |parameters|

+ size_t len, char** ptr) {

+ char *dst = *ptr;

+ for (; padding > len; --padding)

+ if (Out(buf, sz, count, pad))

+ ++dst;

+ else {

+ if (--padding)

+ IncrementCount(count, padding-len);

+ break;

+ }

+ *ptr = dst;

+// POSIX doesn't define any async-signal-safe function for converting

+// an integer to ASCII. Define our own version.

+//

+// This also gives us the ability to make the function a little more powerful

+// and have it deal with padding, with truncation, and with predicting the

+// length of the untruncated output.

+//

+// IToASCII() converts an (optionally signed) integer to ASCII. It never

+// writes more than "sz" bytes. Output will be truncated as needed, and a NUL

+// character is appended, unless "sz" is zero. It returns the number of non-NUL

+// bytes that would be output if no truncation had happened.

+//

+// It supports bases 2 through 16. Padding can be done with either '0' zeros

+// or ' ' spaces.

+size_t IToASCII(bool sign, bool upcase, int64_t i, char* buf, size_t sz,

+ int base, size_t padding, char pad) {

+ // Sanity check for the "base".

+ if (base < 2 || base > 16 || (sign && base != 10)) {

+ if (static_cast<ssize_t>(sz) >= 1)

+ buf[0] = '\000';

+ return 0;

+ }

+ // Handle negative numbers, if requested by caller.

+ size_t count = 0;

+ size_t n = 1;

+ char* start = buf;

+ int minint = 0;

+ bool needs_minus = false;

+ uint64_t num;

+ if (sign && i < 0) {

+ // If we aren't inserting padding, or if we are padding with '0' zeros,

+ // we should insert the minus character now. It makes it easier to

+ // correctly deal with truncated padded numbers.

+ // On the other hand, if we are padding with ' ' spaces, we have to

+ // delay outputting the minus character until later.

+ if (padding <= 2 || pad == '0') {

+ ++count;

+ // Make sure we can write the '-' character.

+ if (++n > sz) {

+ if (sz > 0)

+ *start = '\000';

+ } else

+ *start++ = '-';

+ // Adjust padding, since we just output one character already.

+ if (padding)

+ --padding;

+ } else

+ needs_minus = true;

+ // Turn our number positive.

+ if (i == std::numeric_limits<int64_t>::min()) {

+ // The most negative integer needs special treatment.

+ minint = 1;

+ num = -(i + 1);

+ } else {

+ // "Normal" negative numbers are easy.

+ num = -i;

+ }

+ } else

+ num = i;

+ // Loop until we have converted the entire number. Output at least one

+ // character (i.e. '0').

+ char* ptr = start;

+ bool started = false;

+ do {

+ // Sanity check. If padding is used to fill the entire address space,

+ // don't allow more than SSIZE_MAX bytes.

+ if (++count == static_cast<size_t>(std::numeric_limits<ssize_t>::max())) {

+ RAW_DCHECK(count <

+ static_cast<size_t>(std::numeric_limits<ssize_t>::max()));

+ break;

+ }

+ // Make sure there is still enough space left in our output buffer.

+ if (n == sz) {

+ if (ptr > start) {

+ // It is rare that we need to output a partial number. But if asked

+ // to do so, we will still make sure we output the correct number of

+ // leading digits.

+ // Since we are generating the digits in reverse order, we actually

+ // have to discard digits in the order that we have already emitted

+ // them. This is essentially equivalent to:

+ // memmove(start, start+1, --ptr - start)

+ --ptr;

+ for (char* move = start; move < ptr; ++move)

+ *move = move[1];

+ } else

+ goto cannot_write_anything_but_nul;

jln (very slow on Chromium) 2013/08/01 00:03:15 Any way to split this to a subfunction ?

+ } else

+ ++n;

+ // Output the next digit and (if necessary) compensate for the most

+ // negative integer needing special treatment. This works because, no

+ // matter the bit width of the integer, the magnitude of the most negative

+ // integer always ends in 2, 4, 6, or 8 when written in decimal.

+ if (n <= sz) {

+ if (!num && started)

+ if (needs_minus) {

+ *ptr++ = '-';

+ needs_minus = false;

+ } else

+ *ptr++ = pad;

+ else {

+ started = true;

+ *ptr++ = (upcase ? "0123456789ABCDEF" : "0123456789abcdef")

+ [num%base+minint];

+ }

+ cannot_write_anything_but_nul:

+ minint = 0;

+ num /= base;

+ // Add padding, if requested.

+ if (padding > 0) {

+ --padding;

+ // Performance optimization for when we are asked to output

+ // excessive padding, but our output buffer is limited in size.

+ // Even if we output a 128bit number in binary, we would never

+ // write more than 130 characters. So, for anything beyond this limit,

+ // we can compute the result arithmetically.

+ if (count > n && count - n > 130) {

+ IncrementCount(&count, padding);

+ padding = 0;

+ }

+ } while (num || padding || needs_minus);

+ // Terminate the output with a NUL character.

+ if (sz > 0)

+ *ptr = '\000';

+ // Conversion to ASCII actually resulted in the digits being in reverse

+ // order. We can't easily generate them in forward order, as we can't tell

+ // the number of characters needed until we are done converting.

+ // So, now, we reverse the string (except for the possible '-' sign).

+ while (--ptr > start) {

+ char ch = *ptr;

+ *ptr = *start;

+ *start++ = ch;

+ }

+ return count;

+} // anonymous namespace

+ssize_t internal::FormatN(char* buf, size_t sz, const char* fmt,

+ const Arg* args, const size_t max_args) {

+ // Make sure we can write at least one NUL byte.

+ if (static_cast<ssize_t>(sz) < 1)

+ return -1;

+ // Iterate over format string and interpret '%' arguments as they are

+ // encountered.

+ char* ptr = buf;

+ size_t padding;

+ char pad;

+ size_t count = 0;

+ for (unsigned int cur_arg = 0;

+ *fmt &&

+ count != static_cast<size_t>(std::numeric_limits<ssize_t>::max()); ) {

+ if (*fmt++ == '%') {

+ padding = 0;

+ pad = ' ';

+ char ch = *fmt++;

+ format_character_found:

+ switch (ch) {

+ case '0': case '1': case '2': case '3': case '4':

+ case '5': case '6': case '7': case '8': case '9':

+ // Found a width parameter. Convert to an integer value and store in

+ // "padding". If the leading digit is a zero, change the padding

+ // character from a space ' ' to a zero '0'.

+ pad = ch == '0' ? '0' : ' ';

+ for (;;) {

+ const size_t max_padding = std::numeric_limits<ssize_t>::max();

+ if (padding > max_padding/10 ||

+ 10*padding > max_padding - (ch - '0')) {

+ RAW_DCHECK(padding <= max_padding/10 &&

+ 10*padding <= max_padding - (ch - '0'));

+ // Integer overflow detected. Skip the rest of the width until

+ // we find the format character, then do the normal error handling.

+ while ((ch = *fmt++) >= '0' && ch <= '9') {

+ }

+ goto fail_to_expand;

+ }

+ padding = 10*padding + ch - '0';

+ ch = *fmt++;

+ if (ch < '0' || ch > '9') {

+ // Reached the end of the width parameter. This is where the format

+ // character is found.

+ goto format_character_found;

+ }

+ break;

+ case 'c': { // Output an ASCII character.

+ // Check that there are arguments left to be inserted.

+ if (cur_arg >= max_args) {

+ RAW_DCHECK(cur_arg < max_args);

+ goto fail_to_expand;

+ }

+ // Check that the argument has the expected type.

+ const Arg& arg = args[cur_arg++];

+ if (arg.type_ != Arg::INT &&

+ arg.type_ != Arg::UINT) {

+ RAW_DCHECK(arg.type_ == Arg::INT ||

+ arg.type_ == Arg::UINT);

+ goto fail_to_expand;

+ }

+ // Apply padding, if needed.

+ Pad(buf, sz, &count, ' ', padding, 1, &ptr);

+ // Convert the argument to an ASCII character and output it.

+ char ch = static_cast<char>(arg.i_);

+ if (!ch)

+ goto end_of_output_buffer;

+ if (Out(buf, sz, &count, ch))

+ ++ptr;

+ break; }

+ case 'd': { // Output a signed or unsigned integer-like value.

+ // Check that there are arguments left to be inserted.

+ if (cur_arg >= max_args) {

+ RAW_DCHECK(cur_arg < max_args);

+ goto fail_to_expand;

+ }

+ // Check that the argument has the expected type.

+ const Arg& arg = args[cur_arg++];

+ if (arg.type_ != Arg::INT &&

+ arg.type_ != Arg::UINT) {

+ RAW_DCHECK(arg.type_ == Arg::INT ||

+ arg.type_ == Arg::UINT);

+ goto fail_to_expand;

+ }

+ // Our implementation of IToASCII() can handle all widths of data types

+ // and can print both signed and unsigned values.

+ IncrementCount(&count,

+ IToASCII(arg.type_ == Arg::INT, false, arg.i_,

+ ptr, sz - (ptr - buf), 10, padding, pad));

+ // Advance "ptr" to the end of the string that was just emitted.

+ if (sz - (ptr - buf))

+ while (*ptr)

+ ++ptr;

+ break; }

+ case 'x': // Output an unsigned hexadecimal value.

+ case 'X':

+ case 'p': { // Output a pointer value.

+ // Check that there are arguments left to be inserted.

+ if (cur_arg >= max_args) {

+ RAW_DCHECK(cur_arg < max_args);

+ goto fail_to_expand;

+ }

+ const Arg& arg = args[cur_arg++];

+ int64_t i;

+ switch (ch) {

+ case 'x': // Hexadecimal values are available for integer-like args.

+ case 'X':

+ // Check that the argument has the expected type.

+ if (arg.type_ != Arg::INT &&

+ arg.type_ != Arg::UINT) {

+ RAW_DCHECK(arg.type_ == Arg::INT ||

+ arg.type_ == Arg::UINT);

+ goto fail_to_expand;

+ }

+ i = arg.i_;

+ // The Arg() constructor automatically performed sign expansion on

+ // signed parameters. This is great when outputting a %d decimal

+ // number, but can result in unexpected leading 0xFF bytes when

+ // outputting a %x hexadecimal number. Mask bits, if necessary.

+ // We have to do this here, instead of in the Arg() constructor, as

+ // the Arg() constructor cannot tell whether we will output a %d

+ // or a %x. Only the latter should experience masking.

+ if (arg.width_ < sizeof(int64_t))

+ i &= (1LL << (8*arg.width_)) - 1;

+ break;

+ default:

+ // Pointer values require an actual pointer or a string.

+ if (arg.type_ == Arg::POINTER)

+ i = reinterpret_cast<uintptr_t>(arg.ptr_);

+ else if (arg.type_ == Arg::STRING)

+ i = reinterpret_cast<uintptr_t>(arg.s_);

+ else if (arg.type_ == Arg::INT && arg.width_ == sizeof(void *) &&

+ arg.i_ == 0) // Allow C++'s version of NULL

+ i = 0;

+ else {

+ RAW_DCHECK(arg.type_ == Arg::POINTER ||

+ arg.type_ == Arg::STRING);

+ goto fail_to_expand;

+ }

+ // Pointers always include the "0x" prefix. This affects padding.

+ if (padding) {

+ if (pad == ' ') {

+ // Predict the number of hex digits (including "0x" prefix) that

+ // will be output for this address when it is converted to ASCII.

+ size_t chars = 2;

+ uint64_t j = i;

+ do {

+ ++chars;

+ j >>= 4;

+ } while (j);

+ // Output the necessary number of space characters to perform

+ // padding. We can't rely on IToASCII() to do that for us, as it

+ // would incorrectly add padding _after_ the "0x" prefix.

+ Pad(buf, sz, &count, pad, padding, chars, &ptr);

+ // Inform IToASCII() that it no longer needs to handle the

+ // padding.

+ padding = 0;

+ } else {

+ // Adjust for the two-character "0x" prefix.

+ padding = padding >= 2 ? padding - 2 : 0;

+ }

+ // Insert "0x" prefix, if there is still sufficient space in the

+ // output buffer.

+ if (Out(buf, sz, &count, '0'))

+ ++ptr;

+ if (Out(buf, sz, &count, 'x'))

+ ++ptr;

+ break;

+ }

+ // No matter what data type this value originated from, print it as

+ // a regular hexadecimal number.

+ IncrementCount(&count,

+ IToASCII(false, ch != 'x', i, ptr, sz - (ptr - buf),

+ 16, padding, pad));

+ // Advance "ptr" to the end of the string that was just emitted.

+ if (sz - (ptr - buf))

+ while (*ptr)

+ ++ptr;

+ break; }

+ case 's': {

+ // Check that there are arguments left to be inserted.

+ if (cur_arg >= max_args) {

+ RAW_DCHECK(cur_arg < max_args);

+ goto fail_to_expand;

+ }

+ // Check that the argument has the expected type.

+ const Arg& arg = args[cur_arg++];

+ const char *s;

+ if (arg.type_ == Arg::STRING)

+ s = arg.s_ ? arg.s_ : "<NULL>";

+ else if (arg.type_ == Arg::INT && arg.width_ == sizeof(void *) &&

+ arg.i_ == 0) // Allow C++'s version of NULL

+ s = "<NULL>";

+ else {

+ RAW_DCHECK(arg.type_ == Arg::STRING);

+ goto fail_to_expand;

+ }

+ // Apply padding, if needed. This requires us to first check the

+ // length of the string that we are outputting.

+ if (padding) {

+ size_t len = 0;

+ for (const char* src = s; *src++; )

+ ++len;

+ Pad(buf, sz, &count, ' ', padding, len, &ptr);

+ }

+ // Printing a string involves nothing more than copying it into the

+ // output buffer and making sure we don't output more bytes than

+ // available space.

+ for (const char* src = s; *src; )

+ if (Out(buf, sz, &count, *src++))

+ ++ptr;

+ break; }

+ case '%':

+ // Quoted percent '%' character.

+ goto copy_verbatim;

+ fail_to_expand:

+ // C++ gives us tools to do type checking -- something that snprintf()

+ // could never really do. So, whenever we see arguments that don't

+ // match up with the format string, we refuse to output them. But

+ // since we have to be extremely conservative about being async-

+ // signal-safe, we are limited in the type of error handling that we

+ // can do in production builds (in debug builds we can use RAW_DCHECK()

+ // and hope for the best). So, all we do is pass the format string

+ // unchanged. That should eventually get the user's attention; and in

+ // the meantime, it hopefully doesn't lose too much data.

+ default:

+ // Unknown or unsupported format character. Just copy verbatim to

+ // output.

+ if (Out(buf, sz, &count, '%'))

+ ++ptr;

+ if (!ch)

+ goto end_of_format_string;

+ if (Out(buf, sz, &count, ch))

+ ++ptr;

+ break;

+ }

+ } else {

+ copy_verbatim:

+ if (Out(buf, sz, &count, fmt[-1]))

+ ++ptr;

+ }

+ end_of_format_string:

+ end_of_output_buffer:

+ *ptr = '\000';

+ IncrementCount(&count);

+ return static_cast<ssize_t>(count)-1;

+ssize_t FormatN(char* buf, size_t N, const char* fmt) {

+ // Make sure we can write at least one NUL byte.

+ ssize_t n = static_cast<ssize_t>(N);

+ if (n < 1)

+ return -1;

+ size_t count = 0;

+ // In the slow-path, we deal with errors by copying the contents of

+ // "fmt" unexpanded. This means, if there are no arguments passed, the

+ // Format() function always degenerates to a version of strncpy() that

+ // de-duplicates '%' characters.

+ char* dst = buf;

+ const char* src = fmt;

+ for (; *src; ++src) {

+ char ch = *src;

+ if (!IncrementCount(&count) && n > 1) {

+ --dst;

+ break;

+ }

+ if (n > 1) {

+ --n;

+ *dst++ = ch;

+ }

+ if (ch == '%' && src[1] == '%')

+ ++src;

+ }

+ IncrementCount(&count);

+ *dst = '\000';

+ return static_cast<ssize_t>(count)-1;

+} // namespace debug

+} // namespace base

« base/debug/format.h ('K') | « base/debug/format.h ('k') | base/debug/format_unittest.cc » ('j') | base/debug/stack_trace_unittest.cc » ('J')