Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(3012)

Unified Diff: base/debug/format.cc

Issue 18656004: Added a new SafeSPrintf() function that implements snprintf() in an async-safe-fashion (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Addressed Jeffrey's comments Created 7 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: base/debug/format.cc
diff --git a/base/debug/format.cc b/base/debug/format.cc
new file mode 100644
index 0000000000000000000000000000000000000000..bfd790795c2add19be70bee5a564dd2f8395873a
--- /dev/null
+++ b/base/debug/format.cc
@@ -0,0 +1,556 @@
+// Copyright (c) 2013 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+//
+// Author: markus@chromium.org
+
+#include <limits>
+
+#include "base/debug/format.h"
+
+#if !defined(NDEBUG)
+// In debug builds, we use RAW_CHECK() to print useful error messages, if
+// Format() is called with broken arguments.
+// As our contract promises that Format() can be called from any crazy
jln (very slow on Chromium) 2013/08/01 00:03:15 nit: remove crazy
+// run-time context, it is not actually safe to call logging functions from it;
+// and we only ever do so for debug builds and hope for the best.
+// We should _never_ call any logging function other than RAW_CHECK(), and
+// we should _never_ include any logging code that is active in production
+// builds.
+// In other words, please do not remove the #ifdef around this #include.
+// Instead, in production builds we opt for returning a degraded result,
+// whenever an error is encountered.
+// E.g. The broken function call
+// Format("errno = %d (%x)", errno, strerror(errno))
+// will print something like
+// errno = 13 (%x)
+// instead of
+// errno = 13 (Access denied)
+// In most of the anticipated use cases, that's probably the preferred
+// behavior.
+#include "base/logging.h"
+#define RAW_DCHECK RAW_CHECK
+#else
+#define RAW_DCHECK(x) do { if (x) { } } while (0)
jln (very slow on Chromium) 2013/08/01 00:03:15 Do you want to just add this to base/logging.h ?
+#endif
+
+
+namespace base {
+namespace debug {
+
+// The code in this file is extremely careful to be async-signal-safe.
+//
+// Most obviously, we avoid calling any code that could dynamically allocate
+// memory. Doing so would almost certainly result in bugs and dead-locks.
+// We also avoid calling any other STL functions that could have unintended
+// side-effects involving memory allocation or access to other shared
+// resources.
+//
+// But on top of that, we also avoid calling other library functions, as many
+// of them have the side-effect of calling getenv() (in order to deal with
+// localization) or accessing errno. The latter sounds benign, but there are
+// several execution contexts where it isn't even possible to safely read let
+// alone write errno.
+//
+// The stated design goal of the Format() function is that it can be called
+// from any context that can safely call C or C++ code (i.e. anything that
+// doesn't require assembly code).
+//
+// For a brief overview of some but not all of the issues with async-signal-
+// safety, refer to:
+// http://pubs.opengroup.org/onlinepubs/009695399/functions/xsh_chap02_04.html
+
+namespace {
+
jln (very slow on Chromium) 2013/08/01 00:03:15 Please document this function and its |parameters|
+inline bool IncrementCount(size_t* count, size_t inc = 1) {
jln (very slow on Chromium) 2013/08/01 00:03:15 Please, avoid default parameters. Just have a Incr
+ // Adds "inc" to "*count", saturating at SSIZE_MAX. Returns true if the sum
+ // fit, or false if "*count" had to be clamped to SSIZE_MAX instead.
+ // Callers pass 1, a padding-derived amount, or a length computed by
+ // IToASCII(); all of these are expected to be at most SSIZE_MAX, which lets
+ // us compute "SSIZE_MAX - inc" without incurring any integer overflows.
+ RAW_DCHECK((size_t)inc <= (size_t)std::numeric_limits<ssize_t>::max());
+ if (*count > std::numeric_limits<ssize_t>::max() - inc) {
+ *count = std::numeric_limits<ssize_t>::max();
+ return false;  // Saturated.
+ } else {
+ *count += inc;
+ return true;
+ }
+}
+
+inline bool Out(char* buf, size_t sz, size_t* count, char ch) {
jln (very slow on Chromium) 2013/08/01 00:03:15 Please document this function and its |parameters|
+ if (*count + 1 < sz) {  // Room for "ch" with the last byte still unused?
jln (very slow on Chromium) 2013/08/01 00:03:15 if (sz >= 1 && count < sz - 1)
+ buf[*count] = ch;  // "*count" doubles as the write index into "buf".
+ IncrementCount(count);
+ return true;  // "ch" was stored.
+ }
+ IncrementCount(count);  // Still counted, although nothing was stored.
+ return false;  // "ch" did not fit.
+}
+
+inline void Pad(char* buf, size_t sz, size_t* count, char pad, size_t padding,
jln (very slow on Chromium) 2013/08/01 00:03:15 Please document this function and its |parameters|
+ size_t len, char** ptr) {  // "len" is the width of the content being padded.
+ char *dst = *ptr;
+ for (; padding > len; --padding)  // One "pad" per column beyond "len".
+ if (Out(buf, sz, count, pad))
+ ++dst;  // Track how far the caller's write cursor has to advance.
+ else {  // Buffer is full; Out() has already counted this character.
+ if (--padding)
+ IncrementCount(count, padding-len);  // Count the remainder in one step.
+ break;
+ }
+ *ptr = dst;  // Report the new write position back to the caller.
+}
+
+// POSIX doesn't define any async-signal-safe function for converting
+// an integer to ASCII, so we define our own. That also lets the function
+// deal with padding, with truncation, and with predicting the length of the
+// untruncated output.
+//
+// IToASCII() converts "i" to ASCII in "buf". It never writes more than "sz"
+// bytes. Output is truncated as needed, and a NUL character is appended
+// unless "sz" is zero. It returns the number of non-NUL bytes that would be
+// output if no truncation had happened.
+//
+// "sign" treats "i" as signed (only valid with base 10); "upcase" selects
+// upper-case digits; "base" must be in 2..16; "padding" is the minimum field
+// width, filled with "pad", which is either a '0' zero or a ' ' space. An
+// invalid "base"/"sign" combination stores just a NUL (if room) and returns 0.
+size_t IToASCII(bool sign, bool upcase, int64_t i, char* buf, size_t sz,
+ int base, size_t padding, char pad) {
+ // Sanity check for the "base".
+ if (base < 2 || base > 16 || (sign && base != 10)) {
+ if (static_cast<ssize_t>(sz) >= 1)
+ buf[0] = '\000';
+ return 0;
+ }
+
+ // Handle negative numbers, if requested by caller.
+ size_t count = 0;  // Length of the untruncated output so far.
+ size_t n = 1;  // Bytes of "buf" spoken for so far, including the NUL.
+ char* start = buf;
+ int minint = 0;  // 1 only while emitting the lowest digit of INT64_MIN.
+ bool needs_minus = false;
+ uint64_t num;
+ if (sign && i < 0) {
+ // If we aren't inserting padding, or if we are padding with '0' zeros,
+ // we should insert the minus character now. It makes it easier to
+ // correctly deal with truncated padded numbers.
+ // On the other hand, if we are padding with ' ' spaces, we have to
+ // delay outputting the minus character until later.
+ if (padding <= 2 || pad == '0') {
+ ++count;
+
+ // Make sure we can write the '-' character.
+ if (++n > sz) {
+ if (sz > 0)
+ *start = '\000';
+ } else
+ *start++ = '-';
+
+ // Adjust padding, since we just output one character already.
+ if (padding)
+ --padding;
+ } else
+ needs_minus = true;
+
+ // Turn our number positive.
+ if (i == std::numeric_limits<int64_t>::min()) {
+ // The most negative integer needs special treatment.
+ minint = 1;
+ num = -(i + 1);
+ } else {
+ // "Normal" negative numbers are easy.
+ num = -i;
+ }
+ } else
+ num = i;
+
+ // Loop until we have converted the entire number. Output at least one
+ // character (i.e. '0').
+ char* ptr = start;
+ bool started = false;
+ do {
+ // Sanity check. If padding is used to fill the entire address space,
+ // don't allow more than SSIZE_MAX bytes.
+ if (++count == static_cast<size_t>(std::numeric_limits<ssize_t>::max())) {
+ RAW_DCHECK(count <
+ static_cast<size_t>(std::numeric_limits<ssize_t>::max()));
+ break;
+ }
+
+ // Make sure there is still enough space left in our output buffer.
+ if (n == sz) {
+ if (ptr > start) {
+ // It is rare that we need to output a partial number. But if asked
+ // to do so, we will still make sure we output the correct number of
+ // leading digits.
+ // Since we are generating the digits in reverse order, we actually
+ // have to discard digits in the order that we have already emitted
+ // them. This is essentially equivalent to:
+ // memmove(start, start+1, --ptr - start)
+ --ptr;
+ for (char* move = start; move < ptr; ++move)
+ *move = move[1];
+ } else
+ goto cannot_write_anything_but_nul;
jln (very slow on Chromium) 2013/08/01 00:03:15 Any way to split this to a subfunction ?
+ } else
+ ++n;
+
+ // Output the next digit and (if necessary) compensate for the most
+ // negative integer having been negated as -(i + 1): "minint" adds the
+ // missing 1 back to the lowest digit. This cannot carry because, no matter
+ // the bit width, the most negative integer always ends in 2, 4, 6, or 8.
+ if (n <= sz) {
+ if (!num && started)
+ if (needs_minus) {
+ *ptr++ = '-';
+ needs_minus = false;
+ } else
+ *ptr++ = pad;
+ else {
+ started = true;
+ *ptr++ = (upcase ? "0123456789ABCDEF" : "0123456789abcdef")
+ [num%base+minint];
+ }
+ }
+
+ cannot_write_anything_but_nul:
+ minint = 0;  // Only the very first (lowest) digit needs the fix-up.
+ num /= base;
+
+ // Add padding, if requested.
+ if (padding > 0) {
+ --padding;
+
+ // Performance optimization for when we are asked to output
+ // excessive padding, but our output buffer is limited in size.
+ // Even if we output a 128bit number in binary, we would never
+ // write more than 130 characters. So, anything beyond this limit
+ // and we can compute the result arithmetically.
+ if (count > n && count - n > 130) {
+ IncrementCount(&count, padding);
+ padding = 0;
+ }
+ }
+ } while (num || padding || needs_minus);
+
+ // Terminate the output with a NUL character.
+ if (sz > 0)
+ *ptr = '\000';
+
+ // Conversion to ASCII actually resulted in the digits being in reverse
+ // order. We can't easily generate them in forward order, as we can't tell
+ // the number of characters needed until we are done converting.
+ // So, now, we reverse the string (except for the possible '-' sign).
+ while (--ptr > start) {
+ char ch = *ptr;
+ *ptr = *start;
+ *start++ = ch;
+ }
+ return count;
+}
+
+} // anonymous namespace
+
+ssize_t internal::FormatN(char* buf, size_t sz, const char* fmt,
+ const Arg* args, const size_t max_args) {  // "args" has "max_args" entries.
+ // Make sure we can write at least one NUL byte.
+ if (static_cast<ssize_t>(sz) < 1)
+ return -1;
+
+ // Iterate over format string and interpret '%' arguments as they are
+ // encountered.
+ char* ptr = buf;  // Next write position in "buf".
+ size_t padding;
+ char pad;
+ size_t count = 0;  // Untruncated output length so far.
+ for (unsigned int cur_arg = 0;
+ *fmt &&
+ count != static_cast<size_t>(std::numeric_limits<ssize_t>::max()); ) {
+ if (*fmt++ == '%') {
+ padding = 0;
+ pad = ' ';
+ char ch = *fmt++;
+ format_character_found:
+ switch (ch) {
+ case '0': case '1': case '2': case '3': case '4':
+ case '5': case '6': case '7': case '8': case '9':
+ // Found a width parameter. Convert to an integer value and store in
+ // "padding". If the leading digit is a zero, change the padding
+ // character from a space ' ' to a zero '0'.
+ pad = ch == '0' ? '0' : ' ';
+ for (;;) {
+ const size_t max_padding = std::numeric_limits<ssize_t>::max();
+ if (padding > max_padding/10 ||
+ 10*padding > max_padding - (ch - '0')) {
+ RAW_DCHECK(padding <= max_padding/10 &&
+ 10*padding <= max_padding - (ch - '0'));
+ // Integer overflow detected. Skip the rest of the width until
+ // we find the format character, then do the normal error handling.
+ while ((ch = *fmt++) >= '0' && ch <= '9') {
+ }
+ goto fail_to_expand;
+ }
+ padding = 10*padding + ch - '0';
+ ch = *fmt++;
+ if (ch < '0' || ch > '9') {
+ // Reached the end of the width parameter. This is where the format
+ // character is found.
+ goto format_character_found;
+ }
+ }
+ break;
+ case 'c': { // Output an ASCII character.
+ // Check that there are arguments left to be inserted.
+ if (cur_arg >= max_args) {
+ RAW_DCHECK(cur_arg < max_args);
+ goto fail_to_expand;
+ }
+
+ // Check that the argument has the expected type.
+ const Arg& arg = args[cur_arg++];
+ if (arg.type_ != Arg::INT &&
+ arg.type_ != Arg::UINT) {
+ RAW_DCHECK(arg.type_ == Arg::INT ||
+ arg.type_ == Arg::UINT);
+ goto fail_to_expand;
+ }
+
+ // Apply padding, if needed.
+ Pad(buf, sz, &count, ' ', padding, 1, &ptr);
+
+ // Convert the argument to an ASCII character and output it.
+ char ch = static_cast<char>(arg.i_);
+ if (!ch)  // A NUL argument ends the output right here.
+ goto end_of_output_buffer;
+ if (Out(buf, sz, &count, ch))
+ ++ptr;
+ break; }
+ case 'd': { // Output a signed or unsigned integer-like value.
+ // Check that there are arguments left to be inserted.
+ if (cur_arg >= max_args) {
+ RAW_DCHECK(cur_arg < max_args);
+ goto fail_to_expand;
+ }
+
+ // Check that the argument has the expected type.
+ const Arg& arg = args[cur_arg++];
+ if (arg.type_ != Arg::INT &&
+ arg.type_ != Arg::UINT) {
+ RAW_DCHECK(arg.type_ == Arg::INT ||
+ arg.type_ == Arg::UINT);
+ goto fail_to_expand;
+ }
+
+ // Our implementation of IToASCII() can handle all widths of data types
+ // and can print both signed and unsigned values.
+ IncrementCount(&count,
+ IToASCII(arg.type_ == Arg::INT, false, arg.i_,
+ ptr, sz - (ptr - buf), 10, padding, pad));
+
+ // Advance "ptr" to the end of the string that was just emitted.
+ if (sz - (ptr - buf))
+ while (*ptr)
+ ++ptr;
+ break; }
+ case 'x': // Output an unsigned hexadecimal value.
+ case 'X':
+ case 'p': { // Output a pointer value.
+ // Check that there are arguments left to be inserted.
+ if (cur_arg >= max_args) {
+ RAW_DCHECK(cur_arg < max_args);
+ goto fail_to_expand;
+ }
+
+ const Arg& arg = args[cur_arg++];
+ int64_t i;
+ switch (ch) {
+ case 'x': // Hexadecimal values are available for integer-like args.
+ case 'X':
+ // Check that the argument has the expected type.
+ if (arg.type_ != Arg::INT &&
+ arg.type_ != Arg::UINT) {
+ RAW_DCHECK(arg.type_ == Arg::INT ||
+ arg.type_ == Arg::UINT);
+ goto fail_to_expand;
+ }
+ i = arg.i_;
+
+ // The Arg() constructor automatically performed sign expansion on
+ // signed parameters. This is great when outputting a %d decimal
+ // number, but can result in unexpected leading 0xFF bytes when
+ // outputting a %x hexadecimal number. Mask bits, if necessary.
+ // We have to do this here, instead of in the Arg() constructor, as
+ // the Arg() constructor cannot tell whether we will output a %d
+ // or a %x. Only the latter should experience masking.
+ if (arg.width_ < sizeof(int64_t))
+ i &= (1LL << (8*arg.width_)) - 1;
+ break;
+ default:
+ // Pointer values require an actual pointer or a string.
+ if (arg.type_ == Arg::POINTER)
+ i = reinterpret_cast<uintptr_t>(arg.ptr_);
+ else if (arg.type_ == Arg::STRING)
+ i = reinterpret_cast<uintptr_t>(arg.s_);
+ else if (arg.type_ == Arg::INT && arg.width_ == sizeof(void *) &&
+ arg.i_ == 0) // Allow C++'s version of NULL
+ i = 0;
+ else {
+ RAW_DCHECK(arg.type_ == Arg::POINTER ||
+ arg.type_ == Arg::STRING);
+ goto fail_to_expand;
+ }
+
+ // Pointers always include the "0x" prefix. This affects padding.
+ if (padding) {
+ if (pad == ' ') {
+ // Predict the number of hex digits (including "0x" prefix) that
+ // will be output for this address when it is converted to ASCII.
+ size_t chars = 2;
+ uint64_t j = i;
+ do {
+ ++chars;
+ j >>= 4;
+ } while (j);
+
+ // Output the necessary number of space characters to perform
+ // padding. We can't rely on IToASCII() to do that for us, as it
+ // would incorrectly add padding _after_ the "0x" prefix.
+ Pad(buf, sz, &count, pad, padding, chars, &ptr);
+
+ // Inform IToASCII() that it no longer needs to handle the
+ // padding.
+ padding = 0;
+ } else {
+ // Adjust for the two-character "0x" prefix.
+ padding = padding >= 2 ? padding - 2 : 0;
+ }
+ }
+
+ // Insert "0x" prefix, if there is still sufficient space in the
+ // output buffer.
+ if (Out(buf, sz, &count, '0'))
+ ++ptr;
+ if (Out(buf, sz, &count, 'x'))
+ ++ptr;
+ break;
+ }
+
+ // No matter what data type this value originated from, print it as
+ // a regular hexadecimal number.
+ IncrementCount(&count,
+ IToASCII(false, ch != 'x', i, ptr, sz - (ptr - buf),
+ 16, padding, pad));
+
+ // Advance "ptr" to the end of the string that was just emitted.
+ if (sz - (ptr - buf))
+ while (*ptr)
+ ++ptr;
+ break; }
+ case 's': {
+ // Check that there are arguments left to be inserted.
+ if (cur_arg >= max_args) {
+ RAW_DCHECK(cur_arg < max_args);
+ goto fail_to_expand;
+ }
+
+ // Check that the argument has the expected type.
+ const Arg& arg = args[cur_arg++];
+ const char *s;
+ if (arg.type_ == Arg::STRING)
+ s = arg.s_ ? arg.s_ : "<NULL>";
+ else if (arg.type_ == Arg::INT && arg.width_ == sizeof(void *) &&
+ arg.i_ == 0) // Allow C++'s version of NULL
+ s = "<NULL>";
+ else {
+ RAW_DCHECK(arg.type_ == Arg::STRING);
+ goto fail_to_expand;
+ }
+
+ // Apply padding, if needed. This requires us to first check the
+ // length of the string that we are outputting.
+ if (padding) {
+ size_t len = 0;
+ for (const char* src = s; *src++; )
+ ++len;
+ Pad(buf, sz, &count, ' ', padding, len, &ptr);
+ }
+
+ // Printing a string involves nothing more than copying it into the
+ // output buffer and making sure we don't output more bytes than
+ // available space.
+ for (const char* src = s; *src; )
+ if (Out(buf, sz, &count, *src++))
+ ++ptr;
+ break; }
+ case '%':
+ // Quoted percent '%' character.
+ goto copy_verbatim;
+ fail_to_expand:
+ // C++ gives us tools to do type checking -- something that snprintf()
+ // could never really do. So, whenever we see arguments that don't
+ // match up with the format string, we refuse to output them. But
+ // since we have to be extremely conservative about being async-
+ // signal-safe, we are limited in the type of error handling that we
+ // can do in production builds (in debug builds we can use RAW_DCHECK()
+ // and hope for the best). So, all we do is pass the format string
+ // unchanged. That should eventually get the user's attention; and in
+ // the meantime, it hopefully doesn't lose too much data.
+ default:
+ // Unknown or unsupported format character. Just copy verbatim to
+ // output.
+ if (Out(buf, sz, &count, '%'))
+ ++ptr;
+ if (!ch)  // The format string ended right after the '%'.
+ goto end_of_format_string;
+ if (Out(buf, sz, &count, ch))
+ ++ptr;
+ break;
+ }
+ } else {
+ copy_verbatim:
+ if (Out(buf, sz, &count, fmt[-1]))
+ ++ptr;
+ }
+ }
+ end_of_format_string:
+ end_of_output_buffer:
+ *ptr = '\000';
+ IncrementCount(&count);
+ return static_cast<ssize_t>(count)-1;  // Untruncated length, without the NUL.
+}
+
+ssize_t FormatN(char* buf, size_t N, const char* fmt) {
+ // Make sure we can write at least one NUL byte.
+ ssize_t n = static_cast<ssize_t>(N);  // Room left in "buf", NUL included.
+ if (n < 1)
+ return -1;
+ size_t count = 0;  // Untruncated output length so far.
+
+ // In the slow-path, we deal with errors by copying the contents of
+ // "fmt" unexpanded. This means, if there are no arguments passed, the
+ // Format() function always degenerates to a version of strncpy() that
+ // de-duplicates '%' characters.
+ char* dst = buf;
+ const char* src = fmt;
+ for (; *src; ++src) {
+ char ch = *src;
+ if (!IncrementCount(&count) && n > 1) {
+ --dst;
+ break;
+ }
+ if (n > 1) {  // Store "ch" only while a byte remains for the NUL.
+ --n;
+ *dst++ = ch;
+ }
+ if (ch == '%' && src[1] == '%')  // Collapse "%%" into a single '%'.
+ ++src;
+ }
+ IncrementCount(&count);
+ *dst = '\000';
+ return static_cast<ssize_t>(count)-1;  // Untruncated length, without the NUL.
+}
+
+} // namespace debug
+} // namespace base

Powered by Google App Engine
This is Rietveld 408576698