Chromium Code Reviews| Index: base/debug/format.cc |
| diff --git a/base/debug/format.cc b/base/debug/format.cc |
| new file mode 100644 |
| index 0000000000000000000000000000000000000000..bfd790795c2add19be70bee5a564dd2f8395873a |
| --- /dev/null |
| +++ b/base/debug/format.cc |
| @@ -0,0 +1,556 @@ |
| +// Copyright (c) 2013 The Chromium Authors. All rights reserved. |
| +// Use of this source code is governed by a BSD-style license that can be |
| +// found in the LICENSE file. |
| +// |
| +// Author: markus@chromium.org |
| + |
| +#include <limits> |
| + |
| +#include "base/debug/format.h" |
| + |
| +#if !defined(NDEBUG) |
| +// In debug builds, we use RAW_CHECK() to print useful error messages, if |
| +// Format() is called with broken arguments. |
| +// As our contract promises that Format() can be called from any crazy |

jln (very slow on Chromium)
2013/08/01 00:03:15
nit: remove crazy
|
| +// run-time context, it is not actually safe to call logging functions from it; |
| +// and we only ever do so for debug builds and hope for the best. |
| +// We should _never_ call any logging function other than RAW_CHECK(), and |
| +// we should _never_ include any logging code that is active in production |
| +// builds. |
| +// In other words, please do not remove the #ifdef around this #include. |
| +// Instead, in production builds we opt for returning a degraded result, |
| +// whenever an error is encountered. |
| +// E.g. The broken function call |
| +// Format("errno = %d (%x)", errno, strerror(errno)) |
| +// will print something like |
| +// errno = 13 (%x) |
| +// instead of |
| +// errno = 13 (Access denied) |
| +// In most of the anticipated use cases, that's probably the preferred |
| +// behavior. In production builds, RAW_DCHECK(x) evaluates "x" and ignores it. |
| +#include "base/logging.h" |
| +#define RAW_DCHECK RAW_CHECK |
| +#else |
| +#define RAW_DCHECK(x) do { if (x) { } } while (0) |

jln (very slow on Chromium)
2013/08/01 00:03:15
Do you want to just add this to base/logging.h ?
|
| +#endif |
| + |
| + |
| +namespace base { |
| +namespace debug { |
| + |
| +// The code in this file is extremely careful to be async-signal-safe. |
| +// |
| +// Most obviously, we avoid calling any code that could dynamically allocate |
| +// memory. Doing so would almost certainly result in bugs and dead-locks. |
| +// We also avoid calling any other STL functions that could have unintended |
| +// side-effects involving memory allocation or access to other shared |
| +// resources. |
| +// |
| +// But on top of that, we also avoid calling other library functions, as many |
| +// of them have the side-effect of calling getenv() (in order to deal with |
| +// localization) or accessing errno. The latter sounds benign, but there are |
| +// several execution contexts where it isn't even possible to safely read, let |
| +// alone write, errno. |
| +// |
| +// The stated design goal of the Format() function is that it can be called |
| +// from any context that can safely call C or C++ code (i.e. anything that |
| +// doesn't require assembly code). |
| +// |
| +// For a brief overview of some but not all of the issues with async-signal- |
| +// safety, refer to: |
| +// http://pubs.opengroup.org/onlinepubs/009695399/functions/xsh_chap02_04.html |
| + |
| +namespace { |
| + |
|
jln (very slow on Chromium)
2013/08/01 00:03:15
Please document this function and its |parameters|
|
| +inline bool IncrementCount(size_t* count, size_t inc = 1) { |

jln (very slow on Chromium)
2013/08/01 00:03:15
Please, avoid default parameters. Just have a Incr
|
| + // Adds "inc" to "*count", saturating at SSIZE_MAX. Returns true if the sum |
| + // fit, or false after pinning "*count" to SSIZE_MAX if it did not. |
| + // "inc" is either 1 or a "padding" value; padding is clamped at run-time |
| + // to at most SSIZE_MAX, so "inc" never exceeds SSIZE_MAX and |
| + // "SSIZE_MAX - inc" below can be computed without any integer overflow. |
| + RAW_DCHECK((size_t)inc <= (size_t)std::numeric_limits<ssize_t>::max()); |
| + if (*count > std::numeric_limits<ssize_t>::max() - inc) { |
| + *count = std::numeric_limits<ssize_t>::max(); |
| + return false; |
| + } else { |
| + *count += inc; |
| + return true; |
| + } |
| +} |
| + |
| +inline bool Out(char* buf, size_t sz, size_t* count, char ch) { /* Emits "ch" at buf[*count] in the "sz"-byte buffer "buf". */ |

jln (very slow on Chromium)
2013/08/01 00:03:15
Please document this function and its |parameters|
|
| + if (*count + 1 < sz) { /* Store only while the last byte of "buf" stays free. */ |

jln (very slow on Chromium)
2013/08/01 00:03:15
if (sz >= 1 && count < sz - 1)
|
| + buf[*count] = ch; |
| + IncrementCount(count); |
| + return true; /* "ch" was stored. */ |
| + } |
| + IncrementCount(count); /* Dropped characters are still counted. */ |
| + return false; /* "ch" did not fit. */ |
| +} |
| + |
| +inline void Pad(char* buf, size_t sz, size_t* count, char pad, size_t padding, |

jln (very slow on Chromium)
2013/08/01 00:03:15
Please document this function and its |parameters|
|
| + size_t len, char** ptr) { /* Emits "padding - len" copies of "pad" when "padding" exceeds "len". */ |
| + char *dst = *ptr; /* Local copy of the caller's write cursor. */ |
| + for (; padding > len; --padding) |
| + if (Out(buf, sz, count, pad)) |
| + ++dst; /* Advance only for characters that were actually stored. */ |
| + else { |
| + if (--padding) |
| + IncrementCount(count, padding-len); /* Count the remaining fill without looping. */ |
| + break; |
| + } |
| + *ptr = dst; /* Hand the advanced cursor back to the caller. */ |
| +} |
| + |
| +// POSIX doesn't define any async-signal-safe function for converting an |
| +// integer to ASCII, so we define our own. This also lets the function deal |
| +// with padding, with truncation, and with predicting the length of the |
| +// untruncated output. |
| +// |
| +// IToASCII() converts "i" to ASCII in "buf". It never writes more than "sz" |
| +// bytes. Output is truncated as needed, and a NUL character is appended |
| +// unless "sz" is zero. It returns the number of non-NUL bytes that would be |
| +// output if no truncation had happened. |
| +// |
| +// "sign" requests signed conversion and is only accepted with base 10; |
| +// "upcase" selects upper-case digits. Bases 2 through 16 are supported; any |
| +// other request yields an empty string and a return value of 0. "padding" is |
| +// the minimum field width, filled with "pad" ('0' zeros or ' ' spaces). |
| +size_t IToASCII(bool sign, bool upcase, int64_t i, char* buf, size_t sz, |
| + int base, size_t padding, char pad) { |
| + // Sanity check for the "base". |
| + if (base < 2 || base > 16 || (sign && base != 10)) { |
| + if (static_cast<ssize_t>(sz) >= 1) |
| + buf[0] = '\000'; |
| + return 0; |
| + } |
| + |
| + // Handle negative numbers, if requested by caller. |
| + size_t count = 0; |
| + size_t n = 1; |
| + char* start = buf; |
| + int minint = 0; |
| + bool needs_minus = false; |
| + uint64_t num; |
| + if (sign && i < 0) { |
| + // If we aren't inserting padding, or if we are padding with '0' zeros, |
| + // we should insert the minus character now. It makes it easier to |
| + // correctly deal with truncated padded numbers. |
| + // On the other hand, if we are padding with ' ' spaces, we have to |
| + // delay outputting the minus character until later. |
| + if (padding <= 2 || pad == '0') { |
| + ++count; |
| + |
| + // Make sure we can write the '-' character. |
| + if (++n > sz) { |
| + if (sz > 0) |
| + *start = '\000'; |
| + } else |
| + *start++ = '-'; |
| + |
| + // Adjust padding, since we just output one character already. |
| + if (padding) |
| + --padding; |
| + } else |
| + needs_minus = true; |
| + |
| + // Turn our number positive. |
| + if (i == std::numeric_limits<int64_t>::min()) { |
| + // The most negative integer needs special treatment. |
| + minint = 1; |
| + num = -(i + 1); |
| + } else { |
| + // "Normal" negative numbers are easy. |
| + num = -i; |
| + } |
| + } else |
| + num = i; |
| + |
| + // Loop until we have converted the entire number. Output at least one |
| + // character (i.e. '0'). |
| + char* ptr = start; |
| + bool started = false; |
| + do { |
| + // Sanity check. If padding is used to fill the entire address space, |
| + // don't allow more than SSIZE_MAX bytes. |
| + if (++count == static_cast<size_t>(std::numeric_limits<ssize_t>::max())) { |
| + RAW_DCHECK(count < |
| + static_cast<size_t>(std::numeric_limits<ssize_t>::max())); |
| + break; |
| + } |
| + |
| + // Make sure there is still enough space left in our output buffer. |
| + if (n == sz) { |
| + if (ptr > start) { |
| + // It is rare that we need to output a partial number. But if asked |
| + // to do so, we will still make sure we output the correct number of |
| + // leading digits. |
| + // Since we are generating the digits in reverse order, we actually |
| + // have to discard digits in the order that we have already emitted |
| + // them. This is essentially equivalent to: |
| + // memmove(start, start+1, --ptr - start) |
| + --ptr; |
| + for (char* move = start; move < ptr; ++move) |
| + *move = move[1]; |
| + } else |
| + goto cannot_write_anything_but_nul; |

jln (very slow on Chromium)
2013/08/01 00:03:15
Any way to split this to a subfunction ?
|
| + } else |
| + ++n; |
| + |
| + // Output the next digit and (if necessary) compensate for the most |
| + // negative integer, which was stored above as -(i + 1). Its decimal form |
| + // always ends in 2, 4, 6, or 8 no matter the bit width of the integer, |
| + // so adding "minint" to the lowest digit can never produce a carry. |
| + if (n <= sz) { |
| + if (!num && started) |
| + if (needs_minus) { |
| + *ptr++ = '-'; |
| + needs_minus = false; |
| + } else |
| + *ptr++ = pad; |
| + else { |
| + started = true; |
| + *ptr++ = (upcase ? "0123456789ABCDEF" : "0123456789abcdef") |
| + [num%base+minint]; |
| + } |
| + } |
| + |
| + cannot_write_anything_but_nul: |
| + minint = 0; |
| + num /= base; |
| + |
| + // Add padding, if requested. |
| + if (padding > 0) { |
| + --padding; |
| + |
| + // Performance optimization for when we are asked to output |
| + // excessive padding, but our output buffer is limited in size. |
| + // Even if we output a 128bit number in binary, we would never |
| + // write more than 130 characters. So, anything beyond this limit |
| + // and we can compute the result arithmetically. |
| + if (count > n && count - n > 130) { |
| + IncrementCount(&count, padding); |
| + padding = 0; |
| + } |
| + } |
| + } while (num || padding || needs_minus); |
| + |
| + // Terminate the output with a NUL character. |
| + if (sz > 0) |
| + *ptr = '\000'; |
| + |
| + // Conversion to ASCII actually resulted in the digits being in reverse |
| + // order. We can't easily generate them in forward order, as we can't tell |
| + // the number of characters needed until we are done converting. |
| + // So, now, we reverse the string (except for the possible '-' sign). |
| + while (--ptr > start) { |
| + char ch = *ptr; |
| + *ptr = *start; |
| + *start++ = ch; |
| + } |
| + return count; |
| +} |
| + |
| +} // anonymous namespace |
| + |
| +ssize_t internal::FormatN(char* buf, size_t sz, const char* fmt, |
| + const Arg* args, const size_t max_args) { |
| + // Fail with -1 unless "buf" has room for at least one NUL byte. |
| + if (static_cast<ssize_t>(sz) < 1) |
| + return -1; |
| + |
| + // Iterate over the format string, copying ordinary characters and expanding |
| + // '%' directives from "args" (at most "max_args" of them) as encountered. |
| + char* ptr = buf; |
| + size_t padding; |
| + char pad; |
| + size_t count = 0; |
| + for (unsigned int cur_arg = 0; |
| + *fmt && |
| + count != static_cast<size_t>(std::numeric_limits<ssize_t>::max()); ) { |
| + if (*fmt++ == '%') { |
| + padding = 0; |
| + pad = ' '; |
| + char ch = *fmt++; |
| + format_character_found: |
| + switch (ch) { |
| + case '0': case '1': case '2': case '3': case '4': |
| + case '5': case '6': case '7': case '8': case '9': |
| + // Found a width parameter. Convert to an integer value and store in |
| + // "padding". If the leading digit is a zero, change the padding |
| + // character from a space ' ' to a zero '0'. |
| + pad = ch == '0' ? '0' : ' '; |
| + for (;;) { |
| + const size_t max_padding = std::numeric_limits<ssize_t>::max(); |
| + if (padding > max_padding/10 || |
| + 10*padding > max_padding - (ch - '0')) { |
| + RAW_DCHECK(padding <= max_padding/10 && |
| + 10*padding <= max_padding - (ch - '0')); |
| + // Integer overflow detected. Skip the rest of the width until |
| + // we find the format character, then do the normal error handling. |
| + while ((ch = *fmt++) >= '0' && ch <= '9') { |
| + } |
| + goto fail_to_expand; |
| + } |
| + padding = 10*padding + ch - '0'; |
| + ch = *fmt++; |
| + if (ch < '0' || ch > '9') { |
| + // Reached the end of the width parameter. This is where the format |
| + // character is found. |
| + goto format_character_found; |
| + } |
| + } |
| + break; |
| + case 'c': { // Output an ASCII character. |
| + // Check that there are arguments left to be inserted. |
| + if (cur_arg >= max_args) { |
| + RAW_DCHECK(cur_arg < max_args); |
| + goto fail_to_expand; |
| + } |
| + |
| + // Check that the argument has the expected type. |
| + const Arg& arg = args[cur_arg++]; |
| + if (arg.type_ != Arg::INT && |
| + arg.type_ != Arg::UINT) { |
| + RAW_DCHECK(arg.type_ == Arg::INT || |
| + arg.type_ == Arg::UINT); |
| + goto fail_to_expand; |
| + } |
| + |
| + // Apply padding, if needed. |
| + Pad(buf, sz, &count, ' ', padding, 1, &ptr); |
| + |
| + // Convert the argument to an ASCII character and output it. |
| + char ch = static_cast<char>(arg.i_); |
| + if (!ch) |
| + goto end_of_output_buffer; |
| + if (Out(buf, sz, &count, ch)) |
| + ++ptr; |
| + break; } |
| + case 'd': { // Output a signed or unsigned integer-like value. |
| + // Check that there are arguments left to be inserted. |
| + if (cur_arg >= max_args) { |
| + RAW_DCHECK(cur_arg < max_args); |
| + goto fail_to_expand; |
| + } |
| + |
| + // Check that the argument has the expected type. |
| + const Arg& arg = args[cur_arg++]; |
| + if (arg.type_ != Arg::INT && |
| + arg.type_ != Arg::UINT) { |
| + RAW_DCHECK(arg.type_ == Arg::INT || |
| + arg.type_ == Arg::UINT); |
| + goto fail_to_expand; |
| + } |
| + |
| + // Our implementation of IToASCII() can handle all widths of data types |
| + // and can print both signed and unsigned values. |
| + IncrementCount(&count, |
| + IToASCII(arg.type_ == Arg::INT, false, arg.i_, |
| + ptr, sz - (ptr - buf), 10, padding, pad)); |
| + |
| + // Advance "ptr" to the end of the string that was just emitted. |
| + if (sz - (ptr - buf)) |
| + while (*ptr) |
| + ++ptr; |
| + break; } |
| + case 'x': // Output an unsigned hexadecimal value. |
| + case 'X': |
| + case 'p': { // Output a pointer value. |
| + // Check that there are arguments left to be inserted. |
| + if (cur_arg >= max_args) { |
| + RAW_DCHECK(cur_arg < max_args); |
| + goto fail_to_expand; |
| + } |
| + |
| + const Arg& arg = args[cur_arg++]; |
| + int64_t i; |
| + switch (ch) { |
| + case 'x': // Hexadecimal values are available for integer-like args. |
| + case 'X': |
| + // Check that the argument has the expected type. |
| + if (arg.type_ != Arg::INT && |
| + arg.type_ != Arg::UINT) { |
| + RAW_DCHECK(arg.type_ == Arg::INT || |
| + arg.type_ == Arg::UINT); |
| + goto fail_to_expand; |
| + } |
| + i = arg.i_; |
| + |
| + // The Arg() constructor automatically performed sign expansion on |
| + // signed parameters. This is great when outputting a %d decimal |
| + // number, but can result in unexpected leading 0xFF bytes when |
| + // outputting a %x hexadecimal number. Mask bits, if necessary. |
| + // We have to do this here, instead of in the Arg() constructor, as |
| + // the Arg() constructor cannot tell whether we will output a %d |
| + // or a %x. Only the latter should experience masking. |
| + if (arg.width_ < sizeof(int64_t)) |
| + i &= (1LL << (8*arg.width_)) - 1; |
| + break; |
| + default: |
| + // Pointer values require an actual pointer or a string. |
| + if (arg.type_ == Arg::POINTER) |
| + i = reinterpret_cast<uintptr_t>(arg.ptr_); |
| + else if (arg.type_ == Arg::STRING) |
| + i = reinterpret_cast<uintptr_t>(arg.s_); |
| + else if (arg.type_ == Arg::INT && arg.width_ == sizeof(void *) && |
| + arg.i_ == 0) // Allow C++'s version of NULL |
| + i = 0; |
| + else { |
| + RAW_DCHECK(arg.type_ == Arg::POINTER || |
| + arg.type_ == Arg::STRING); |
| + goto fail_to_expand; |
| + } |
| + |
| + // Pointers always include the "0x" prefix. This affects padding. |
| + if (padding) { |
| + if (pad == ' ') { |
| + // Predict the number of hex digits (including "0x" prefix) that |
| + // will be output for this address when it is converted to ASCII. |
| + size_t chars = 2; |
| + uint64_t j = i; |
| + do { |
| + ++chars; |
| + j >>= 4; |
| + } while (j); |
| + |
| + // Output the necessary number of space characters to perform |
| + // padding. We can't rely on IToASCII() to do that for us, as it |
| + // would incorrectly add padding _after_ the "0x" prefix. |
| + Pad(buf, sz, &count, pad, padding, chars, &ptr); |
| + |
| + // Inform IToASCII() that it no longer needs to handle the |
| + // padding. |
| + padding = 0; |
| + } else { |
| + // Adjust for the two-character "0x" prefix. |
| + padding = padding >= 2 ? padding - 2 : 0; |
| + } |
| + } |
| + |
| + // Insert "0x" prefix, if there is still sufficient space in the |
| + // output buffer. |
| + if (Out(buf, sz, &count, '0')) |
| + ++ptr; |
| + if (Out(buf, sz, &count, 'x')) |
| + ++ptr; |
| + break; |
| + } |
| + |
| + // No matter what data type this value originated from, print it as |
| + // a regular hexadecimal number. |
| + IncrementCount(&count, |
| + IToASCII(false, ch != 'x', i, ptr, sz - (ptr - buf), |
| + 16, padding, pad)); |
| + |
| + // Advance "ptr" to the end of the string that was just emitted. |
| + if (sz - (ptr - buf)) |
| + while (*ptr) |
| + ++ptr; |
| + break; } |
| + case 's': { |
| + // Check that there are arguments left to be inserted. |
| + if (cur_arg >= max_args) { |
| + RAW_DCHECK(cur_arg < max_args); |
| + goto fail_to_expand; |
| + } |
| + |
| + // Check that the argument has the expected type. |
| + const Arg& arg = args[cur_arg++]; |
| + const char *s; |
| + if (arg.type_ == Arg::STRING) |
| + s = arg.s_ ? arg.s_ : "<NULL>"; |
| + else if (arg.type_ == Arg::INT && arg.width_ == sizeof(void *) && |
| + arg.i_ == 0) // Allow C++'s version of NULL |
| + s = "<NULL>"; |
| + else { |
| + RAW_DCHECK(arg.type_ == Arg::STRING); |
| + goto fail_to_expand; |
| + } |
| + |
| + // Apply padding, if needed. This requires us to first check the |
| + // length of the string that we are outputting. |
| + if (padding) { |
| + size_t len = 0; |
| + for (const char* src = s; *src++; ) |
| + ++len; |
| + Pad(buf, sz, &count, ' ', padding, len, &ptr); |
| + } |
| + |
| + // Printing a string involves nothing more than copying it into the |
| + // output buffer and making sure we don't output more bytes than |
| + // available space. |
| + for (const char* src = s; *src; ) |
| + if (Out(buf, sz, &count, *src++)) |
| + ++ptr; |
| + break; } |
| + case '%': |
| + // Quoted percent '%' character. |
| + goto copy_verbatim; |
| + fail_to_expand: |
| + // C++ gives us tools to do type checking -- something that snprintf() |
| + // could never really do. So, whenever we see arguments that don't |
| + // match up with the format string, we refuse to output them. But |
| + // since we have to be extremely conservative about being async- |
| + // signal-safe, we are limited in the type of error handling that we |
| + // can do in production builds (in debug builds we can use RAW_DCHECK() |
| + // and hope for the best). So, all we do is pass the format string |
| + // unchanged. That should eventually get the user's attention; and in |
| + // the meantime, it hopefully doesn't lose too much data. |
| + default: |
| + // Unknown or unsupported format character. Just copy verbatim to |
| + // output. |
| + if (Out(buf, sz, &count, '%')) |
| + ++ptr; |
| + if (!ch) |
| + goto end_of_format_string; |
| + if (Out(buf, sz, &count, ch)) |
| + ++ptr; |
| + break; |
| + } |
| + } else { |
| + copy_verbatim: |
| + if (Out(buf, sz, &count, fmt[-1])) |
| + ++ptr; |
| + } |
| + } |
| + end_of_format_string: |
| + end_of_output_buffer: |
| + *ptr = '\000'; |
| + IncrementCount(&count); |
| + return static_cast<ssize_t>(count)-1; /* Counted output length, excluding the NUL, including characters that did not fit. */ |
| +} |
| + |
| +ssize_t FormatN(char* buf, size_t N, const char* fmt) { |
| + // Fail with -1 unless "buf" has room for at least one NUL byte. |
| + ssize_t n = static_cast<ssize_t>(N); |
| + if (n < 1) |
| + return -1; |
| + size_t count = 0; |
| + |
| + // In the slow-path, we deal with errors by copying the contents of |
| + // "fmt" unexpanded. This means that, if no arguments are passed, the |
| + // Format() function always degenerates to a version of strncpy() that |
| + // de-duplicates '%' characters. |
| + char* dst = buf; |
| + const char* src = fmt; |
| + for (; *src; ++src) { |
| + char ch = *src; |
| + if (!IncrementCount(&count) && n > 1) { |
| + --dst; |
| + break; |
| + } |
| + if (n > 1) { |
| + --n; |
| + *dst++ = ch; |
| + } |
| + if (ch == '%' && src[1] == '%') /* Skip the second '%' of a "%%" pair. */ |
| + ++src; |
| + } |
| + IncrementCount(&count); |
| + *dst = '\000'; |
| + return static_cast<ssize_t>(count)-1; /* Counted output length, excluding the NUL, including characters that did not fit. */ |
| +} |
| + |
| +} // namespace debug |
| +} // namespace base |