Index: base/debug/format.cc |
diff --git a/base/debug/format.cc b/base/debug/format.cc |
new file mode 100644 |
index 0000000000000000000000000000000000000000..bfd790795c2add19be70bee5a564dd2f8395873a |
--- /dev/null |
+++ b/base/debug/format.cc |
@@ -0,0 +1,556 @@ |
+// Copyright (c) 2013 The Chromium Authors. All rights reserved. |
+// Use of this source code is governed by a BSD-style license that can be |
+// found in the LICENSE file. |
+// |
+// Author: markus@chromium.org |
+ |
+#include <limits> |
+ |
+#include "base/debug/format.h" |
+ |
+#if !defined(NDEBUG) |
+// In debug builds, we use RAW_CHECK() to print useful error messages, if |
+// Format() is called with broken arguments. |
+// As our contract promises that Format() can be called from any |
jln (very slow on Chromium)
2013/08/01 00:03:15
nit: remove crazy
|
+// run-time context, it is not actually safe to call logging functions from it; |
+// and we only ever do so for debug builds and hope for the best. |
+// We should _never_ call any logging function other than RAW_CHECK(), and |
+// we should _never_ include any logging code that is active in production |
+// builds. |
+// In other words, please do not remove the #ifdef around this #include. |
+// Instead, in production builds we opt for returning a degraded result, |
+// whenever an error is encountered. |
+// E.g. The broken function call |
+// Format("errno = %d (%x)", errno, strerror(errno)) |
+// will print something like |
+// errno = 13 (%x) |
+// instead of |
+// errno = 13 (Access denied) |
+// In most of the anticipated use cases, that's probably the preferred |
+// behavior. |
+#include "base/logging.h" |
+// Debug builds: RAW_DCHECK is an alias for RAW_CHECK. |
+#define RAW_DCHECK RAW_CHECK |
+#else |
+// Production builds: RAW_DCHECK(x) still evaluates "x" but ignores the result, |
+// and base/logging.h is not included. |
+#define RAW_DCHECK(x) do { if (x) { } } while (0) |
jln (very slow on Chromium)
2013/08/01 00:03:15
Do you want to just add this to base/logging.h ?
|
+#endif |
+ |
+ |
+namespace base { |
+namespace debug { |
+ |
+// The code in this file is extremely careful to be async-signal-safe. |
+// |
+// Most obviously, we avoid calling any code that could dynamically allocate |
+// memory. Doing so would almost certainly result in bugs and dead-locks. |
+// We also avoid calling any other STL functions that could have unintended |
+// side-effects involving memory allocation or access to other shared |
+// resources. |
+// |
+// But on top of that, we also avoid calling other library functions, as many |
+// of them have the side-effect of calling getenv() (in order to deal with |
+// localization) or accessing errno. The latter sounds benign, but there are |
+// several execution contexts where it isn't even possible to safely read let |
+// alone write errno. |
+// |
+// The stated design goal of the Format() function is that it can be called |
+// from any context that can safely call C or C++ code (i.e. anything that |
+// doesn't require assembly code). |
+// |
+// For a brief overview of some but not all of the issues with async-signal- |
+// safety, refer to: |
+// http://pubs.opengroup.org/onlinepubs/009695399/functions/xsh_chap02_04.html |
+ |
+namespace { |
+ |
jln (very slow on Chromium)
2013/08/01 00:03:15
Please document this function and its |parameters|
|
+// Adds "inc" to "*count", saturating at SSIZE_MAX. |
+// |
+// count: in/out running total of characters. |
+// inc: amount to add (defaults to 1). |
+// |
+// Returns true if "inc" was added in full. Returns false if the sum would have |
+// exceeded SSIZE_MAX; in that case "*count" is set to SSIZE_MAX. |
+inline bool IncrementCount(size_t* count, size_t inc = 1) { |
jln (very slow on Chromium)
2013/08/01 00:03:15
Please, avoid default parameters. Just have a Incr
|
+ // "inc" is either 1 or a "padding" value. Padding is clamped at run-time to |
+ // at most SSIZE_MAX. So, we know that "inc" is always in the range |
+ // 1..SSIZE_MAX. |
+ // This allows us to compute "SSIZE_MAX - inc" without incurring any |
+ // integer overflows. |
+ RAW_DCHECK((size_t)inc <= (size_t)std::numeric_limits<ssize_t>::max()); |
+ if (*count > std::numeric_limits<ssize_t>::max() - inc) { |
+ *count = std::numeric_limits<ssize_t>::max(); |
+ return false; |
+ } else { |
+ *count += inc; |
+ return true; |
+ } |
+} |
+ |
+// Emits the character "ch" at position "*count" of "buf", if it fits. |
+// |
+// buf, sz: output buffer and its size in bytes. |
+// count: in/out number of characters emitted so far; also used as the index |
+// into "buf". It is incremented by one (saturating, see IncrementCount()) |
+// whether or not the character was stored. |
+// ch: the character to emit. |
+// |
+// The character is only stored while "*count + 1 < sz", so the last byte of |
+// "buf" is never written by this function. |
+// |
+// Returns true if "ch" was stored in "buf", false if it was dropped. |
+inline bool Out(char* buf, size_t sz, size_t* count, char ch) { |
jln (very slow on Chromium)
2013/08/01 00:03:15
Please document this function and its |parameters|
|
+ if (*count + 1 < sz) { |
jln (very slow on Chromium)
2013/08/01 00:03:15
if (sz >= 1 && count < sz - 1)
|
+ buf[*count] = ch; |
+ IncrementCount(count); |
+ return true; |
+ } |
+ IncrementCount(count); |
+ return false; |
+} |
+ |
+// Emits "padding - len" copies of the fill character "pad" (nothing, if |
+// "padding" does not exceed "len"). |
+// |
+// buf, sz, count: passed through to Out(); "*count" grows by the full number |
+// of fill characters, even if some of them did not fit. |
+// pad: the fill character. |
+// padding: requested field width. |
+// len: number of characters of the field that the caller emits itself. |
+// ptr: in/out write cursor; advanced by one for every fill character that |
+// Out() actually stored. |
+inline void Pad(char* buf, size_t sz, size_t* count, char pad, size_t padding, |
jln (very slow on Chromium)
2013/08/01 00:03:15
Please document this function and its |parameters|
|
+ size_t len, char** ptr) { |
+ char *dst = *ptr; |
+ for (; padding > len; --padding) |
+ if (Out(buf, sz, count, pad)) |
+ ++dst; |
+ else { |
+ // Out() failed but already counted the current fill character. Account |
+ // for the remaining ones arithmetically instead of looping over them. |
+ if (--padding) |
+ IncrementCount(count, padding-len); |
+ break; |
+ } |
+ *ptr = dst; |
+} |
+ |
+// POSIX doesn't define any async-signal-safe function for converting |
+// an integer to ASCII. Define our own version. |
+// |
+// This also gives us the ability to make the function a little more powerful |
+// and have it deal with padding, with truncation, and with predicting the |
+// length of the untruncated output. |
+// |
+// IToASCII() converts an (optionally signed) integer to ASCII. It never |
+// writes more than "sz" bytes. Output will be truncated as needed, and a NUL |
+// character is appended, unless "sz" is zero. It returns the number of non-NUL |
+// bytes that would be output if no truncation had happened. |
+// |
+// It supports bases 2 through 16. Padding can be done with either '0' zeros |
+// or ' ' spaces. |
+// |
+// Parameters: |
+// sign: treat "i" as a signed value and emit a '-' for negative numbers; |
+// only accepted together with base 10. |
+// upcase: use 'A'..'F' rather than 'a'..'f' for digits above 9. |
+// i: the value to convert; converted to uint64_t unless "sign" is set and |
+// the value is negative. |
+// buf, sz: output buffer and its size in bytes. |
+// base: numeric base of the output. |
+// padding: minimum field width; shorter output is filled on the left with |
+// "pad". |
+// pad: the fill character. |
+// |
+// If "base" is outside 2..16, or if "sign" is combined with a base other than |
+// 10, an empty string is stored (provided "sz" allows for it) and 0 is |
+// returned. |
+size_t IToASCII(bool sign, bool upcase, int64_t i, char* buf, size_t sz, |
+ int base, size_t padding, char pad) { |
+ // Sanity check for the "base". |
+ if (base < 2 || base > 16 || (sign && base != 10)) { |
+ if (static_cast<ssize_t>(sz) >= 1) |
+ buf[0] = '\000'; |
+ return 0; |
+ } |
+ |
+ // Handle negative numbers, if requested by caller. |
+ size_t count = 0; |
+ size_t n = 1; |
+ char* start = buf; |
+ int minint = 0; |
+ bool needs_minus = false; |
+ uint64_t num; |
+ if (sign && i < 0) { |
+ // If we aren't inserting padding, or if we are padding with '0' zeros, |
+ // we should insert the minus character now. It makes it easier to |
+ // correctly deal with truncated padded numbers. |
+ // On the other hand, if we are padding with ' ' spaces, we have to |
+ // delay outputting the minus character until later. |
+ if (padding <= 2 || pad == '0') { |
+ ++count; |
+ |
+ // Make sure we can write the '-' character. |
+ if (++n > sz) { |
+ if (sz > 0) |
+ *start = '\000'; |
+ } else |
+ *start++ = '-'; |
+ |
+ // Adjust padding, since we just output one character already. |
+ if (padding) |
+ --padding; |
+ } else |
+ needs_minus = true; |
+ |
+ // Turn our number positive. |
+ if (i == std::numeric_limits<int64_t>::min()) { |
+ // The most negative integer needs special treatment. |
+ minint = 1; |
+ num = -(i + 1); |
+ } else { |
+ // "Normal" negative numbers are easy. |
+ num = -i; |
+ } |
+ } else |
+ num = i; |
+ |
+ // Loop until we have converted the entire number. Output at least one |
+ // character (i.e. '0'). |
+ char* ptr = start; |
+ bool started = false; |
+ do { |
+ // Sanity check. If padding is used to fill the entire address space, |
+ // don't allow more than SSIZE_MAX bytes. |
+ if (++count == static_cast<size_t>(std::numeric_limits<ssize_t>::max())) { |
+ RAW_DCHECK(count < |
+ static_cast<size_t>(std::numeric_limits<ssize_t>::max())); |
+ break; |
+ } |
+ |
+ // Make sure there is still enough space left in our output buffer. |
+ if (n == sz) { |
+ if (ptr > start) { |
+ // It is rare that we need to output a partial number. But if asked |
+ // to do so, we will still make sure we output the correct number of |
+ // leading digits. |
+ // Since we are generating the digits in reverse order, we actually |
+ // have to discard digits in the order that we have already emitted |
+ // them. This is essentially equivalent to: |
+ // memmove(start, start+1, --ptr - start) |
+ --ptr; |
+ for (char* move = start; move < ptr; ++move) |
+ *move = move[1]; |
+ } else |
+ goto cannot_write_anything_but_nul; |
jln (very slow on Chromium)
2013/08/01 00:03:15
Any way to split this to a subfunction ?
|
+ } else |
+ ++n; |
+ |
+ // Output the next digit and (if necessary) compensate for the lowest- |
+ // most negative integer needing special treatment. This works because, |
+ // no matter the bit width of the integer, the lowest-most decimal |
+ // integer always ends in 2, 4, 6, or 8. |
+ if (n <= sz) { |
+ if (!num && started) |
+ if (needs_minus) { |
+ *ptr++ = '-'; |
+ needs_minus = false; |
+ } else |
+ *ptr++ = pad; |
+ else { |
+ started = true; |
+ *ptr++ = (upcase ? "0123456789ABCDEF" : "0123456789abcdef") |
+ [num%base+minint]; |
+ } |
+ } |
+ |
+ cannot_write_anything_but_nul: |
+ minint = 0; |
+ num /= base; |
+ |
+ // Add padding, if requested. |
+ if (padding > 0) { |
+ --padding; |
+ |
+ // Performance optimization for when we are asked to output |
+ // excessive padding, but our output buffer is limited in size. |
+ // Even if we output a 128bit number in binary, we would never |
+ // write more than 130 characters. So, anything beyond this limit |
+ // and we can compute the result arithmetically. |
+ if (count > n && count - n > 130) { |
+ IncrementCount(&count, padding); |
+ padding = 0; |
+ } |
+ } |
+ } while (num || padding || needs_minus); |
+ |
+ // Terminate the output with a NUL character. |
+ if (sz > 0) |
+ *ptr = '\000'; |
+ |
+ // Conversion to ASCII actually resulted in the digits being in reverse |
+ // order. We can't easily generate them in forward order, as we can't tell |
+ // the number of characters needed until we are done converting. |
+ // So, now, we reverse the string (except for the possible '-' sign). |
+ while (--ptr > start) { |
+ char ch = *ptr; |
+ *ptr = *start; |
+ *start++ = ch; |
+ } |
+ return count; |
+} |
+ |
+} // anonymous namespace |
+ |
+// Expands the format string "fmt" into "buf", taking values from "args". |
+// |
+// buf, sz: output buffer and its size in bytes. Output that does not fit is |
+// dropped; the result is always NUL-terminated. |
+// fmt: format string. Recognized conversions are %c, %d, %x, %X, %p, %s and |
+// %%, optionally preceded by a decimal field width. For %d, %x, %X and %p a |
+// width that starts with '0' pads with zeros instead of spaces. |
+// args, max_args: array of arguments and its length. Arguments are consumed |
+// in order, one per conversion. |
+// |
+// Returns -1 if "sz", converted to ssize_t, is less than 1. Otherwise returns |
+// the number of characters (excluding the NUL) of the complete expansion, |
+// even if the output had to be truncated. |
+// |
+// A conversion whose argument is missing or has the wrong type trips a |
+// RAW_DCHECK; in production builds it is copied to the output as '%' followed |
+// by the conversion character. A %c argument with the value NUL ends the |
+// output. |
+ssize_t internal::FormatN(char* buf, size_t sz, const char* fmt, |
+ const Arg* args, const size_t max_args) { |
+ // Make sure we can write at least one NUL byte. |
+ if (static_cast<ssize_t>(sz) < 1) |
+ return -1; |
+ |
+ // Iterate over format string and interpret '%' arguments as they are |
+ // encountered. |
+ char* ptr = buf; |
+ size_t padding; |
+ char pad; |
+ size_t count = 0; |
+ for (unsigned int cur_arg = 0; |
+ *fmt && |
+ count != static_cast<size_t>(std::numeric_limits<ssize_t>::max()); ) { |
+ if (*fmt++ == '%') { |
+ padding = 0; |
+ pad = ' '; |
+ char ch = *fmt++; |
+ format_character_found: |
+ switch (ch) { |
+ case '0': case '1': case '2': case '3': case '4': |
+ case '5': case '6': case '7': case '8': case '9': |
+ // Found a width parameter. Convert to an integer value and store in |
+ // "padding". If the leading digit is a zero, change the padding |
+ // character from a space ' ' to a zero '0'. |
+ pad = ch == '0' ? '0' : ' '; |
+ for (;;) { |
+ const size_t max_padding = std::numeric_limits<ssize_t>::max(); |
+ if (padding > max_padding/10 || |
+ 10*padding > max_padding - (ch - '0')) { |
+ RAW_DCHECK(padding <= max_padding/10 && |
+ 10*padding <= max_padding - (ch - '0')); |
+ // Integer overflow detected. Skip the rest of the width until |
+ // we find the format character, then do the normal error handling. |
+ while ((ch = *fmt++) >= '0' && ch <= '9') { |
+ } |
+ goto fail_to_expand; |
+ } |
+ padding = 10*padding + ch - '0'; |
+ ch = *fmt++; |
+ if (ch < '0' || ch > '9') { |
+ // Reached the end of the width parameter. This is where the format |
+ // character is found. |
+ goto format_character_found; |
+ } |
+ } |
+ break; |
+ case 'c': { // Output an ASCII character. |
+ // Check that there are arguments left to be inserted. |
+ if (cur_arg >= max_args) { |
+ RAW_DCHECK(cur_arg < max_args); |
+ goto fail_to_expand; |
+ } |
+ |
+ // Check that the argument has the expected type. |
+ const Arg& arg = args[cur_arg++]; |
+ if (arg.type_ != Arg::INT && |
+ arg.type_ != Arg::UINT) { |
+ RAW_DCHECK(arg.type_ == Arg::INT || |
+ arg.type_ == Arg::UINT); |
+ goto fail_to_expand; |
+ } |
+ |
+ // Apply padding, if needed. |
+ Pad(buf, sz, &count, ' ', padding, 1, &ptr); |
+ |
+ // Convert the argument to an ASCII character and output it. |
+ char ch = static_cast<char>(arg.i_); |
+ if (!ch) |
+ goto end_of_output_buffer; |
+ if (Out(buf, sz, &count, ch)) |
+ ++ptr; |
+ break; } |
+ case 'd': { // Output a signed or unsigned integer-like value. |
+ // Check that there are arguments left to be inserted. |
+ if (cur_arg >= max_args) { |
+ RAW_DCHECK(cur_arg < max_args); |
+ goto fail_to_expand; |
+ } |
+ |
+ // Check that the argument has the expected type. |
+ const Arg& arg = args[cur_arg++]; |
+ if (arg.type_ != Arg::INT && |
+ arg.type_ != Arg::UINT) { |
+ RAW_DCHECK(arg.type_ == Arg::INT || |
+ arg.type_ == Arg::UINT); |
+ goto fail_to_expand; |
+ } |
+ |
+ // Our implementation of IToASCII() can handle all widths of data types |
+ // and can print both signed and unsigned values. |
+ IncrementCount(&count, |
+ IToASCII(arg.type_ == Arg::INT, false, arg.i_, |
+ ptr, sz - (ptr - buf), 10, padding, pad)); |
+ |
+ // Advance "ptr" to the end of the string that was just emitted. |
+ if (sz - (ptr - buf)) |
+ while (*ptr) |
+ ++ptr; |
+ break; } |
+ case 'x': // Output an unsigned hexadecimal value. |
+ case 'X': |
+ case 'p': { // Output a pointer value. |
+ // Check that there are arguments left to be inserted. |
+ if (cur_arg >= max_args) { |
+ RAW_DCHECK(cur_arg < max_args); |
+ goto fail_to_expand; |
+ } |
+ |
+ const Arg& arg = args[cur_arg++]; |
+ int64_t i; |
+ switch (ch) { |
+ case 'x': // Hexadecimal values are available for integer-like args. |
+ case 'X': |
+ // Check that the argument has the expected type. |
+ if (arg.type_ != Arg::INT && |
+ arg.type_ != Arg::UINT) { |
+ RAW_DCHECK(arg.type_ == Arg::INT || |
+ arg.type_ == Arg::UINT); |
+ goto fail_to_expand; |
+ } |
+ i = arg.i_; |
+ |
+ // The Arg() constructor automatically performed sign expansion on |
+ // signed parameters. This is great when outputting a %d decimal |
+ // number, but can result in unexpected leading 0xFF bytes when |
+ // outputting a %x hexadecimal number. Mask bits, if necessary. |
+ // We have to do this here, instead of in the Arg() constructor, as |
+ // the Arg() constructor cannot tell whether we will output a %d |
+ // or a %x. Only the latter should experience masking. |
+ if (arg.width_ < sizeof(int64_t)) |
+ i &= (1LL << (8*arg.width_)) - 1; |
+ break; |
+ default: |
+ // Pointer values require an actual pointer or a string. |
+ if (arg.type_ == Arg::POINTER) |
+ i = reinterpret_cast<uintptr_t>(arg.ptr_); |
+ else if (arg.type_ == Arg::STRING) |
+ i = reinterpret_cast<uintptr_t>(arg.s_); |
+ else if (arg.type_ == Arg::INT && arg.width_ == sizeof(void *) && |
+ arg.i_ == 0) // Allow C++'s version of NULL |
+ i = 0; |
+ else { |
+ RAW_DCHECK(arg.type_ == Arg::POINTER || |
+ arg.type_ == Arg::STRING); |
+ goto fail_to_expand; |
+ } |
+ |
+ // Pointers always include the "0x" prefix. This affects padding. |
+ if (padding) { |
+ if (pad == ' ') { |
+ // Predict the number of hex digits (including "0x" prefix) that |
+ // will be output for this address when it is converted to ASCII. |
+ size_t chars = 2; |
+ uint64_t j = i; |
+ do { |
+ ++chars; |
+ j >>= 4; |
+ } while (j); |
+ |
+ // Output the necessary number of space characters to perform |
+ // padding. We can't rely on IToASCII() to do that for us, as it |
+ // would incorrectly add padding _after_ the "0x" prefix. |
+ Pad(buf, sz, &count, pad, padding, chars, &ptr); |
+ |
+ // Inform IToASCII() that it no longer needs to handle the |
+ // padding. |
+ padding = 0; |
+ } else { |
+ // Adjust for the two-character "0x" prefix. |
+ padding = padding >= 2 ? padding - 2 : 0; |
+ } |
+ } |
+ |
+ // Insert "0x" prefix, if there is still sufficient space in the |
+ // output buffer. |
+ if (Out(buf, sz, &count, '0')) |
+ ++ptr; |
+ if (Out(buf, sz, &count, 'x')) |
+ ++ptr; |
+ break; |
+ } |
+ |
+ // No matter what data type this value originated from, print it as |
+ // a regular hexadecimal number. |
+ IncrementCount(&count, |
+ IToASCII(false, ch != 'x', i, ptr, sz - (ptr - buf), |
+ 16, padding, pad)); |
+ |
+ // Advance "ptr" to the end of the string that was just emitted. |
+ if (sz - (ptr - buf)) |
+ while (*ptr) |
+ ++ptr; |
+ break; } |
+ case 's': { |
+ // Check that there are arguments left to be inserted. |
+ if (cur_arg >= max_args) { |
+ RAW_DCHECK(cur_arg < max_args); |
+ goto fail_to_expand; |
+ } |
+ |
+ // Check that the argument has the expected type. |
+ const Arg& arg = args[cur_arg++]; |
+ const char *s; |
+ if (arg.type_ == Arg::STRING) |
+ s = arg.s_ ? arg.s_ : "<NULL>"; |
+ else if (arg.type_ == Arg::INT && arg.width_ == sizeof(void *) && |
+ arg.i_ == 0) // Allow C++'s version of NULL |
+ s = "<NULL>"; |
+ else { |
+ RAW_DCHECK(arg.type_ == Arg::STRING); |
+ goto fail_to_expand; |
+ } |
+ |
+ // Apply padding, if needed. This requires us to first check the |
+ // length of the string that we are outputting. |
+ if (padding) { |
+ size_t len = 0; |
+ for (const char* src = s; *src++; ) |
+ ++len; |
+ Pad(buf, sz, &count, ' ', padding, len, &ptr); |
+ } |
+ |
+ // Printing a string involves nothing more than copying it into the |
+ // output buffer and making sure we don't output more bytes than |
+ // available space. |
+ for (const char* src = s; *src; ) |
+ if (Out(buf, sz, &count, *src++)) |
+ ++ptr; |
+ break; } |
+ case '%': |
+ // Quoted percent '%' character. |
+ goto copy_verbatim; |
+ fail_to_expand: |
+ // C++ gives us tools to do type checking -- something that snprintf() |
+ // could never really do. So, whenever we see arguments that don't |
+ // match up with the format string, we refuse to output them. But |
+ // since we have to be extremely conservative about being async- |
+ // signal-safe, we are limited in the type of error handling that we |
+ // can do in production builds (in debug builds we can use RAW_DCHECK() |
+ // and hope for the best). So, all we do is pass the format string |
+ // unchanged. That should eventually get the user's attention; and in |
+ // the meantime, it hopefully doesn't lose too much data. |
+ default: |
+ // Unknown or unsupported format character. Just copy verbatim to |
+ // output. |
+ if (Out(buf, sz, &count, '%')) |
+ ++ptr; |
+ if (!ch) |
+ goto end_of_format_string; |
+ if (Out(buf, sz, &count, ch)) |
+ ++ptr; |
+ break; |
+ } |
+ } else { |
+ copy_verbatim: |
+ if (Out(buf, sz, &count, fmt[-1])) |
+ ++ptr; |
+ } |
+ } |
+ end_of_format_string: |
+ end_of_output_buffer: |
+ *ptr = '\000'; |
+ IncrementCount(&count); |
+ return static_cast<ssize_t>(count)-1; |
+} |
+ |
+// Variant of FormatN() for a format string that comes without any arguments. |
+// |
+// Copies "fmt" into "buf" (of size "N" bytes), replacing each "%%" by a single |
+// '%', truncating as needed and always storing a terminating NUL. |
+// |
+// Returns -1 if "N", converted to ssize_t, is less than 1. Otherwise returns |
+// the number of characters (excluding the NUL) of the untruncated result. |
+ssize_t FormatN(char* buf, size_t N, const char* fmt) { |
+ // Make sure we can write at least one NUL byte. |
+ ssize_t n = static_cast<ssize_t>(N); |
+ if (n < 1) |
+ return -1; |
+ size_t count = 0; |
+ |
+ // In the slow-path, we deal with errors by copying the contents of |
+ // "fmt" unexpanded. This means, if there are no arguments passed, the |
+ // Format() function always degenerates to a version of strncpy() that |
+ // de-duplicates '%' characters. |
+ char* dst = buf; |
+ const char* src = fmt; |
+ for (; *src; ++src) { |
+ char ch = *src; |
+ if (!IncrementCount(&count) && n > 1) { |
+ --dst; |
+ break; |
+ } |
+ if (n > 1) { |
+ --n; |
+ *dst++ = ch; |
+ } |
+ if (ch == '%' && src[1] == '%') |
+ ++src; |
+ } |
+ IncrementCount(&count); |
+ *dst = '\000'; |
+ return static_cast<ssize_t>(count)-1; |
+} |
+ |
+} // namespace debug |
+} // namespace base |