Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(306)

Side by Side Diff: base/debug/format.cc

Issue 18656004: Added a new SafeSPrintf() function that implements snprintf() in an async-safe-fashion (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Addressed Jeffrey's comments Created 7 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 //
5 // Author: markus@chromium.org
6
7 #include <limits>
8
9 #include "base/debug/format.h"
10
// RAW_DCHECK(x): debug-only assertion used throughout this file.
// With NDEBUG undefined it is an alias for RAW_CHECK from base/logging.h;
// otherwise it expands to a statement that evaluates |x| and does nothing.
11 #if !defined(NDEBUG)
12 // In debug builds, we use RAW_CHECK() to print useful error messages, if
13 // Format() is called with broken arguments.
14 // As our contract promises that Format() can be called from any crazy
jln (very slow on Chromium) 2013/08/01 00:03:15 nit: remove crazy
15 // run-time context, it is not actually safe to call logging functions from it;
16 // and we only ever do so for debug builds and hope for the best.
17 // We should _never_ call any logging function other than RAW_CHECK(), and
18 // we should _never_ include any logging code that is active in production
19 // builds.
20 // In other words, please do not remove the #ifdef around this #include.
21 // Instead, in production builds we opt for returning a degraded result,
22 // whenever an error is encountered.
23 // E.g. The broken function call
24 // Format("errno = %d (%x)", errno, strerror(errno))
25 // will print something like
26 // errno = 13 (%x)
27 // instead of
28 // errno = 13 (Access denied)
29 // In most of the anticipated use cases, that's probably the preferred
30 // behavior.
31 #include "base/logging.h"
32 #define RAW_DCHECK RAW_CHECK
33 #else
// Production variant: |x| is still evaluated, but a false result is ignored.
34 #define RAW_DCHECK(x) do { if (x) { } } while (0)
jln (very slow on Chromium) 2013/08/01 00:03:15 Do you want to just add this to base/logging.h ?
35 #endif
36
37
38 namespace base {
39 namespace debug {
40
41 // The code in this file is extremely careful to be async-signal-safe.
42 //
43 // Most obviously, we avoid calling any code that could dynamically allocate
44 // memory. Doing so would almost certainly result in bugs and dead-locks.
45 // We also avoid calling any other STL functions that could have unintended
46 // side-effects involving memory allocation or access to other shared
47 // resources.
48 //
49 // But on top of that, we also avoid calling other library functions, as many
50 // of them have the side-effect of calling getenv() (in order to deal with
51 // localization) or accessing errno. The latter sounds benign, but there are
52 // several execution contexts where it isn't even possible to safely read let
53 // alone write errno.
54 //
55 // The stated design goal of the Format() function is that it can be called
56 // from any context that can safely call C or C++ code (i.e. anything that
57 // doesn't require assembly code).
58 //
59 // For a brief overview of some but not all of the issues with async-signal-
60 // safety, refer to:
61 // http://pubs.opengroup.org/onlinepubs/009695399/functions/xsh_chap02_04.html
62
63 namespace {
64
jln (very slow on Chromium) 2013/08/01 00:03:15 Please document this function and its |parameters|
// Adds |inc| to the running character tally in |*count|, saturating at
// SSIZE_MAX so that the tally can always be returned as an ssize_t.
//
//   count - in/out; number of characters produced so far.
//   inc   - number of characters to add. Callers pass either 1 or a padding
//           amount, both of which are expected to be in 1..SSIZE_MAX.
//
// Returns true if the full increment was applied, or false if |*count| had
// to be clamped to SSIZE_MAX.
//
// Review note (jln, 2013/08/01): the reviewer asked to avoid the default
// parameter; it is retained here so that existing callers keep compiling.
inline bool IncrementCount(size_t* count, size_t inc = 1) {
  const size_t kSSizeMax =
      static_cast<size_t>(std::numeric_limits<ssize_t>::max());

  // Debug builds flag an out-of-contract |inc| loudly.
  RAW_DCHECK(inc <= kSSizeMax);

  // Production builds compile RAW_DCHECK() to a no-op, so an oversized |inc|
  // must be rejected explicitly: otherwise "kSSizeMax - inc" would wrap
  // around and the saturation test below would be bypassed.
  if (inc > kSSizeMax || *count > kSSizeMax - inc) {
    *count = kSSizeMax;
    return false;
  }

  *count += inc;
  return true;
}
80
81 inline bool Out(char* buf, size_t sz, size_t* count, char ch) {
jln (very slow on Chromium) 2013/08/01 00:03:15 Please document this function and its |parameters|
82 if (*count + 1 < sz) {
jln (very slow on Chromium) 2013/08/01 00:03:15 if (sz >= 1 && count < sz - 1)
83 buf[*count] = ch;
84 IncrementCount(count);
85 return true;
86 }
87 IncrementCount(count);
88 return false;
89 }
90
91 inline void Pad(char* buf, size_t sz, size_t* count, char pad, size_t padding,
jln (very slow on Chromium) 2013/08/01 00:03:15 Please document this function and its |parameters|
92 size_t len, char** ptr) {
93 char *dst = *ptr;
94 for (; padding > len; --padding)
95 if (Out(buf, sz, count, pad))
96 ++dst;
97 else {
98 if (--padding)
99 IncrementCount(count, padding-len);
100 break;
101 }
102 *ptr = dst;
103 }
104
105 // POSIX doesn't define any async-signal-safe function for converting
106 // an integer to ASCII. Define our own version.
107 //
108 // This also gives us the ability to make the function a little more powerful
109 // and have it deal with padding, with truncation, and with predicting the
110 // length of the untruncated output.
111 //
112 // IToASCII() converts an (optionally signed) integer to ASCII. It never
113 // writes more than "sz" bytes. Output will be truncated as needed, and a NUL
114 // character is appended, unless "sz" is zero. It returns the number of non-NUL
115 // bytes that would be output if no truncation had happened.
116 //
117 // It supports bases 2 through 16. Padding can be done with either '0' zeros
118 // or ' ' spaces.
// Parameters:
//   sign    - treat |i| as signed and emit '-' for negative values. Only
//             accepted together with base 10.
//   upcase  - pick digits from "0123456789ABCDEF" instead of the lower-case
//             table.
//   i       - the value to convert.
//   buf, sz - destination buffer and its size in bytes.
//   base    - numeric base, 2 through 16.
//   padding - minimum field width; shorter output is filled on the left.
//   pad     - fill character ('0' or ' ').
// If |base| (or the sign/base combination) is rejected, a lone NUL is stored
// when sz >= 1 and 0 is returned.
119 size_t IToASCII(bool sign, bool upcase, int64_t i, char* buf, size_t sz,
120 int base, size_t padding, char pad) {
121 // Sanity check for the "base".
122 if (base < 2 || base > 16 || (sign && base != 10)) {
123 if (static_cast<ssize_t>(sz) >= 1)
124 buf[0] = '\000';
125 return 0;
126 }
127
128 // Handle negative numbers, if requested by caller.
// |count| is the untruncated output length that will be returned.
129 size_t count = 0;
// |n| tracks how many bytes of |buf| are spoken for. It starts at 1, which
// keeps one byte free for the terminating NUL written after the loop.
130 size_t n = 1;
// |start| marks where the digits begin (just past an eagerly written '-').
131 char* start = buf;
// |minint| is 1 only while converting the most negative int64_t; it is added
// to the first digit produced and then cleared.
132 int minint = 0;
// |needs_minus| is set when the '-' has to be emitted after the digits
// (space padding), so that it ends up directly in front of the number.
133 bool needs_minus = false;
134 uint64_t num;
135 if (sign && i < 0) {
136 // If we aren't inserting padding, or if we are padding with '0' zeros,
137 // we should insert the minus character now. It makes it easier to
138 // correctly deal with truncated padded numbers.
139 // On the other hand, if we are padding with ' ' spaces, we have to
140 // delay outputting the minus character until later.
141 if (padding <= 2 || pad == '0') {
142 ++count;
143
144 // Make sure we can write the '-' character.
145 if (++n > sz) {
146 if (sz > 0)
147 *start = '\000';
148 } else
149 *start++ = '-';
150
151 // Adjust padding, since we just output one character already.
152 if (padding)
153 --padding;
154 } else
155 needs_minus = true;
156
157 // Turn our number positive.
158 if (i == std::numeric_limits<int64_t>::min()) {
159 // The most negative integer needs special treatment.
// Negating it directly would overflow, so negate (i + 1) and let |minint|
// restore the missing 1 on the lowest digit.
160 minint = 1;
161 num = -(i + 1);
162 } else {
163 // "Normal" negative numbers are easy.
164 num = -i;
165 }
166 } else
167 num = i;
168
169 // Loop until we have converted the entire number. Output at least one
170 // character (i.e. '0').
// Digits are produced least-significant first; the string is reversed after
// the loop.
171 char* ptr = start;
172 bool started = false;
173 do {
174 // Sanity check. If padding is used to fill the entire address space,
175 // don't allow more than SSIZE_MAX bytes.
176 if (++count == static_cast<size_t>(std::numeric_limits<ssize_t>::max())) {
177 RAW_DCHECK(count <
178 static_cast<size_t>(std::numeric_limits<ssize_t>::max()));
179 break;
180 }
181
182 // Make sure there is still enough space left in our output buffer.
183 if (n == sz) {
184 if (ptr > start) {
185 // It is rare that we need to output a partial number. But if asked
186 // to do so, we will still make sure we output the correct number of
187 // leading digits.
188 // Since we are generating the digits in reverse order, we actually
189 // have to discard digits in the order that we have already emitted
190 // them. This is essentially equivalent to:
191 // memmove(start, start+1, --ptr - start)
192 --ptr;
193 for (char* move = start; move < ptr; ++move)
194 *move = move[1];
195 } else
196 goto cannot_write_anything_but_nul;
jln (very slow on Chromium) 2013/08/01 00:03:15 Any way to split this to a subfunction ?
197 } else
198 ++n;
199
200 // Output the next digit and (if necessary) compensate for the lowest-
201 // most negative integer needing special treatment. This works because,
202 // no matter the bit width of the integer, the lowest-most decimal
203 // integer always ends in 2, 4, 6, or 8.
// Once the value is exhausted (num == 0 after at least one digit), the
// remaining iterations emit the delayed '-' once and then fill characters.
204 if (n <= sz) {
205 if (!num && started)
206 if (needs_minus) {
207 *ptr++ = '-';
208 needs_minus = false;
209 } else
210 *ptr++ = pad;
211 else {
212 started = true;
213 *ptr++ = (upcase ? "0123456789ABCDEF" : "0123456789abcdef")
214 [num%base+minint];
215 }
216 }
217
// Reached by fall-through, and also by the goto above when the buffer has no
// room for even a single character; the bookkeeping below still runs so that
// |count| keeps growing.
218 cannot_write_anything_but_nul:
219 minint = 0;
220 num /= base;
221
222 // Add padding, if requested.
223 if (padding > 0) {
224 --padding;
225
226 // Performance optimization for when we are asked to output
227 // excessive padding, but our output buffer is limited in size.
228 // Even if we output a 128bit number in binary, we would never
229 // write more than 130 characters. So, anything beyond this limit
230 // and we can compute the result arithmetically.
231 if (count > n && count - n > 130) {
232 IncrementCount(&count, padding);
233 padding = 0;
234 }
235 }
236 } while (num || padding || needs_minus);
237
238 // Terminate the output with a NUL character.
239 if (sz > 0)
240 *ptr = '\000';
241
242 // Conversion to ASCII actually resulted in the digits being in reverse
243 // order. We can't easily generate them in forward order, as we can't tell
244 // the number of characters needed until we are done converting.
245 // So, now, we reverse the string (except for the possible '-' sign).
246 while (--ptr > start) {
247 char ch = *ptr;
248 *ptr = *start;
249 *start++ = ch;
250 }
251 return count;
252 }
253
254 } // anonymous namespace
255
// Expands |fmt| into |buf|, consuming entries of |args| in order.
//
//   buf, sz  - output buffer and its size in bytes. If |sz|, viewed as an
//              ssize_t, is smaller than 1 the function returns -1 without
//              touching |buf|.
//   fmt      - format string. Recognized directives are %c, %d, %x, %X, %p,
//              %s and %%, each optionally preceded by a decimal width; a
//              leading '0' in the width selects zero padding.
//   args     - array of type-tagged arguments.
//   max_args - number of entries in |args|.
//
// Output is always NUL-terminated and truncated to fit. The return value is
// the character tally minus the NUL; the tally also counts characters that
// were dropped by truncation and saturates at SSIZE_MAX.
//
// A directive whose argument is missing or has the wrong type is not
// expanded; '%' and the format character are copied to the output instead.
// A %c argument of value zero ends processing of the format string.
256 ssize_t internal::FormatN(char* buf, size_t sz, const char* fmt,
257 const Arg* args, const size_t max_args) {
258 // Make sure we can write at least one NUL byte.
259 if (static_cast<ssize_t>(sz) < 1)
260 return -1;
261
262 // Iterate over format string and interpret '%' arguments as they are
263 // encountered.
// |ptr| is the write cursor; it only advances for characters that were
// actually stored, whereas |count| advances for every character produced.
264 char* ptr = buf;
265 size_t padding;
266 char pad;
267 size_t count = 0;
268 for (unsigned int cur_arg = 0;
269 *fmt &&
270 count != static_cast<size_t>(std::numeric_limits<ssize_t>::max()); ) {
271 if (*fmt++ == '%') {
272 padding = 0;
273 pad = ' ';
274 char ch = *fmt++;
// Re-entered via goto once a width prefix has been consumed, with |ch|
// holding the character that follows the width.
275 format_character_found:
276 switch (ch) {
277 case '0': case '1': case '2': case '3': case '4':
278 case '5': case '6': case '7': case '8': case '9':
279 // Found a width parameter. Convert to an integer value and store in
280 // "padding". If the leading digit is a zero, change the padding
281 // character from a space ' ' to a zero '0'.
282 pad = ch == '0' ? '0' : ' ';
283 for (;;) {
284 const size_t max_padding = std::numeric_limits<ssize_t>::max();
285 if (padding > max_padding/10 ||
286 10*padding > max_padding - (ch - '0')) {
287 RAW_DCHECK(padding <= max_padding/10 &&
288 10*padding <= max_padding - (ch - '0'));
289 // Integer overflow detected. Skip the rest of the width until
290 // we find the format character, then do the normal error handling.
291 while ((ch = *fmt++) >= '0' && ch <= '9') {
292 }
293 goto fail_to_expand;
294 }
295 padding = 10*padding + ch - '0';
296 ch = *fmt++;
297 if (ch < '0' || ch > '9') {
298 // Reached the end of the width parameter. This is where the format
299 // character is found.
300 goto format_character_found;
301 }
302 }
303 break;
304 case 'c': { // Output an ASCII character.
305 // Check that there are arguments left to be inserted.
306 if (cur_arg >= max_args) {
307 RAW_DCHECK(cur_arg < max_args);
308 goto fail_to_expand;
309 }
310
311 // Check that the argument has the expected type.
312 const Arg& arg = args[cur_arg++];
313 if (arg.type_ != Arg::INT &&
314 arg.type_ != Arg::UINT) {
315 RAW_DCHECK(arg.type_ == Arg::INT ||
316 arg.type_ == Arg::UINT);
317 goto fail_to_expand;
318 }
319
320 // Apply padding, if needed.
// Characters are always padded with spaces, regardless of |pad|.
321 Pad(buf, sz, &count, ' ', padding, 1, &ptr);
322
323 // Convert the argument to an ASCII character and output it.
// A zero character terminates the output right here.
324 char ch = static_cast<char>(arg.i_);
325 if (!ch)
326 goto end_of_output_buffer;
327 if (Out(buf, sz, &count, ch))
328 ++ptr;
329 break; }
330 case 'd': { // Output a signed or unsigned integer-like value.
331 // Check that there are arguments left to be inserted.
332 if (cur_arg >= max_args) {
333 RAW_DCHECK(cur_arg < max_args);
334 goto fail_to_expand;
335 }
336
337 // Check that the argument has the expected type.
338 const Arg& arg = args[cur_arg++];
339 if (arg.type_ != Arg::INT &&
340 arg.type_ != Arg::UINT) {
341 RAW_DCHECK(arg.type_ == Arg::INT ||
342 arg.type_ == Arg::UINT);
343 goto fail_to_expand;
344 }
345
346 // Our implementation of IToASCII() can handle all widths of data types
347 // and can print both signed and unsigned values.
348 IncrementCount(&count,
349 IToASCII(arg.type_ == Arg::INT, false, arg.i_,
350 ptr, sz - (ptr - buf), 10, padding, pad));
351
352 // Advance "ptr" to the end of the string that was just emitted.
353 if (sz - (ptr - buf))
354 while (*ptr)
355 ++ptr;
356 break; }
357 case 'x': // Output an unsigned hexadecimal value.
358 case 'X':
359 case 'p': { // Output a pointer value.
360 // Check that there are arguments left to be inserted.
361 if (cur_arg >= max_args) {
362 RAW_DCHECK(cur_arg < max_args);
363 goto fail_to_expand;
364 }
365
366 const Arg& arg = args[cur_arg++];
367 int64_t i;
368 switch (ch) {
369 case 'x': // Hexadecimal values are available for integer-like args.
370 case 'X':
371 // Check that the argument has the expected type.
372 if (arg.type_ != Arg::INT &&
373 arg.type_ != Arg::UINT) {
374 RAW_DCHECK(arg.type_ == Arg::INT ||
375 arg.type_ == Arg::UINT);
376 goto fail_to_expand;
377 }
378 i = arg.i_;
379
380 // The Arg() constructor automatically performed sign expansion on
381 // signed parameters. This is great when outputting a %d decimal
382 // number, but can result in unexpected leading 0xFF bytes when
383 // outputting a %x hexadecimal number. Mask bits, if necessary.
384 // We have to do this here, instead of in the Arg() constructor, as
385 // the Arg() constructor cannot tell whether we will output a %d
386 // or a %x. Only the latter should experience masking.
387 if (arg.width_ < sizeof(int64_t))
388 i &= (1LL << (8*arg.width_)) - 1;
389 break;
390 default:
391 // Pointer values require an actual pointer or a string.
392 if (arg.type_ == Arg::POINTER)
393 i = reinterpret_cast<uintptr_t>(arg.ptr_);
394 else if (arg.type_ == Arg::STRING)
395 i = reinterpret_cast<uintptr_t>(arg.s_);
396 else if (arg.type_ == Arg::INT && arg.width_ == sizeof(void *) &&
397 arg.i_ == 0) // Allow C++'s version of NULL
398 i = 0;
399 else {
400 RAW_DCHECK(arg.type_ == Arg::POINTER ||
401 arg.type_ == Arg::STRING);
402 goto fail_to_expand;
403 }
404
405 // Pointers always include the "0x" prefix. This affects padding.
406 if (padding) {
407 if (pad == ' ') {
408 // Predict the number of hex digits (including "0x" prefix) that
409 // will be output for this address when it is converted to ASCII.
410 size_t chars = 2;
411 uint64_t j = i;
412 do {
413 ++chars;
414 j >>= 4;
415 } while (j);
416
417 // Output the necessary number of space characters to perform
418 // padding. We can't rely on IToASCII() to do that for us, as it
419 // would incorrectly add padding _after_ the "0x" prefix.
420 Pad(buf, sz, &count, pad, padding, chars, &ptr);
421
422 // Inform IToASCII() that it no longer needs to handle the
423 // padding.
424 padding = 0;
425 } else {
426 // Adjust for the two-character "0x" prefix.
427 padding = padding >= 2 ? padding - 2 : 0;
428 }
429 }
430
431 // Insert "0x" prefix, if there is still sufficient space in the
432 // output buffer.
433 if (Out(buf, sz, &count, '0'))
434 ++ptr;
435 if (Out(buf, sz, &count, 'x'))
436 ++ptr;
437 break;
438 }
439
440 // No matter what data type this value originated from, print it as
441 // a regular hexadecimal number.
// Upper-case digits are used for everything except 'x', i.e. for %X and %p.
442 IncrementCount(&count,
443 IToASCII(false, ch != 'x', i, ptr, sz - (ptr - buf),
444 16, padding, pad));
445
446 // Advance "ptr" to the end of the string that was just emitted.
447 if (sz - (ptr - buf))
448 while (*ptr)
449 ++ptr;
450 break; }
451 case 's': {
452 // Check that there are arguments left to be inserted.
453 if (cur_arg >= max_args) {
454 RAW_DCHECK(cur_arg < max_args);
455 goto fail_to_expand;
456 }
457
458 // Check that the argument has the expected type.
// A null string pointer (or a literal NULL passed as an integer) is printed
// as "<NULL>".
459 const Arg& arg = args[cur_arg++];
460 const char *s;
461 if (arg.type_ == Arg::STRING)
462 s = arg.s_ ? arg.s_ : "<NULL>";
463 else if (arg.type_ == Arg::INT && arg.width_ == sizeof(void *) &&
464 arg.i_ == 0) // Allow C++'s version of NULL
465 s = "<NULL>";
466 else {
467 RAW_DCHECK(arg.type_ == Arg::STRING);
468 goto fail_to_expand;
469 }
470
471 // Apply padding, if needed. This requires us to first check the
472 // length of the string that we are outputting.
473 if (padding) {
474 size_t len = 0;
475 for (const char* src = s; *src++; )
476 ++len;
477 Pad(buf, sz, &count, ' ', padding, len, &ptr);
478 }
479
480 // Printing a string involves nothing more than copying it into the
481 // output buffer and making sure we don't output more bytes than
482 // available space.
483 for (const char* src = s; *src; )
484 if (Out(buf, sz, &count, *src++))
485 ++ptr;
486 break; }
487 case '%':
488 // Quoted percent '%' character.
489 goto copy_verbatim;
490 fail_to_expand:
491 // C++ gives us tools to do type checking -- something that snprintf()
492 // could never really do. So, whenever we see arguments that don't
493 // match up with the format string, we refuse to output them. But
494 // since we have to be extremely conservative about being async-
495 // signal-safe, we are limited in the type of error handling that we
496 // can do in production builds (in debug builds we can use RAW_DCHECK()
497 // and hope for the best). So, all we do is pass the format string
498 // unchanged. That should eventually get the user's attention; and in
499 // the meantime, it hopefully doesn't lose too much data.
500 default:
501 // Unknown or unsupported format character. Just copy verbatim to
502 // output.
// Only '%' and the format character are emitted here; width digits that were
// already consumed are not reproduced. A NUL format character means the
// format string ended right after '%'.
503 if (Out(buf, sz, &count, '%'))
504 ++ptr;
505 if (!ch)
506 goto end_of_format_string;
507 if (Out(buf, sz, &count, ch))
508 ++ptr;
509 break;
510 }
511 } else {
512 copy_verbatim:
513 if (Out(buf, sz, &count, fmt[-1]))
514 ++ptr;
515 }
516 }
517 end_of_format_string:
518 end_of_output_buffer:
// Terminate the output, then count the NUL and subtract it again so that a
// saturated tally is reported as SSIZE_MAX - 1.
519 *ptr = '\000';
520 IncrementCount(&count);
521 return static_cast<ssize_t>(count)-1;
522 }
523
524 ssize_t FormatN(char* buf, size_t N, const char* fmt) {
525 // Make sure we can write at least one NUL byte.
526 ssize_t n = static_cast<ssize_t>(N);
527 if (n < 1)
528 return -1;
529 size_t count = 0;
530
531 // In the slow-path, we deal with errors by copying the contents of
532 // "fmt" unexpanded. This means, if there are no arguments passed, the
533 // Format() function always degenerates to version of strncpy() that
534 // de-duplicates '%' characters.
535 char* dst = buf;
536 const char* src = fmt;
537 for (; *src; ++src) {
538 char ch = *src;
539 if (!IncrementCount(&count) && n > 1) {
540 --dst;
541 break;
542 }
543 if (n > 1) {
544 --n;
545 *dst++ = ch;
546 }
547 if (ch == '%' && src[1] == '%')
548 ++src;
549 }
550 IncrementCount(&count);
551 *dst = '\000';
552 return static_cast<ssize_t>(count)-1;
553 }
554
555 } // namespace debug
556 } // namespace base
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698