OLD | NEW |
---|---|
(Empty) | |
1 // Copyright (c) 2013 The Chromium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 // | |
5 // Author: markus@chromium.org | |
6 | |
7 #include <limits> | |
8 | |
9 #include "base/debug/format.h" | |
10 | |
11 #if !defined(NDEBUG) | |
12 // In debug builds, we use RAW_CHECK() to print useful error messages, if | |
13 // Format() is called with broken arguments. | |
14 // As our contract promises that Format() can be called from any | |
jln (very slow on Chromium)
2013/08/01 00:03:15
nit: remove crazy
| |
15 // run-time context, it is not actually safe to call logging functions from it; | |
16 // and we only ever do so for debug builds and hope for the best. | |
17 // We should _never_ call any logging function other than RAW_CHECK(), and | |
18 // we should _never_ include any logging code that is active in production | |
19 // builds. | |
20 // In other words: please do not remove the #ifdef around this #include. | |
21 // Instead, in production builds we opt for returning a degraded result, | |
22 // whenever an error is encountered. | |
23 // E.g. The broken function call | |
24 // Format("errno = %d (%x)", errno, strerror(errno)) | |
25 // will print something like | |
26 // errno = 13 (%x) | |
27 // instead of | |
28 // errno = 13 (Access denied) | |
29 // In most of the anticipated use cases, that's probably the preferred | |
30 // behavior. | |
31 #include "base/logging.h" | |
32 #define RAW_DCHECK RAW_CHECK | |
33 #else | |
34 #define RAW_DCHECK(x) do { if (x) { } } while (0) | |
jln (very slow on Chromium)
2013/08/01 00:03:15
Do you want to just add this to base/logging.h ?
| |
35 #endif | |
36 | |
37 | |
38 namespace base { | |
39 namespace debug { | |
40 | |
41 // The code in this file is extremely careful to be async-signal-safe. | |
42 // | |
43 // Most obviously, we avoid calling any code that could dynamically allocate | |
44 // memory. Doing so would almost certainly result in bugs and dead-locks. | |
45 // We also avoid calling any other STL functions that could have unintended | |
46 // side-effects involving memory allocation or access to other shared | |
47 // resources. | |
48 // | |
49 // But on top of that, we also avoid calling other library functions, as many | |
50 // of them have the side-effect of calling getenv() (in order to deal with | |
51 // localization) or accessing errno. The latter sounds benign, but there are | |
52 // several execution contexts where it isn't even possible to safely read let | |
53 // alone write errno. | |
54 // | |
55 // The stated design goal of the Format() function is that it can be called | |
56 // from any context that can safely call C or C++ code (i.e. anything that | |
57 // doesn't require assembly code). | |
58 // | |
59 // For a brief overview of some but not all of the issues with async-signal- | |
60 // safety, refer to: | |
61 // http://pubs.opengroup.org/onlinepubs/009695399/functions/xsh_chap02_04.html | |
62 | |
63 namespace { | |
64 | |
jln (very slow on Chromium)
2013/08/01 00:03:15
Please document this function and its |parameters|
| |
65 inline bool IncrementCount(size_t* count, size_t inc = 1) { | |
jln (very slow on Chromium)
2013/08/01 00:03:15
Please, avoid default parameters. Just have a Incr
| |
66 // "inc" is either 1 or a "padding" value. Padding is clamped at run-time to | |
67 // at most SSIZE_MAX. So, we know that "inc" is always in the range | |
68 // 1..SSIZE_MAX. | |
69 // This allows us to compute "SSIZE_MAX - inc" without incurring any | |
70 // integer overflows. | |
71 RAW_DCHECK((size_t)inc <= (size_t)std::numeric_limits<ssize_t>::max()); | |
72 if (*count > std::numeric_limits<ssize_t>::max() - inc) { | |
73 *count = std::numeric_limits<ssize_t>::max(); | |
74 return false; | |
75 } else { | |
76 *count += inc; | |
77 return true; | |
78 } | |
79 } | |
80 | |
81 inline bool Out(char* buf, size_t sz, size_t* count, char ch) { | |
jln (very slow on Chromium)
2013/08/01 00:03:15
Please document this function and its |parameters|
| |
82 if (*count + 1 < sz) { | |
jln (very slow on Chromium)
2013/08/01 00:03:15
if (sz >= 1 && count < sz - 1)
| |
83 buf[*count] = ch; | |
84 IncrementCount(count); | |
85 return true; | |
86 } | |
87 IncrementCount(count); | |
88 return false; | |
89 } | |
90 | |
91 inline void Pad(char* buf, size_t sz, size_t* count, char pad, size_t padding, | |
jln (very slow on Chromium)
2013/08/01 00:03:15
Please document this function and its |parameters|
| |
92 size_t len, char** ptr) { | |
93 char *dst = *ptr; | |
94 for (; padding > len; --padding) | |
95 if (Out(buf, sz, count, pad)) | |
96 ++dst; | |
97 else { | |
98 if (--padding) | |
99 IncrementCount(count, padding-len); | |
100 break; | |
101 } | |
102 *ptr = dst; | |
103 } | |
104 | |
105 // POSIX doesn't define any async-signal-safe function for converting | |
106 // an integer to ASCII. Define our own version. | |
107 // | |
108 // This also gives us the ability to make the function a little more powerful | |
109 // and have it deal with padding, with truncation, and with predicting the | |
110 // length of the untruncated output. | |
111 // | |
112 // IToASCII() converts an (optionally signed) integer to ASCII. It never | |
113 // writes more than "sz" bytes. Output will be truncated as needed, and a NUL | |
114 // character is appended, unless "sz" is zero. It returns the number of non-NUL | |
115 // bytes that would be output if no truncation had happened. | |
116 // | |
117 // It supports bases 2 through 16. Padding can be done with either '0' zeros | |
118 // or ' ' spaces. | |
119 size_t IToASCII(bool sign, bool upcase, int64_t i, char* buf, size_t sz, | |
120 int base, size_t padding, char pad) { | |
121 // Sanity check for the "base". | |
122 if (base < 2 || base > 16 || (sign && base != 10)) { | |
123 if (static_cast<ssize_t>(sz) >= 1) | |
124 buf[0] = '\000'; | |
125 return 0; | |
126 } | |
127 | |
128 // Handle negative numbers, if requested by caller. | |
129 size_t count = 0; | |
130 size_t n = 1; | |
131 char* start = buf; | |
132 int minint = 0; | |
133 bool needs_minus = false; | |
134 uint64_t num; | |
135 if (sign && i < 0) { | |
136 // If we aren't inserting padding, or if we are padding with '0' zeros, | |
137 // we should insert the minus character now. It makes it easier to | |
138 // correctly deal with truncated padded numbers. | |
139 // On the other hand, if we are padding with ' ' spaces, we have to | |
140 // delay outputting the minus character until later. | |
141 if (padding <= 2 || pad == '0') { | |
142 ++count; | |
143 | |
144 // Make sure we can write the '-' character. | |
145 if (++n > sz) { | |
146 if (sz > 0) | |
147 *start = '\000'; | |
148 } else | |
149 *start++ = '-'; | |
150 | |
151 // Adjust padding, since we just output one character already. | |
152 if (padding) | |
153 --padding; | |
154 } else | |
155 needs_minus = true; | |
156 | |
157 // Turn our number positive. | |
158 if (i == std::numeric_limits<int64_t>::min()) { | |
159 // The most negative integer needs special treatment. | |
160 minint = 1; | |
161 num = -(i + 1); | |
162 } else { | |
163 // "Normal" negative numbers are easy. | |
164 num = -i; | |
165 } | |
166 } else | |
167 num = i; | |
168 | |
169 // Loop until we have converted the entire number. Output at least one | |
170 // character (i.e. '0'). | |
171 char* ptr = start; | |
172 bool started = false; | |
173 do { | |
174 // Sanity check. If padding is used to fill the entire address space, | |
175 // don't allow more than SSIZE_MAX bytes. | |
176 if (++count == static_cast<size_t>(std::numeric_limits<ssize_t>::max())) { | |
177 RAW_DCHECK(count < | |
178 static_cast<size_t>(std::numeric_limits<ssize_t>::max())); | |
179 break; | |
180 } | |
181 | |
182 // Make sure there is still enough space left in our output buffer. | |
183 if (n == sz) { | |
184 if (ptr > start) { | |
185 // It is rare that we need to output a partial number. But if asked | |
186 // to do so, we will still make sure we output the correct number of | |
187 // leading digits. | |
188 // Since we are generating the digits in reverse order, we actually | |
189 // have to discard digits in the order that we have already emitted | |
190 // them. This is essentially equivalent to: | |
191 // memmove(start, start+1, --ptr - start) | |
192 --ptr; | |
193 for (char* move = start; move < ptr; ++move) | |
194 *move = move[1]; | |
195 } else | |
196 goto cannot_write_anything_but_nul; | |
jln (very slow on Chromium)
2013/08/01 00:03:15
Any way to split this to a subfunction ?
| |
197 } else | |
198 ++n; | |
199 | |
200 // Output the next digit and (if necessary) compensate for the lowest- | |
201 // most negative integer needing special treatment. This works because, | |
202 // no matter the bit width of the integer, the lowest-most decimal | |
203 // integer always ends in 2, 4, 6, or 8. | |
204 if (n <= sz) { | |
205 if (!num && started) | |
206 if (needs_minus) { | |
207 *ptr++ = '-'; | |
208 needs_minus = false; | |
209 } else | |
210 *ptr++ = pad; | |
211 else { | |
212 started = true; | |
213 *ptr++ = (upcase ? "0123456789ABCDEF" : "0123456789abcdef") | |
214 [num%base+minint]; | |
215 } | |
216 } | |
217 | |
218 cannot_write_anything_but_nul: | |
219 minint = 0; | |
220 num /= base; | |
221 | |
222 // Add padding, if requested. | |
223 if (padding > 0) { | |
224 --padding; | |
225 | |
226 // Performance optimization for when we are asked to output | |
227 // excessive padding, but our output buffer is limited in size. | |
228 // Even if we output a 128bit number in binary, we would never | |
229 // write more than 130 characters. So, anything beyond this limit | |
230 // and we can compute the result arithmetically. | |
231 if (count > n && count - n > 130) { | |
232 IncrementCount(&count, padding); | |
233 padding = 0; | |
234 } | |
235 } | |
236 } while (num || padding || needs_minus); | |
237 | |
238 // Terminate the output with a NUL character. | |
239 if (sz > 0) | |
240 *ptr = '\000'; | |
241 | |
242 // Conversion to ASCII actually resulted in the digits being in reverse | |
243 // order. We can't easily generate them in forward order, as we can't tell | |
244 // the number of characters needed until we are done converting. | |
245 // So, now, we reverse the string (except for the possible '-' sign). | |
246 while (--ptr > start) { | |
247 char ch = *ptr; | |
248 *ptr = *start; | |
249 *start++ = ch; | |
250 } | |
251 return count; | |
252 } | |
253 | |
254 } // anonymous namespace | |
255 | |
// Expands the format string "fmt" into "buf", substituting the entries of
// "args" for the '%' directives that are encountered.
//
// Supported directives are %c, %d, %x, %X, %p, %s and %%. A directive can
// carry a decimal field width; a leading zero in the width selects '0' as the
// padding character for %d, %x, %X and %p (%c and %s always pad with spaces).
//
// Parameters:
//   buf, sz    output buffer and its size in bytes. The output is truncated
//              as needed and is always NUL terminated.
//   fmt        NUL terminated format string.
//   args       the arguments that are referenced by the directives, in order.
//   max_args   number of entries in "args".
//
// Returns -1 if "sz" does not leave room for at least a NUL byte. Otherwise,
// returns the number of non-NUL characters that the untruncated output
// consists of (this count saturates close to SSIZE_MAX).
//
// A directive that doesn't match its argument, that has no argument left, or
// whose width overflows triggers RAW_DCHECK() in debug builds. In production
// builds, the '%' and the format character are copied to the output instead
// (any width digits are dropped). A %c directive with a NUL argument ends the
// output at that point.
ssize_t internal::FormatN(char* buf, size_t sz, const char* fmt,
                          const Arg* args, const size_t max_args) {
  // Make sure we can write at least one NUL byte.
  if (static_cast<ssize_t>(sz) < 1)
    return -1;

  // Iterate over format string and interpret '%' arguments as they are
  // encountered.
  // "ptr" is the position of the next byte that gets written to the buffer,
  // whereas "count" keeps track of the length of the untruncated output.
  char* ptr = buf;
  size_t padding;
  char pad;
  size_t count = 0;
  for (unsigned int cur_arg = 0;
       *fmt &&
       count != static_cast<size_t>(std::numeric_limits<ssize_t>::max()); ) {
    if (*fmt++ == '%') {
      padding = 0;
      pad = ' ';
      char ch = *fmt++;
    format_character_found:
      switch (ch) {
      case '0': case '1': case '2': case '3': case '4':
      case '5': case '6': case '7': case '8': case '9':
        // Found a width parameter. Convert to an integer value and store in
        // "padding". If the leading digit is a zero, change the padding
        // character from a space ' ' to a zero '0'.
        pad = ch == '0' ? '0' : ' ';
        for (;;) {
          const size_t max_padding = std::numeric_limits<ssize_t>::max();
          if (padding > max_padding/10 ||
              10*padding > max_padding - (ch - '0')) {
            RAW_DCHECK(padding <= max_padding/10 &&
                       10*padding <= max_padding - (ch - '0'));
            // Integer overflow detected. Skip the rest of the width until
            // we find the format character, then do the normal error handling.
            while ((ch = *fmt++) >= '0' && ch <= '9') {
            }
            goto fail_to_expand;
          }
          padding = 10*padding + ch - '0';
          ch = *fmt++;
          if (ch < '0' || ch > '9') {
            // Reached the end of the width parameter. This is where the format
            // character is found.
            goto format_character_found;
          }
        }
        break;
      case 'c': {  // Output an ASCII character.
        // Check that there are arguments left to be inserted.
        if (cur_arg >= max_args) {
          RAW_DCHECK(cur_arg < max_args);
          goto fail_to_expand;
        }

        // Check that the argument has the expected type.
        const Arg& arg = args[cur_arg++];
        if (arg.type_ != Arg::INT &&
            arg.type_ != Arg::UINT) {
          RAW_DCHECK(arg.type_ == Arg::INT ||
                     arg.type_ == Arg::UINT);
          goto fail_to_expand;
        }

        // Apply padding, if needed.
        Pad(buf, sz, &count, ' ', padding, 1, &ptr);

        // Convert the argument to an ASCII character and output it.
        // A NUL character terminates the output right here.
        char ch = static_cast<char>(arg.i_);
        if (!ch)
          goto end_of_output_buffer;
        if (Out(buf, sz, &count, ch))
          ++ptr;
        break; }
      case 'd': {  // Output a signed or unsigned integer-like value.
        // Check that there are arguments left to be inserted.
        if (cur_arg >= max_args) {
          RAW_DCHECK(cur_arg < max_args);
          goto fail_to_expand;
        }

        // Check that the argument has the expected type.
        const Arg& arg = args[cur_arg++];
        if (arg.type_ != Arg::INT &&
            arg.type_ != Arg::UINT) {
          RAW_DCHECK(arg.type_ == Arg::INT ||
                     arg.type_ == Arg::UINT);
          goto fail_to_expand;
        }

        // Our implementation of IToASCII() can handle all widths of data types
        // and can print both signed and unsigned values.
        IncrementCount(&count,
                       IToASCII(arg.type_ == Arg::INT, false, arg.i_,
                                ptr, sz - (ptr - buf), 10, padding, pad));

        // Advance "ptr" to the end of the string that was just emitted.
        if (sz - (ptr - buf))
          while (*ptr)
            ++ptr;
        break; }
      case 'x':    // Output an unsigned hexadecimal value.
      case 'X':
      case 'p': {  // Output a pointer value.
        // Check that there are arguments left to be inserted.
        if (cur_arg >= max_args) {
          RAW_DCHECK(cur_arg < max_args);
          goto fail_to_expand;
        }

        const Arg& arg = args[cur_arg++];
        int64_t i;
        switch (ch) {
        case 'x':  // Hexadecimal values are available for integer-like args.
        case 'X':
          // Check that the argument has the expected type.
          if (arg.type_ != Arg::INT &&
              arg.type_ != Arg::UINT) {
            RAW_DCHECK(arg.type_ == Arg::INT ||
                       arg.type_ == Arg::UINT);
            goto fail_to_expand;
          }
          i = arg.i_;

          // The Arg() constructor automatically performed sign expansion on
          // signed parameters. This is great when outputting a %d decimal
          // number, but can result in unexpected leading 0xFF bytes when
          // outputting a %x hexadecimal number. Mask bits, if necessary.
          // We have to do this here, instead of in the Arg() constructor, as
          // the Arg() constructor cannot tell whether we will output a %d
          // or a %x. Only the latter should experience masking.
          if (arg.width_ < sizeof(int64_t))
            i &= (1LL << (8*arg.width_)) - 1;
          break;
        default:
          // Pointer values require an actual pointer or a string.
          if (arg.type_ == Arg::POINTER)
            i = reinterpret_cast<uintptr_t>(arg.ptr_);
          else if (arg.type_ == Arg::STRING)
            i = reinterpret_cast<uintptr_t>(arg.s_);
          else if (arg.type_ == Arg::INT && arg.width_ == sizeof(void *) &&
                   arg.i_ == 0)  // Allow C++'s version of NULL
            i = 0;
          else {
            RAW_DCHECK(arg.type_ == Arg::POINTER ||
                       arg.type_ == Arg::STRING);
            goto fail_to_expand;
          }

          // Pointers always include the "0x" prefix. This affects padding.
          if (padding) {
            if (pad == ' ') {
              // Predict the number of hex digits (including "0x" prefix) that
              // will be output for this address when it is converted to ASCII.
              size_t chars = 2;
              uint64_t j = i;
              do {
                ++chars;
                j >>= 4;
              } while (j);

              // Output the necessary number of space characters to perform
              // padding. We can't rely on IToASCII() to do that for us, as it
              // would incorrectly add padding _after_ the "0x" prefix.
              Pad(buf, sz, &count, pad, padding, chars, &ptr);

              // Inform IToASCII() that it no longer needs to handle the
              // padding.
              padding = 0;
            } else {
              // Adjust for the two-character "0x" prefix.
              padding = padding >= 2 ? padding - 2 : 0;
            }
          }

          // Insert "0x" prefix, if there is still sufficient space in the
          // output buffer.
          if (Out(buf, sz, &count, '0'))
            ++ptr;
          if (Out(buf, sz, &count, 'x'))
            ++ptr;
          break;
        }

        // No matter what data type this value originated from, print it as
        // a regular hexadecimal number.
        // Note that "ch != 'x'" selects upper-case digits for both %X and %p.
        IncrementCount(&count,
                       IToASCII(false, ch != 'x', i, ptr, sz - (ptr - buf),
                                16, padding, pad));

        // Advance "ptr" to the end of the string that was just emitted.
        if (sz - (ptr - buf))
          while (*ptr)
            ++ptr;
        break; }
      case 's': {
        // Check that there are arguments left to be inserted.
        if (cur_arg >= max_args) {
          RAW_DCHECK(cur_arg < max_args);
          goto fail_to_expand;
        }

        // Check that the argument has the expected type.
        const Arg& arg = args[cur_arg++];
        const char *s;
        if (arg.type_ == Arg::STRING)
          s = arg.s_ ? arg.s_ : "<NULL>";
        else if (arg.type_ == Arg::INT && arg.width_ == sizeof(void *) &&
                 arg.i_ == 0)  // Allow C++'s version of NULL
          s = "<NULL>";
        else {
          RAW_DCHECK(arg.type_ == Arg::STRING);
          goto fail_to_expand;
        }

        // Apply padding, if needed. This requires us to first check the
        // length of the string that we are outputting.
        if (padding) {
          size_t len = 0;
          for (const char* src = s; *src++; )
            ++len;
          Pad(buf, sz, &count, ' ', padding, len, &ptr);
        }

        // Printing a string involves nothing more than copying it into the
        // output buffer and making sure we don't output more bytes than
        // available space.
        for (const char* src = s; *src; )
          if (Out(buf, sz, &count, *src++))
            ++ptr;
        break; }
      case '%':
        // Quoted percent '%' character.
        goto copy_verbatim;
      fail_to_expand:
        // C++ gives us tools to do type checking -- something that snprintf()
        // could never really do. So, whenever we see arguments that don't
        // match up with the format string, we refuse to output them. But
        // since we have to be extremely conservative about being async-
        // signal-safe, we are limited in the type of error handling that we
        // can do in production builds (in debug builds we can use RAW_DCHECK()
        // and hope for the best). So, all we do is pass the format string
        // unchanged. That should eventually get the user's attention; and in
        // the meantime, it hopefully doesn't lose too much data.
      default:
        // Unknown or unsupported format character. Just copy verbatim to
        // output.
        if (Out(buf, sz, &count, '%'))
          ++ptr;
        // A '%' at the very end of the format string has no format character.
        if (!ch)
          goto end_of_format_string;
        if (Out(buf, sz, &count, ch))
          ++ptr;
        break;
      }
    } else {
  copy_verbatim:
      // "fmt" has already been advanced, so fmt[-1] is the character that
      // was just inspected.
      if (Out(buf, sz, &count, fmt[-1]))
        ++ptr;
    }
  }
 end_of_format_string:
 end_of_output_buffer:
  // Terminate the output. The NUL byte is counted temporarily and then
  // subtracted again, as it is not part of the return value.
  *ptr = '\000';
  IncrementCount(&count);
  return static_cast<ssize_t>(count)-1;
}
523 | |
524 ssize_t FormatN(char* buf, size_t N, const char* fmt) { | |
525 // Make sure we can write at least one NUL byte. | |
526 ssize_t n = static_cast<ssize_t>(N); | |
527 if (n < 1) | |
528 return -1; | |
529 size_t count = 0; | |
530 | |
531 // In the slow-path, we deal with errors by copying the contents of | |
532 // "fmt" unexpanded. This means, if there are no arguments passed, the | |
533 // Format() function always degenerates to version of strncpy() that | |
534 // de-duplicates '%' characters. | |
535 char* dst = buf; | |
536 const char* src = fmt; | |
537 for (; *src; ++src) { | |
538 char ch = *src; | |
539 if (!IncrementCount(&count) && n > 1) { | |
540 --dst; | |
541 break; | |
542 } | |
543 if (n > 1) { | |
544 --n; | |
545 *dst++ = ch; | |
546 } | |
547 if (ch == '%' && src[1] == '%') | |
548 ++src; | |
549 } | |
550 IncrementCount(&count); | |
551 *dst = '\000'; | |
552 return static_cast<ssize_t>(count)-1; | |
553 } | |
554 | |
555 } // namespace debug | |
556 } // namespace base | |
OLD | NEW |