OLD | NEW |
(Empty) | |
| 1 #!/usr/bin/env python |
| 2 |
| 3 # |
| 4 # Copyright 2012 the V8 project authors. All rights reserved. |
| 5 # Redistribution and use in source and binary forms, with or without |
| 6 # modification, are permitted provided that the following conditions are |
| 7 # met: |
| 8 # |
| 9 # * Redistributions of source code must retain the above copyright |
| 10 # notice, this list of conditions and the following disclaimer. |
| 11 # * Redistributions in binary form must reproduce the above |
| 12 # copyright notice, this list of conditions and the following |
| 13 # disclaimer in the documentation and/or other materials provided |
| 14 # with the distribution. |
| 15 # * Neither the name of Google Inc. nor the names of its |
| 16 # contributors may be used to endorse or promote products derived |
| 17 # from this software without specific prior written permission. |
| 18 # |
| 19 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| 20 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| 21 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| 22 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| 23 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| 24 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| 25 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| 26 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| 27 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| 28 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| 29 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 30 # |
| 31 |
| 32 # |
| 33 # Emits a C++ file to be compiled and linked into libv8 to support postmortem |
| 34 # debugging tools. Most importantly, this tool emits constants describing V8 |
| 35 # internals: |
| 36 # |
| 37 # v8dbg_type_CLASS__TYPE = VALUE Describes class type values |
| 38 # v8dbg_class_CLASS__FIELD__TYPE = OFFSET Describes class fields |
| 39 # v8dbg_parent_CLASS__PARENT Describes class hierarchy |
| 40 # v8dbg_frametype_NAME = VALUE Describes stack frame values |
| 41 # v8dbg_off_fp_NAME = OFFSET Frame pointer offsets |
| 42 # v8dbg_prop_NAME = OFFSET Object property offsets |
| 43 # v8dbg_NAME = VALUE Miscellaneous values |
| 44 # |
| 45 # These constants are declared as global integers so that they'll be present in |
| 46 # the generated libv8 binary. |
| 47 # |
| 48 |
| 49 import re |
| 50 import sys |
| 51 |
| 52 # |
| 53 # Miscellaneous constants, tags, and masks used for object identification. |
| 54 # |
# Table of miscellaneous constants.  Each entry pairs the suffix used for the
# emitted "v8dbg_<name>" symbol with the C++ expression that supplies its
# value.
consts_misc = [
    dict(name='FirstNonstringType', value='FIRST_NONSTRING_TYPE'),

    # String vs. non-string discrimination.
    dict(name='IsNotStringMask', value='kIsNotStringMask'),
    dict(name='StringTag', value='kStringTag'),
    dict(name='NotStringTag', value='kNotStringTag'),

    # String encoding.
    dict(name='StringEncodingMask', value='kStringEncodingMask'),
    dict(name='TwoByteStringTag', value='kTwoByteStringTag'),
    dict(name='AsciiStringTag', value='kAsciiStringTag'),

    # String representation.
    dict(name='StringRepresentationMask', value='kStringRepresentationMask'),
    dict(name='SeqStringTag', value='kSeqStringTag'),
    dict(name='ConsStringTag', value='kConsStringTag'),
    dict(name='ExternalStringTag', value='kExternalStringTag'),

    # Pointer tagging.
    dict(name='FailureTag', value='kFailureTag'),
    dict(name='FailureTagMask', value='kFailureTagMask'),
    dict(name='HeapObjectTag', value='kHeapObjectTag'),
    dict(name='HeapObjectTagMask', value='kHeapObjectTagMask'),
    dict(name='SmiTag', value='kSmiTag'),
    dict(name='SmiTagMask', value='kSmiTagMask'),
    dict(name='SmiValueShift', value='kSmiTagSize'),
    dict(name='PointerSizeLog2', value='kPointerSizeLog2'),

    # Property descriptors.
    dict(name='prop_idx_content', value='DescriptorArray::kContentArrayIndex'),
    dict(name='prop_idx_first', value='DescriptorArray::kFirstIndex'),
    dict(name='prop_type_field', value='FIELD'),
    dict(name='prop_type_first_phantom', value='MAP_TRANSITION'),
    dict(name='prop_type_mask', value='PropertyDetails::TypeField::kMask'),

    # Frame-pointer-relative offsets.
    dict(name='off_fp_context', value='StandardFrameConstants::kContextOffset'),
    dict(name='off_fp_marker', value='StandardFrameConstants::kMarkerOffset'),
    dict(name='off_fp_function',
         value='JavaScriptFrameConstants::kFunctionOffset'),
    dict(name='off_fp_args',
         value='JavaScriptFrameConstants::kLastParameterOffset'),
]
| 101 |
| 102 # |
| 103 # The following useful fields are missing accessors, so we define fake ones. |
| 104 # |
# Argument lists for synthetic ACCESSORS(...) invocations.  Each string reads
# "class, field, type, offset-constant" and is wrapped as 'ACCESSORS(<body>)'
# and handed to parse_field() by load_fields().
extras_accessors = [
    "HeapObject, map, Map, kMapOffset",
    "JSObject, elements, Object, kElementsOffset",
    "FixedArray, data, uintptr_t, kHeaderSize",
    "Map, instance_attributes, int, kInstanceAttributesOffset",
    "Map, instance_descriptors, int, kInstanceDescriptorsOrBitField3Offset",
    "Map, inobject_properties, int, kInObjectPropertiesOffset",
    "Map, instance_size, int, kInstanceSizeOffset",
    "HeapNumber, value, double, kValueOffset",
    "ConsString, first, String, kFirstOffset",
    "ConsString, second, String, kSecondOffset",
    "ExternalString, resource, Object, kResourceOffset",
    "SeqAsciiString, chars, char, kHeaderSize",
]
| 119 |
| 120 # |
| 121 # The following is a whitelist of classes we expect to find when scanning the |
| 122 # source code. This list is not exhaustive, but it's still useful to identify |
| 123 # when this script gets out of sync with the source. See load_objects(). |
| 124 # |
# Classes that load_objects() must be able to associate with at least one
# instance type; any name left unmatched is reported as an error there.
expected_classes = [
    'ConsString',
    'FixedArray',
    'HeapNumber',
    'JSArray',
    'JSFunction',
    'JSObject',
    'JSRegExp',
    'JSValue',
    'Map',
    'Oddball',
    'Script',
    'SeqAsciiString',
    'SharedFunctionInfo',
]
| 130 |
| 131 |
| 132 # |
| 133 # The following structures store high-level representations of the structures |
| 134 # for which we're going to emit descriptive constants. |
| 135 # |
# Module-level state filled in by load_objects() and load_fields() and read
# back by emit_config().
types = dict()        # type name -> True (a dict used as a set)
typeclasses = dict()  # type name -> name of the corresponding class
klasses = dict()      # class name -> {'parent': parent class name or None}
fields = list()       # field constants, each {'name': ..., 'value': ...}
| 140 |
# Text written at the top of the generated C++ file by emit_config(): a
# "generated by" banner (the single %s is filled with sys.argv[0] when this
# module is loaded), the V8 includes, and the opening of an extern "C" block
# that `footer` closes.  The FRAME_CONST macro defines an int named
# v8dbg_frametype_<klass> for each invocation made by STACK_FRAME_TYPE_LIST
# (presumably one per stack frame type; that macro is defined in V8, not here).
#
# NOTE: this is not a raw string, so the backslash that ends the #define line
# is consumed by Python together with the newline; the macro is therefore
# emitted on a single line.
header = '''
/*
* This file is generated by %s. Do not edit directly.
*/

#include "v8.h"
#include "frames.h"
#include "frames-inl.h" /* for architecture-specific frame constants */

using namespace v8::internal;

extern "C" {

/* stack frame constants */
#define FRAME_CONST(value, klass) \
int v8dbg_frametype_##klass = StackFrame::value;

STACK_FRAME_TYPE_LIST(FRAME_CONST)

#undef FRAME_CONST

''' % sys.argv[0];
| 163 |
# Text written at the very end of the generated C++ file by emit_config();
# the lone "}" closes the extern "C" block opened in `header`.
footer = '''
}
'''
| 167 |
| 168 # |
| 169 # Loads class hierarchy and type information from "objects.h". |
| 170 # |
def _class_name_for_type(typename):
    """Derive the candidate class name for one InstanceType enumerator.

    Returns the camel-cased class name (for example "JS_FUNCTION_TYPE"
    becomes "JSFunction"), or None when the enumerator does not end in
    "_TYPE".  The result is only a candidate: the caller must still check
    that it names a known class.
    """
    # Symbols and Strings are implemented using the same classes.
    usetype = typename.replace('SYMBOL_', 'STRING_')

    # REGEXP behaves like REG_EXP, as in JS_REGEXP_TYPE => JSRegExp.
    usetype = usetype.replace('_REGEXP_', '_REG_EXP_')

    if not usetype.endswith('_TYPE'):
        return None

    # Drop the "_TYPE" suffix and camel-case the remaining components, except
    # that a leading "JS" component stays uppercase.
    parts = usetype[:-len('_TYPE')].split('_')
    cctype = ''
    if parts[0] == 'JS':
        cctype = 'JS'
        parts = parts[1:]

    # Slicing (rather than indexing) keeps an empty component from raising.
    cctype += ''.join(part[:1].upper() + part[1:].lower() for part in parts)

    #
    # Mapping string types is more complicated.  Both types and class names
    # for Strings specify a representation (e.g., Seq, Cons, External, or
    # Sliced) and an encoding (TwoByte or Ascii).  In the simplest case both
    # are explicit in both names:
    #
    #     EXTERNAL_ASCII_STRING_TYPE => ExternalAsciiString
    #
    # Either one may be omitted from the type name, in which case "Seq" and
    # "TwoByte" are assumed:
    #
    #     STRING_TYPE => SeqTwoByteString
    #
    # Sometimes the type name carries more information than the class:
    #
    #     CONS_ASCII_STRING_TYPE => ConsString
    #
    # So: add a representation and an encoding when missing, and if that does
    # not name a known class, strip the encoding again.
    #
    if cctype.endswith('String'):
        has_representation = ('Cons' in cctype or 'External' in cctype or
                              'Sliced' in cctype)
        if not has_representation:
            if 'Ascii' in cctype:
                cctype = re.sub(r'AsciiString$', 'SeqAsciiString', cctype)
            else:
                cctype = re.sub(r'String$', 'SeqString', cctype)

        if 'Ascii' not in cctype:
            cctype = re.sub(r'String$', 'TwoByteString', cctype)

        if cctype not in klasses:
            cctype = cctype.replace('Ascii', '').replace('TwoByte', '')

    return cctype


def load_objects():
    """Load class hierarchy and type information from "objects.h".

    Reads the file named by sys.argv[2] and fills three module-level
    containers:

      klasses      class name -> {'parent': parent class name or None}
      types        every enumerator of "enum InstanceType" -> True
      typeclasses  type name -> class name, for types whose derived class
                   name is present in klasses

    Prints an error for each entry of expected_classes that no type mapped
    to and then exits the process with status 1.
    """
    objfilename = sys.argv[2]
    in_insttype = False
    typestr = ''

    # Classes we are sure should be present; entries are removed as found.
    checktypes = set(expected_classes)

    class_re = re.compile(r'class (\w[^\s:]*)(: public (\w[^\s{]*))?\s*{')

    #
    # Iterate objects.h line-by-line to collect type and class information.
    # For types, accumulate the whole InstanceType enum body into one string
    # and parse it afterwards, which is easier without embedded newlines.
    #
    with open(objfilename, 'r') as objfile:
        for line in objfile:
            if line.startswith('enum InstanceType {'):
                in_insttype = True
                continue

            if in_insttype and line.startswith('};'):
                in_insttype = False
                continue

            # Trim surrounding whitespace, then drop any "//" comment.
            line = re.sub(r'//.*', '', line.strip())

            if in_insttype:
                typestr += line
                continue

            match = class_re.match(line)
            if match:
                klasses[match.group(1)] = {'parent': match.group(3)}

    #
    # Process the instance type declaration: one enumerator per
    # comma-separated entry, with any "= value" initializer removed.
    #
    for entry in typestr.split(','):
        name = re.sub(r'\s*=.*', '', entry).strip()
        if name:
            types[name] = True

    #
    # Infer a class name for each type.  We go from type to class rather
    # than the other way around because there are fewer cases where one type
    # maps to more than one class.  Some types have no corresponding class.
    #
    for typename in types:
        cctype = _class_name_for_type(typename)
        if cctype in klasses:
            typeclasses[typename] = cctype
            checktypes.discard(cctype)

    if checktypes:
        for klass in sorted(checktypes):
            print('error: expected class \"%s\" not found' % klass)

        sys.exit(1)
| 319 |
| 320 |
| 321 # |
| 322 # For a given macro call, pick apart the arguments and return an object |
| 323 # describing the corresponding output constant. See load_fields(). |
| 324 # |
def parse_field(call):
    """Turn one accessor-macro invocation into an output constant.

    call is the full text of the invocation, e.g.
    'ACCESSORS(HeapObject, map, Map, kMapOffset)'; it may contain newlines
    when the invocation spanned several source lines.  See load_fields().

    Returns a dict with two keys:
      'name'   'class_<CLASS>__<FIELD>__<TYPE>' (TYPE is 'SMI' for
               SMI_ACCESSORS)
      'value'  '<CLASS>::<OFFSET>'

    Raises AssertionError if the macro is not ACCESSORS, ACCESSORS_GCSAFE
    or SMI_ACCESSORS.
    """
    # Replace newlines with spaces so a multi-line call parses like a
    # single-line one.
    call = call.replace('\n', ' ')

    idx = call.find('(')
    kind = call[0:idx]

    # Everything between the first "(" and the final character, which is
    # expected to be the closing ")".
    rest = call[idx + 1:len(call) - 1]
    args = [arg.strip() for arg in re.split(r'\s*,\s*', rest)]

    if kind == 'ACCESSORS' or kind == 'ACCESSORS_GCSAFE':
        klass, field, dtype, offset = args[0], args[1], args[2], args[3]
    elif kind == 'SMI_ACCESSORS':
        klass, field, offset = args[0], args[1], args[2]
        dtype = 'SMI'
    else:
        # Raised explicitly so the check survives "python -O".
        raise AssertionError('unexpected accessor macro: %r' % kind)

    return {
        'name': 'class_%s__%s__%s' % (klass, field, dtype),
        'value': '%s::%s' % (klass, offset),
    }
| 358 |
| 359 # |
| 360 # Load field offset information from objects-inl.h. |
| 361 # |
def _scan_parens(line, start, opens):
    """Track parenthesis depth across line[start:].

    opens is the depth before scanning.  Returns (end, opens) where end is
    the index just past the character at which the depth reached zero, or
    len(line) if it never did, and opens is the depth at that point.
    """
    for idx in range(start, len(line)):
        if line[idx] == '(':
            opens += 1
        elif line[idx] == ')':
            opens -= 1

        if opens == 0:
            return idx + 1, opens

    return len(line), opens


def load_fields():
    """Load field offset information from objects-inl.h.

    Reads the file named by sys.argv[3].  Each class's fields and offsets
    are described there by calls to macros like "ACCESSORS" (and friends)
    that start at the beginning of a line.  This extracts those
    invocations, taking into account that they may span multiple lines and
    may contain nested parentheses, and appends parse_field() of each one
    to the module-level `fields` list.  The synthetic entries from
    extras_accessors are appended last.
    """
    prefixes = ('ACCESSORS(', 'ACCESSORS_GCSAFE(', 'SMI_ACCESSORS(')
    current = ''    # text of the invocation collected so far
    opens = 0       # parenthesis depth; > 0 while inside an invocation

    with open(sys.argv[3], 'r') as inlfile:
        for line in inlfile:
            if opens > 0:
                # Continuation line of a multi-line invocation.
                end, opens = _scan_parens(line, 0, opens)
                current += line[:end]
                continue

            if not line.startswith(prefixes):
                continue

            # A new invocation begins; flush the previous one first.
            if current:
                fields.append(parse_field(current))
                current = ''

            end, opens = _scan_parens(line, line.index('('), opens)
            current += line[:end]

    if current:
        fields.append(parse_field(current))

    for body in extras_accessors:
        fields.append(parse_field('ACCESSORS(%s)' % body))
| 417 |
| 418 # |
| 419 # Emit a block of constants. |
| 420 # |
def emit_set(out, consts):
    """Emit a block of constants.

    out is any object with a write(str) method.  consts is an iterable of
    dicts with 'name' and 'value' keys; each one is written as the line
    "int v8dbg_<name> = <value>;".  A blank line is written after the block,
    even when consts is empty.
    """
    for const in consts:
        out.write('int v8dbg_%s = %s;\n' % (const['name'], const['value']))
    out.write('\n')
| 426 |
| 427 # |
| 428 # Emit the whole output file. |
| 429 # |
def emit_config():
    """Emit the whole output file.

    Writes to the path given by sys.argv[1], in order: `header`, the
    miscellaneous constants, one "type_<CLASS>__<TYPE>" constant per entry
    of typeclasses, one "parent_<CLASS>__<PARENT>" constant (value 0) per
    class in klasses that has a parent, the field constants, and `footer`.
    Type and class entries are emitted in sorted order so the output is
    stable from run to run.
    """
    # open() and sorted() work on both Python 2 and 3; the "with" block
    # guarantees the output is flushed and closed.
    with open(sys.argv[1], 'w') as out:
        out.write(header)

        out.write('/* miscellaneous constants */\n')
        emit_set(out, consts_misc)

        out.write('/* class type information */\n')
        consts = []
        for typename in sorted(typeclasses):
            klass = typeclasses[typename]
            consts.append({
                'name': 'type_%s__%s' % (klass, typename),
                'value': typename,
            })

        emit_set(out, consts)

        out.write('/* class hierarchy information */\n')
        consts = []
        for klassname in sorted(klasses):
            pklass = klasses[klassname]['parent']
            if pklass is None:
                continue

            # The relationship is encoded in the symbol name; the value is
            # a placeholder.
            consts.append({
                'name': 'parent_%s__%s' % (klassname, pklass),
                'value': 0,
            })

        emit_set(out, consts)

        out.write('/* field information */\n')
        emit_set(out, fields)

        out.write(footer)
| 471 |
# Script entry point.  Guarded so that importing this module (for example to
# test parse_field) does not parse sys.argv or exit the interpreter.
if __name__ == '__main__':
    if len(sys.argv) < 4:
        print('usage: %s output.cc objects.h objects-inl.h' % sys.argv[0])
        sys.exit(2)

    load_objects()
    load_fields()
    emit_config()
OLD | NEW |