tools/gen-postmortem-metadata.py - Issue 8803024: Optionally export metadata with libv8 to enable debuggers to inspect V8 state.

Unified Diff: tools/gen-postmortem-metadata.py

Issue 8803024: Optionally export metadata with libv8 to enable debuggers to inspect V8 state. (Closed) Base URL: http://v8.googlecode.com/svn/branches/bleeding_edge/

Patch Set: '' Created 8 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: tools/gen-postmortem-metadata.py

===================================================================

--- tools/gen-postmortem-metadata.py (revision 0)

+++ tools/gen-postmortem-metadata.py (revision 0)

@@ -0,0 +1,478 @@

+#!/usr/bin/env python

+# Redistribution and use in source and binary forms, with or without

+# modification, are permitted provided that the following conditions are

+# met:

+# * Redistributions of source code must retain the above copyright

+# notice, this list of conditions and the following disclaimer.

+# * Redistributions in binary form must reproduce the above

+# copyright notice, this list of conditions and the following

+# disclaimer in the documentation and/or other materials provided

+# with the distribution.

+# * Neither the name of Google Inc. nor the names of its

+# contributors may be used to endorse or promote products derived

+# from this software without specific prior written permission.

+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS

+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT

+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR

+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT

+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,

+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT

+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,

+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY

+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT

+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE

+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

+# Emits a C++ file to be compiled and linked into libv8 to support postmortem

+# debugging tools. Most importantly, this tool emits constants describing V8

+# internals:

+# v8dbg_type_CLASS__TYPE = VALUE Describes class type values

+# v8dbg_class_CLASS__FIELD__TYPE = OFFSET Describes class fields

+# v8dbg_parent_CLASS__PARENT Describes class hierarchy

+# v8dbg_frametype_NAME = VALUE Describes stack frame values

+# v8dbg_off_fp_NAME = OFFSET Frame pointer offsets

+# v8dbg_prop_NAME = OFFSET Object property offsets

+# v8dbg_NAME = VALUE Miscellaneous values

+# These constants are declared as global integers so that they'll be present in

+# the generated libv8 binary.

+import re

+import sys

+# Miscellaneous constants, tags, and masks used for object identification.

# Each entry becomes one line "int v8dbg_<name> = <value>;" in the generated
# file (see emit_set()); 'value' is a C++ expression that is evaluated when
# the generated file is compiled.
consts_misc = [
    {'name': 'FirstNonstringType', 'value': 'FIRST_NONSTRING_TYPE'},
    {'name': 'IsNotStringMask', 'value': 'kIsNotStringMask'},
    {'name': 'StringTag', 'value': 'kStringTag'},
    {'name': 'NotStringTag', 'value': 'kNotStringTag'},

    {'name': 'StringEncodingMask', 'value': 'kStringEncodingMask'},
    {'name': 'TwoByteStringTag', 'value': 'kTwoByteStringTag'},
    {'name': 'AsciiStringTag', 'value': 'kAsciiStringTag'},

    {'name': 'StringRepresentationMask',
     'value': 'kStringRepresentationMask'},
    {'name': 'SeqStringTag', 'value': 'kSeqStringTag'},
    {'name': 'ConsStringTag', 'value': 'kConsStringTag'},
    {'name': 'ExternalStringTag', 'value': 'kExternalStringTag'},

    {'name': 'FailureTag', 'value': 'kFailureTag'},
    {'name': 'FailureTagMask', 'value': 'kFailureTagMask'},
    {'name': 'HeapObjectTag', 'value': 'kHeapObjectTag'},
    {'name': 'HeapObjectTagMask', 'value': 'kHeapObjectTagMask'},
    {'name': 'SmiTag', 'value': 'kSmiTag'},
    {'name': 'SmiTagMask', 'value': 'kSmiTagMask'},
    {'name': 'SmiValueShift', 'value': 'kSmiTagSize'},
    {'name': 'PointerSizeLog2', 'value': 'kPointerSizeLog2'},

    {'name': 'prop_idx_content',
     'value': 'DescriptorArray::kContentArrayIndex'},
    {'name': 'prop_idx_first',
     'value': 'DescriptorArray::kFirstIndex'},
    {'name': 'prop_type_field',
     'value': 'FIELD'},
    {'name': 'prop_type_first_phantom',
     'value': 'MAP_TRANSITION'},
    {'name': 'prop_type_mask',
     'value': 'PropertyDetails::TypeField::kMask'},

    {'name': 'off_fp_context',
     'value': 'StandardFrameConstants::kContextOffset'},
    {'name': 'off_fp_marker',
     'value': 'StandardFrameConstants::kMarkerOffset'},
    {'name': 'off_fp_function',
     'value': 'JavaScriptFrameConstants::kFunctionOffset'},
    {'name': 'off_fp_args',
     'value': 'JavaScriptFrameConstants::kLastParameterOffset'},
]

+# The following useful fields are missing accessors, so we define fake ones.

# Each string is the argument list "Class, field, type, offset-constant" of a
# synthetic ACCESSORS(...) invocation; load_fields() wraps it in
# "ACCESSORS(%s)" and hands it to parse_field().
extras_accessors = [
    'HeapObject, map, Map, kMapOffset',
    'JSObject, elements, Object, kElementsOffset',
    'FixedArray, data, uintptr_t, kHeaderSize',
    'Map, instance_attributes, int, kInstanceAttributesOffset',
    'Map, instance_descriptors, int, kInstanceDescriptorsOrBitField3Offset',
    'Map, inobject_properties, int, kInObjectPropertiesOffset',
    'Map, instance_size, int, kInstanceSizeOffset',
    'HeapNumber, value, double, kValueOffset',
    'ConsString, first, String, kFirstOffset',
    'ConsString, second, String, kSecondOffset',
    'ExternalString, resource, Object, kResourceOffset',
    'SeqAsciiString, chars, char, kHeaderSize',
]

+# The following is a whitelist of classes we expect to find when scanning the

+# source code. This list is not exhaustive, but it's still useful to identify

+# when this script gets out of sync with the source. See load_objects().

# load_objects() exits with an error if any of these classes is not matched
# to an instance type found in objects.h.
expected_classes = [
    'ConsString', 'FixedArray', 'HeapNumber', 'JSArray', 'JSFunction',
    'JSObject', 'JSRegExp', 'JSValue', 'Map', 'Oddball', 'Script',
    'SeqAsciiString', 'SharedFunctionInfo',
]

+# The following structures store high-level representations of the structures

+# for which we're going to emit descriptive constants.

# Tables filled in by load_objects() and load_fields(), read by emit_config().
types = {}        # instance type name -> True (a dict used as a set)
typeclasses = {}  # instance type name -> name of the class inferred for it
klasses = {}      # class name -> {'parent': base class name, or None}
fields = []       # {'name': ..., 'value': ...} constants from parse_field()

# Fixed prologue of the generated C++ file.  The "%s" placeholder is filled
# with this script's path (sys.argv[0]).  The trailing backslash in the
# FRAME_CONST definition is consumed by Python as a line continuation inside
# the string literal, so the macro is emitted on a single line.
header = '''
/*
 * This file is generated by %s. Do not edit directly.
 */

#include "v8.h"
#include "frames.h"
#include "frames-inl.h" /* for architecture-specific frame constants */

using namespace v8::internal;

extern "C" {

/* stack frame constants */
#define FRAME_CONST(value, klass) \
    int v8dbg_frametype_##klass = StackFrame::value;

STACK_FRAME_TYPE_LIST(FRAME_CONST)

#undef FRAME_CONST

''' % sys.argv[0]

# Epilogue of the generated file: closes the extern "C" block that the
# header opens, so the emitted translation unit has balanced braces.
footer = '''
}
'''

+# Loads class hierarchy and type information from "objects.h".

def _infer_class_name(typename):
    """Return the candidate class name for an instance type, or None.

    The "_TYPE" suffix is removed and the remainder converted to camel case,
    except that a leading "JS" stays uppercase ("JS_FUNCTION_TYPE" becomes
    "JSFunction").  Names that do not end in "_TYPE" yield None.  The result
    is only a candidate: the caller must still check it against klasses.
    """
    # Symbols and Strings are implemented using the same classes.
    usetype = typename.replace('SYMBOL_', 'STRING_')

    # REGEXP behaves like REG_EXP, as in JS_REGEXP_TYPE => JSRegExp.
    usetype = usetype.replace('_REGEXP_', '_REG_EXP_')

    if not usetype.endswith('_TYPE'):
        return None

    parts = usetype[:-len('_TYPE')].split('_')
    if parts[0] == 'JS':
        cctype = 'JS'
        parts = parts[1:]
    else:
        cctype = ''

    # Slicing with [:1] keeps an empty component (e.g. from "__") harmless.
    for part in parts:
        cctype += part[:1].upper() + part[1:].lower()

    if not cctype.endswith('String'):
        return cctype

    # Mapping string types is more complicated.  Both types and class names
    # for Strings specify a representation (Seq, Cons, External, or Sliced)
    # and an encoding (TwoByte or Ascii).  In the simplest case both are
    # explicit in both names:
    #
    #     EXTERNAL_ASCII_STRING_TYPE => ExternalAsciiString
    #
    # Either part may be omitted from the type name, in which case "Seq" and
    # "TwoByte" are assumed:
    #
    #     STRING_TYPE => SeqTwoByteString
    #
    # Sometimes the type name carries more information than the class:
    #
    #     CONS_ASCII_STRING_TYPE => ConsString
    #
    # So first add any missing representation and encoding; if that does not
    # yield a known class, strip the encoding again.
    if ('Cons' not in cctype and 'External' not in cctype and
            'Sliced' not in cctype):
        if 'Ascii' in cctype:
            cctype = re.sub(r'AsciiString$', 'SeqAsciiString', cctype)
        else:
            cctype = re.sub(r'String$', 'SeqString', cctype)

    if 'Ascii' not in cctype:
        cctype = re.sub(r'String$', 'TwoByteString', cctype)

    if cctype not in klasses:
        cctype = cctype.replace('Ascii', '').replace('TwoByte', '')

    return cctype


def load_objects():
    """Load class hierarchy and instance-type information from objects.h.

    Reads the file named by sys.argv[2] and fills three module-level tables:

      klasses      every class declaration found, mapped to
                   {'parent': <base class name, or None>}
      types        every enumerator of "enum InstanceType"
      typeclasses  each instance type for which a class could be inferred,
                   mapped to that class's name

    If any class listed in expected_classes ends up without a matching
    instance type, one error line per class is written to stderr and the
    process exits with status 1.
    """
    objfilename = sys.argv[2]
    class_decl = re.compile(r'class (\w[^\s:]*)(: public (\w[^\s{]*))?\s*{')

    # Classes we're sure should be present; entries are removed as found.
    missing = set(expected_classes)

    #
    # Iterate objects.h line-by-line to collect type and class information.
    # For types, we accumulate a string representing the entire InstanceType
    # enum definition and parse it later because it's easier to do so
    # without the embedded newlines.
    #
    in_insttype = False
    typestr = ''

    with open(objfilename, 'r') as objfile:
        for line in objfile:
            if line.startswith('enum InstanceType {'):
                in_insttype = True
                continue

            if in_insttype and line.startswith('};'):
                in_insttype = False
                continue

            line = re.sub(r'//.*', '', line.strip())

            if in_insttype:
                typestr += line
                continue

            match = class_decl.match(line)
            if match:
                klasses[match.group(1)] = {'parent': match.group(3)}

    #
    # Process the instance type declaration.  Explicit values ("= ...") are
    # dropped; empty entries (e.g. after a trailing comma) are skipped.
    #
    for entry in typestr.split(','):
        name = re.sub(r'\s*=.*', '', entry).strip()
        if name:
            types[name] = True

    #
    # Infer class names for each type based on a systematic transformation.
    # We find the class for each type rather than the other way around
    # because there are fewer cases where one type maps to more than one
    # class than the other way around.
    #
    for typename in types:
        cctype = _infer_class_name(typename)

        # Despite all that, some types have no corresponding class.
        if cctype in klasses:
            typeclasses[typename] = cctype
            missing.discard(cctype)

    if missing:
        for klass in sorted(missing):
            sys.stderr.write('error: expected class "%s" not found\n' % klass)
        sys.exit(1)

+# For a given macro call, pick apart the arguments and return an object

+# describing the corresponding output constant. See load_fields().

def parse_field(call):
    """Turn one accessor-macro invocation into an output constant.

    `call` is the text of a macro invocation ending in its closing
    parenthesis, for example

        ACCESSORS(JSFunction, shared, SharedFunctionInfo, kSharedOffset)
        SMI_ACCESSORS(String, length, kLengthOffset)

    and may span several lines.  ACCESSORS and ACCESSORS_GCSAFE take
    (class, field, type, offset); SMI_ACCESSORS takes (class, field, offset)
    and the type is recorded as "SMI".

    Returns a dict {'name': 'class_<class>__<field>__<type>',
                    'value': '<class>::<offset>'}.

    Raises ValueError if the macro name is not one of the three above or if
    too few arguments are given.
    """
    # Invocations may be wrapped across lines; flatten them first.
    call = call.replace('\n', ' ')

    idx = call.find('(')
    kind = call[:idx]
    # Everything between the first "(" and the final ")".
    rest = call[idx + 1:len(call) - 1]
    args = [arg.strip() for arg in rest.split(',')]

    if kind in ('ACCESSORS', 'ACCESSORS_GCSAFE'):
        klass, field, dtype, offset = args[:4]
    elif kind == 'SMI_ACCESSORS':
        klass, field, offset = args[:3]
        dtype = 'SMI'
    else:
        raise ValueError('unsupported accessor macro: %r' % call)

    return {
        'name': 'class_%s__%s__%s' % (klass, field, dtype),
        'value': '%s::%s' % (klass, offset),
    }

+# Load field offset information from objects-inl.h.

def _scan_parens(line, start, opens):
    """Track parenthesis depth across line[start:].

    Returns (end, opens): `end` is the index just past the character that
    brought the depth back to zero, or len(line) if that never happened;
    `opens` is the depth at that point.
    """
    for idx in range(start, len(line)):
        if line[idx] == '(':
            opens += 1
        elif line[idx] == ')':
            opens -= 1

        if opens == 0:
            return idx + 1, opens

    return len(line), opens


def load_fields():
    """Load field offset information from objects-inl.h.

    Reads the file named by sys.argv[3].  Each class's fields and the
    corresponding offsets are described in the source by calls to macros
    like "ACCESSORS" (and friends) that start at the beginning of a line.
    Those invocations are extracted, taking into account that they may span
    multiple lines and may contain nested parentheses, and each one is
    passed to parse_field(); the results are appended to the module-level
    `fields` list.  Finally the synthetic accessors listed in
    extras_accessors are appended the same way.
    """
    prefixes = ('ACCESSORS', 'ACCESSORS_GCSAFE', 'SMI_ACCESSORS')
    current = ''   # text of the invocation collected so far
    opens = 0      # parentheses still open in `current`

    with open(sys.argv[3], 'r') as inlfile:
        for line in inlfile:
            if opens > 0:
                # Continuation line of an invocation that is still open.
                end, opens = _scan_parens(line, 0, opens)
                current += line[:end]
                continue

            for prefix in prefixes:
                if not line.startswith(prefix + '('):
                    continue

                # A new invocation starts here; flush the previous one.
                if current:
                    fields.append(parse_field(current))
                    current = ''

                end, opens = _scan_parens(line, len(prefix), opens)
                current += line[:end]
                # "<prefix>(" can match at most one prefix per line.
                break

    if current:
        fields.append(parse_field(current))

    for body in extras_accessors:
        fields.append(parse_field('ACCESSORS(%s)' % body))

+# Emit a block of constants.

def emit_set(out, consts):
    """Write one block of constants to `out`.

    `out` is any object with a write() method; `consts` is a sequence of
    dicts with 'name' and 'value' keys.  Each entry is written as
    "int v8dbg_<name> = <value>;" on its own line, and the block is
    terminated by an empty line (written even when `consts` is empty).
    """
    for const in consts:
        out.write('int v8dbg_%s = %s;\n' % (const['name'], const['value']))
    out.write('\n')

+# Emit the whole output file.

def emit_config():
    """Write the whole generated C++ file to the path in sys.argv[1].

    The output consists of, in order: the header, the miscellaneous
    constants (consts_misc), one "type_<class>__<TYPE>" constant per entry
    of typeclasses, one "parent_<class>__<parent>" constant (value 0) per
    class in klasses that has a parent, the field constants collected in
    `fields`, and the footer.  Type and class entries are emitted in sorted
    name order so the output is stable from run to run.
    """
    # open()/sorted() instead of file()/keys().sort(): the latter two only
    # exist on Python 2.  The with-block guarantees the file is flushed and
    # closed even if writing fails part-way.
    with open(sys.argv[1], 'w') as out:
        out.write(header)

        out.write('/* miscellaneous constants */\n')
        emit_set(out, consts_misc)

        out.write('/* class type information */\n')
        emit_set(out, [
            {'name': 'type_%s__%s' % (typeclasses[typename], typename),
             'value': typename}
            for typename in sorted(typeclasses)
        ])

        out.write('/* class hierarchy information */\n')
        emit_set(out, [
            {'name': 'parent_%s__%s' % (klassname,
                                        klasses[klassname]['parent']),
             'value': 0}
            for klassname in sorted(klasses)
            # Root classes have no parent and contribute no constant.
            if klasses[klassname]['parent'] is not None
        ])

        out.write('/* field information */\n')
        emit_set(out, fields)

        out.write(footer)

def main():
    """Script entry point: validate the command line and run the generator.

    Expects three arguments (output file, objects.h, objects-inl.h).  With
    fewer, a usage line is written to stderr and the process exits with
    status 2.  Otherwise the class/type tables and field list are loaded
    and the output file is written.
    """
    if len(sys.argv) < 4:
        sys.stderr.write(
            'usage: %s output.cc objects.h objects-inl.h\n' % sys.argv[0])
        sys.exit(2)

    load_objects()
    load_fields()
    emit_config()


# Only run when executed as a script, so that importing this module (for
# instance to reuse parse_field()) has no side effects.
if __name__ == '__main__':
    main()

« no previous file with comments | « build/common.gypi ('k') | tools/gyp/v8.gyp » ('j') | no next file with comments »