tools/telemetry/telemetry/core/heap/chrome_js_heap_snapshot_parser.py - Issue 24076016: Add JS heap snapshotting functionality to Telemetry (part 2)

Unified Diff: tools/telemetry/telemetry/core/heap/chrome_js_heap_snapshot_parser.py

Issue 24076016: Add JS heap snapshotting functionality to Telemetry (part 2) (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: Code review (nduca) Created 7 years, 3 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

« no previous file with comments | « tools/telemetry/telemetry/core/heap/__init__.py ('k') | tools/telemetry/telemetry/core/heap/chrome_js_heap_snapshot_parser_unittest.py » ('j') | tools/telemetry/telemetry/core/jsheap/model.py » ('J')
Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

Index: tools/telemetry/telemetry/core/heap/chrome_js_heap_snapshot_parser.py

diff --git a/tools/telemetry/telemetry/core/heap/chrome_js_heap_snapshot_parser.py b/tools/telemetry/telemetry/core/heap/chrome_js_heap_snapshot_parser.py

new file mode 100644

index 0000000000000000000000000000000000000000..7f6fdf1c9f96b6669bc6e830d486c6e414200103

--- /dev/null

+++ b/tools/telemetry/telemetry/core/heap/chrome_js_heap_snapshot_parser.py

@@ -0,0 +1,247 @@

+# Use of this source code is governed by a BSD-style license that can be

+# found in the LICENSE file.

+import json

+from telemetry.core.heap import live_heap_object

+from telemetry.core.heap import retaining_edge

+class ChromeJsHeapSnapshotParser(object):

+ """ Parser for the heap snapshot.

+ The heap snapshot JSON format is defined by HeapSnapshotJSONSerializer in V8.

+ The snapshot contains a list of integers describing nodes (types, names, etc.)

+ and a list of integers describing edges (types, the node the edge points to,

+ etc.) and a string table. All strings are expressed as indices to the string

+ table.

+ In addition, the snapshot contains meta information describing the data fields

+ for nodes and the data fields for edges.

+ Attributes:

+ _node_dict: {int -> LiveHeapObject}, maps integer ids to LiveHeapObject

+ objects.

+ _node_list: [int], the raw node data of the heap snapshot.

+ _edge_list: [int], the raw edge data of the heap snapshot.

+ _node_types: [str], the possible node types in the heap snapshot.

+ _edge_types: [str], the possible edge types in the heap snapshot.

+ _node_fields: [str], the fields present in the heap snapshot for each node.

+ _edge_fields: [str], the fields present in the heap snapshot for each node.

+ _node_type_ix: int, index of the node type field.

+ _node_name_ix: int, index of the node name field.

+ _node_id_ix: int, index of the node id field.

+ _node_edge_count_ix: int, index of the node edge count field.

+ _node_field_count: int, number of node fields.

+ _edge_type_ix: int, index of the edge type field.

+ _edge_name_or_ix_ix: int, index of the "edge name or index" field.

+ _edge_to_node_ix: int, index of the "to node for an edge" field.

+ _edge_field_count: int, number of edge fields.

+ """

+ def __init__(self, raw_data):

+ heap = json.loads(raw_data)

+ self._node_dict = {}

+ # Read the snapshot components (nodes, edges, strings, metadata).

+ self._node_list = heap['nodes']

+ self._edge_list = heap['edges']

+ self._strings = heap['strings']

+ self._node_types = heap['snapshot']['meta']['node_types'][0]

+ self._edge_types = heap['snapshot']['meta']['edge_types'][0]

+ node_fields = heap['snapshot']['meta']['node_fields']

+ edge_fields = heap['snapshot']['meta']['edge_fields']

+ # Find the indices of the required node and edge fields based on the

+ # metadata.

+ self._node_type_ix = node_fields.index('type')

+ self._node_name_ix = node_fields.index('name')

+ self._node_id_ix = node_fields.index('id')

+ self._node_edge_count_ix = node_fields.index('edge_count')

+ self._node_field_count = len(node_fields)

+ self._edge_type_ix = edge_fields.index('type')

+ self._edge_name_or_ix_ix = edge_fields.index('name_or_index')

+ self._edge_to_node_ix = edge_fields.index('to_node')

+ self._edge_field_count = len(edge_fields)

+ self._ParseSnapshot()

+ @staticmethod

+ def CanImport(raw_data):

+ heap = json.loads(raw_data)

+ if ('nodes' not in heap or 'edges' not in heap or 'strings' not in heap or

+ 'snapshot' not in heap or 'meta' not in heap['snapshot']):

+ return False

+ meta = heap['snapshot']['meta']

+ if ('node_types' not in meta or 'edge_types' not in meta or

+ 'node_fields' not in meta or 'edge_fields' not in meta):

+ return False

+ node_fields = meta['node_fields']

+ edge_fields = meta['edge_fields']

+ if ('type' not in node_fields or 'name' not in node_fields or

+ 'id' not in node_fields or 'edge_count' not in node_fields):

+ return False

+ if ('type' not in edge_fields or 'name_or_index' not in edge_fields or

+ 'to_node' not in edge_fields):

+ return False

+ return True

+ def GetAllLiveHeapObjects(self):

+ return self._node_dict.values()

+ @staticmethod

+ def LiveHeapObjectToJavaScript(heap_object):

+ return heap_object.name or str(heap_object)

+ @staticmethod

+ def RetainingEdgeToJavaScript(edge):

+ if edge.type_string == 'property':

+ return '.' + edge.name_string

+ if edge.type_string == 'element':

+ return '[' + edge.name_string + ']'

+ return str(edge)

+ def _ParseSnapshot(self):

+ """Parses the stored JSON snapshot data.

+ Fills in self._node_dict with LiveHeapObject objects constructed based on

+ the heap snapshot. The LiveHeapObject objects contain the associated

+ RetainingEdge objects.

+ """

+ edge_start_ix = 0

+ for ix in xrange(0, len(self._node_list), self._node_field_count):

+ edge_start_ix = self._ReadNodeFromIndex(ix, edge_start_ix)

+ # Add pointers to the endpoints to the edges, and associate the edges with

+ # the "to" nodes.

+ for node_id in self._node_dict:

+ n = self._node_dict[node_id]

+ for e in n.edges_from:

+ self._node_dict[e.to_object_id].AddEdgeTo(e)

+ e.SetFromObject(n)

+ e.SetToObject(self._node_dict[e.to_object_id])

+ def _ReadNodeFromIndex(self, ix, edges_start):

+ """Reads the data for a node from the heap snapshot.

+ If the index contains an interesting node, constructs a Node object and adds

+ it to self._node_dict.

+ Args:

+ ix: int, index into the self._node_list array.

+ edges_start: int, the index of the edge array where the edges for the node

+ start.

+ Returns:

+ int, the edge start index for the next node.

+ Raises:

+ Exception: The node list of the snapshot is malformed.

+ """

+ if ix + self._node_field_count > len(self._node_list):

+ raise Exception('Snapshot node list too short')

+ type_ix = self._node_list[ix + self._node_type_ix]

+ type_string = self._node_types[int(type_ix)]

+ # edges_end is noninclusive (the index of the first edge that is not part of

+ # this node).

+ edge_count = self._node_list[ix + self._node_edge_count_ix]

+ edges_end = edges_start + edge_count * self._edge_field_count

+ if ChromeJsHeapSnapshotParser._IsNodeTypeUninteresting(type_string):

+ return edges_end

+ name_ix = self._node_list[ix + self._node_name_ix]

+ node_id = self._node_list[ix + self._node_id_ix]

+ def ConstructorName(type_string, node_name_ix):

+ if type_string == 'object':

+ return self._strings[int(node_name_ix)]

+ return '(%s)' % type_string

+ ctor_name = ConstructorName(type_string, name_ix)

+ n = live_heap_object.LiveHeapObject(node_id, type_string, ctor_name)

+ if type_string == 'string':

+ n.string = self._strings[int(name_ix)]

+ for edge_ix in xrange(edges_start, edges_end, self._edge_field_count):

+ edge = self._ReadEdgeFromIndex(node_id, edge_ix)

+ if edge:

+ # The edge will be associated with the other endpoint when all the data

+ # has been read.

+ n.AddEdgeFrom(edge)

+ self._node_dict[node_id] = n

+ return edges_end

+ @staticmethod

+ def _IsNodeTypeUninteresting(type_string):

+ """Helper function for filtering out nodes from the heap snapshot.

+ Args:

+ type_string: str, type of the node.

+ Returns:

+ bool, True if the node is of an uninteresting type and shouldn't be

+ included in the heap snapshot analysis.

+ """

+ uninteresting_types = ('hidden', 'code', 'number', 'native', 'synthetic')

+ return type_string in uninteresting_types

+ @staticmethod

+ def _IsEdgeTypeUninteresting(edge_type_string):

+ """Helper function for filtering out edges from the heap snapshot.

+ Args:

+ edge_type_string: str, type of the edge.

+ Returns:

+ bool, True if the edge is of an uninteresting type and shouldn't be

+ included in the heap snapshot analysis.

+ """

+ uninteresting_types = ('weak', 'hidden', 'internal')

+ return edge_type_string in uninteresting_types

+ def _ReadEdgeFromIndex(self, node_id, edge_ix):

+ """Reads the data for an edge from the heap snapshot.

+ Args:

+ node_id: int, id of the node which is the starting point of the edge.

+ edge_ix: int, index into the self._edge_list array.

+ Returns:

+ Edge, if the index contains an interesting edge, otherwise None.

+ Raises:

+ Exception: The node list of the snapshot is malformed.

+ """

+ if edge_ix + self._edge_field_count > len(self._edge_list):

+ raise Exception('Snapshot edge list too short')

+ edge_type_ix = self._edge_list[edge_ix + self._edge_type_ix]

+ edge_type_string = self._edge_types[int(edge_type_ix)]

+ if ChromeJsHeapSnapshotParser._IsEdgeTypeUninteresting(edge_type_string):

+ return None

+ child_name_or_ix = self._edge_list[edge_ix + self._edge_name_or_ix_ix]

+ child_node_ix = self._edge_list[edge_ix + self._edge_to_node_ix]

+ # The child_node_ix is an index into the node list. Read the actual

+ # node information.

+ child_node_type_ix = self._node_list[child_node_ix + self._node_type_ix]

+ child_node_type_string = self._node_types[int(child_node_type_ix)]

+ child_node_id = self._node_list[child_node_ix + self._node_id_ix]

+ if ChromeJsHeapSnapshotParser._IsNodeTypeUninteresting(

+ child_node_type_string):

+ return None

+ child_name_string = ''

+ # For element nodes, the child has no name (only an index).

+ if (edge_type_string == 'element' or

+ int(child_name_or_ix) >= len(self._strings)):

+ child_name_string = str(child_name_or_ix)

+ else:

+ child_name_string = self._strings[int(child_name_or_ix)]

+ return retaining_edge.RetainingEdge(node_id, child_node_id,

+ edge_type_string, child_name_string)