Index: telemetry/telemetry/internal/snap_page_util.py |
diff --git a/telemetry/telemetry/internal/snap_page_util.py b/telemetry/telemetry/internal/snap_page_util.py |
index 57210030776187a0ebe0b2ee0350d5fc817607c3..721e35b431d93f37f3192747633fc5a62c5f6e4a 100644 |
--- a/telemetry/telemetry/internal/snap_page_util.py |
+++ b/telemetry/telemetry/internal/snap_page_util.py |
@@ -3,11 +3,35 @@ |
# found in the LICENSE file. |
import os |
+import json |
+import sys |
from telemetry.core import util |
from telemetry.internal.browser import browser_finder |
+def _TransmitLargeJSONToTab(tab, json_obj, js_holder_name): |
+ tab.ExecuteJavaScript( |
+ 'var {{ @js_holder_name }} = "";', js_holder_name=js_holder_name) |
+ |
+ # To avoid crashing devtool connection (details in crbug.com/763119#c16), |
+ # we break down the json string to chunks which each chunk has a maximum |
+ # size of 100000 characters (100000 seems to not break the connection and |
+ # makes sending data reasonably fast). |
+ k = 0 |
+ step_size = 100000 |
+ json_obj_string = json.dumps(json_obj) |
+ while k < len(json_obj_string): |
+ sub_string_chunk = json_obj_string[k: k + step_size] |
+ k += step_size |
+ tab.ExecuteJavaScript( |
+ '{{ @js_holder_name }} += {{ sub_string_chunk }};', |
+ js_holder_name=js_holder_name, sub_string_chunk=sub_string_chunk) |
+ |
+ tab.ExecuteJavaScript( |
+ '{{ @js_holder_name }} = JSON.parse({{ @js_holder_name }});', |
+ js_holder_name=js_holder_name) |
+ |
def SnapPage(finder_options, url, interactive, snapshot_file): |
""" Save the HTML snapshot of the page whose address is |url| to |
|snapshot_file|. |
@@ -17,28 +41,61 @@ def SnapPage(finder_options, url, interactive, snapshot_file): |
try: |
tab = browser.tabs[0] |
tab.Navigate(url) |
- tab.WaitForDocumentReadyStateToBeComplete() |
if interactive: |
raw_input( |
'Activating interactive mode. Press enter after you finish ' |
"interacting with the page to snapshot the page's DOM content.") |
- with open( |
- os.path.join(util.GetTelemetryThirdPartyDir(), 'snap-it', |
- 'HTMLSerializer.js')) as f: |
+ |
+ sys.stdout.write( |
+ 'Snapshotting content of %s. This could take a while...\n' % url) |
+ tab.WaitForDocumentReadyStateToBeComplete() |
+ tab.action_runner.WaitForNetworkQuiescence() |
+ |
+ with open(os.path.join(util.GetTelemetryThirdPartyDir(), 'snap-it', |
+ 'HTMLSerializer.js')) as f: |
snapit_script = f.read() |
- tab.ExecuteJavaScript(snapit_script) |
- tab.ExecuteJavaScript( |
- ''' |
- var serializedDomArray; |
- var htmlSerializer = new HTMLSerializer(); |
- htmlSerializer.processDocument(document); |
- htmlSerializer.fillHolesAsync(document, function(s) { |
- serializedDomArray = s.html; |
- }); |
- ''') |
- print 'Snapshotting content of %s. This could take a while...' % url |
- tab.WaitForJavaScriptCondition('serializedDomArray !== undefined') |
- serialized_dom = ''.join(tab.EvaluateJavaScript('serializedDomArray')) |
- snapshot_file.write(serialized_dom) |
+ |
+ with open(os.path.join(util.GetTelemetryThirdPartyDir(), 'snap-it', |
+ 'popup.js')) as f: |
+ dom_combining_script = f.read() |
+ |
+ serialized_doms = [] |
+ |
+ # Serialize the dom in each frame. |
+ for context_id in tab.EnableAllContexts(): |
+ tab.ExecuteJavaScript(snapit_script, context_id=context_id) |
+ tab.ExecuteJavaScript( |
+ ''' |
+ var serializedDom; |
+ var htmlSerializer = new HTMLSerializer(); |
+ htmlSerializer.processDocument(document); |
+ htmlSerializer.fillHolesAsync(document, function(s) { |
+ serializedDom = s.asDict(); |
+ }); |
+ ''', context_id=context_id) |
+ tab.WaitForJavaScriptCondition( |
+ 'serializedDom !== undefined', context_id=context_id) |
+ serialized_doms.append(tab.EvaluateJavaScript( |
+ 'serializedDom', context_id=context_id)) |
+ |
+ # Execute doms combining code in blank page to minimize the chance of V8 |
+ # OOM. |
+ tab.Navigate('about:blank') |
+ tab.WaitForDocumentReadyStateToBeComplete() |
+ |
+ # Sending all the serialized doms back to tab execution context. |
+ tab.ExecuteJavaScript('var serializedDoms = [];') |
+ for i in xrange(len(serialized_doms)): |
+ sys.stdout.write('Processing dom of frame #%i / %i\r' % |
+ (i, len(serialized_doms))) |
+ sys.stdout.flush() |
+ _TransmitLargeJSONToTab(tab, serialized_doms[i], 'sub_dom') |
+ tab.ExecuteJavaScript('serializedDoms.push(sub_dom);') |
+ |
+ # Combine all the doms to one HTML string. |
+ tab.EvaluateJavaScript(dom_combining_script) |
+ page_snapshot = tab.EvaluateJavaScript('outputHTMLString(serializedDoms);') |
+ |
+ snapshot_file.write(page_snapshot) |
finally: |
browser.Close() |