Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(113)

Unified Diff: grit/gather/chrome_html.py

Issue 10386189: Add chrome_html gatherer, which inlines html and automatically generates image set… (Closed) Base URL: http://git.chromium.org/external/grit-i18n.git@master
Patch Set: Created 8 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « grit/format/data_pack.py ('k') | grit/node/structure.py » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: grit/gather/chrome_html.py
diff --git a/grit/gather/chrome_html.py b/grit/gather/chrome_html.py
new file mode 100755
index 0000000000000000000000000000000000000000..4fcabc6d5499691f5864ae35a2e18458ede1d208
--- /dev/null
+++ b/grit/gather/chrome_html.py
@@ -0,0 +1,433 @@
+#!/usr/bin/env python
+# Copyright (c) 2012 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Prepares a Chrome HTML file by inlining resources and adding references to high DPI resources.
tony 2012/05/17 17:57:25 Why can't we reuse html_inline.py? Can we just ad
+
+This is a small script that takes a HTML file, looks for src attributes
+and inlines the specified file, producing one HTML file with no external
+dependencies. It recursively inlines the included files. When inlining CSS
+image files this script also checks for the existence of high DPI versions
+of the inlined file including those on relevant platforms.
+"""
+
+import os
+import re
+import sys
+import types
+import base64
+import mimetypes
+
+from grit.gather import interface
+from grit import lazy_re
+from grit import util
+
# Scale factors for which a high DPI variant of an inlined CSS image is
# looked up (in a sub-directory named after the factor).
scale_factors = ['2x']

# Distribution used when the DIST_ENV_VAR environment variable is not set.
DIST_DEFAULT = 'chromium'
# Environment variable that selects the distribution.
DIST_ENV_VAR = 'CHROMIUM_BUILD'
# Placeholder in file names that is replaced with the distribution.
DIST_SUBSTR = '%DISTRIBUTION%'

# Matches beginning of an "if" block with trailing spaces.
# Raw strings keep '\s' a regex escape instead of an invalid string escape.
_BEGIN_IF_BLOCK = lazy_re.compile(
    r'<if [^>]*?expr="(?P<expression>[^"]*)"[^>]*?>\s*')

# Matches ending of an "if" block with preceding spaces.
_END_IF_BLOCK = lazy_re.compile(r'\s*</if>')

# Matches a chrome theme source URL.
_THEME_SOURCE = lazy_re.compile('chrome://theme/IDR_[A-Z0-9_]*')
+
def ReadFile(input_filename):
  """Helper function that returns the contents of input_filename.

  The file is opened in binary mode, so the bytes are returned unmodified.

  Args:
    input_filename: name of file to be read

  Returns:
    string

  Raises:
    IOError: if the file cannot be opened or read.
  """
  # The context manager closes the handle even when read() raises.
  with open(input_filename, 'rb') as f:
    return f.read()
+
def FileDataUrl(path):
  """Returns the contents of the file at |path| as a base64 data: URL.

  The MIME type is guessed from the file name; 'text/plain' is used when
  no type can be guessed.

  Args:
    path: name of the file to encode

  Returns:
    string of the form "data:<mimetype>;base64,<encoded contents>"
  """
  guessed_type, _ = mimetypes.guess_type(path)
  if not guessed_type:
    guessed_type = 'text/plain'
  encoded = base64.standard_b64encode(ReadFile(path))
  return "data:%s;base64,%s" % (guessed_type, encoded)
+
def SrcInlineAsDataURL(
    src_match, base_path, distribution, inlined_files, names_only=False):
  """regex replace function.

  Takes a regex match for src="filename", attempts to read the file
  at 'filename' and returns the src attribute with the file inlined
  as a data URI. If it finds DIST_SUBSTR string in file name, replaces
  it with distribution.

  Args:
    src_match: regex match object with 'filename' named capturing group
    base_path: path to look for files in
    distribution: string that should replace DIST_SUBSTR
    inlined_files: set to which the path of the referenced file is added.
    names_only: If true, the function will not read the file but just
        return "". It will still add the filename to |inlined_files|.

  Returns:
    string: the replacement text; the unchanged match when the filename
    contains a ':' (and is therefore treated as a URL).
  """
  filename = src_match.group('filename')

  if ':' in filename:
    # filename is probably a URL, which we don't want to bother inlining
    return src_match.group(0)

  filename = filename.replace(DIST_SUBSTR, distribution)
  filepath = os.path.join(base_path, filename)
  inlined_files.add(filepath)

  if names_only:
    return ""

  # Everything matched before the filename, without the opening quote
  # character (hence the -1); the quotes are re-added around the data URL.
  prefix = src_match.string[src_match.start():src_match.start('filename') - 1]
  return "%s\"%s\"" % (prefix, FileDataUrl(filepath))
+
def InsertImageSet(
    src_match, base_path, distribution, inlined_files, names_only=False):
  """regex replace function that turns a CSS url() into an image set.

  Takes a regex match for '<attribute>: url("filename"' and returns the
  text that should replace it: an (unterminated) -webkit-image-set() holding
  the 1x image plus every scale factor in scale_factors for which a variant
  exists. The closing parenthesis of the original url() is not part of the
  match and ends up closing the image set.

  Args:
    src_match: regex match object with 'attribute' and 'filename' named
        capturing groups
    base_path: path to look for files in
    distribution: string that should replace DIST_SUBSTR
    inlined_files: set to which the paths of all referenced files are added.
    names_only: If true, the files are not read and "" is returned. The
        file names are still added to |inlined_files|.

  Returns:
    string: the replacement text; the unchanged match when the filename
    contains a ':' and is not a chrome://theme URL.
  """
  filename = src_match.group('filename')
  attr = src_match.group('attribute')

  # Any matches for which a chrome URL handler will serve all scale factors
  # can simply request all scale factors.
  if _THEME_SOURCE.match(filename):
    images = ["url(\"%s\") %s" % (filename, '1x')]
    for sc in scale_factors:
      images.append("url(\"%s@%s\") %s" % (filename, sc, sc))
    return "%s: -webkit-image-set(%s" % (attr, ', '.join(images))

  if ':' in filename:
    # filename is probably a URL, which we don't want to bother inlining
    return src_match.group(0)

  filename = filename.replace(DIST_SUBSTR, distribution)
  filepath = os.path.join(base_path, filename)
  inlined_files.add(filepath)

  # A high DPI variant lives next to the 1x image in a directory named
  # after the scale factor, e.g. img/2x/a.png for img/a.png.
  directory, basename = os.path.split(filepath)
  scaled_images = []
  for sc in scale_factors:
    scale_image_path = "%s/%s/%s" % (directory, sc, basename)
    if os.path.isfile(scale_image_path):
      inlined_files.add(scale_image_path)
      scaled_images.append((sc, scale_image_path))

  if names_only:
    # Only the file names are wanted; skip reading and encoding the images.
    return ""

  images = ["url(\"%s\") %s" % (FileDataUrl(filepath), '1x')]
  for sc, scale_image_path in scaled_images:
    images.append("url(\"%s\") %s" % (FileDataUrl(scale_image_path), sc))
  return "%s: -webkit-image-set(%s" % (attr, ', '.join(images))
+
class InlinedData(object):
  """Helper class holding the results from DoInline().

  Holds the inlined data and the set of filenames of all the inlined
  files.
  """
  def __init__(self, inlined_data, inlined_files):
    # The flattened text as produced by DoInline().
    self.inlined_data = inlined_data
    # The file names collected while inlining.
    self.inlined_files = inlined_files
+
def DoInline(
    input_filename, grd_node, allow_external_script=False, names_only=False):
  """Helper function that inlines the resources in a specified file.

  Reads input_filename, finds all the src attributes and attempts to
  inline the files they are referring to, then returns the result and
  the set of inlined files. Files referenced by nested includes are part of
  the returned set as well.

  Args:
    input_filename: name of file to read in
    grd_node: html node from the grd file for this include tag; when None
        every <if> condition is treated as satisfied
    allow_external_script: If true, <script src="..."> references are left
        in place instead of being inlined.
    names_only: If true, None will be returned for the inlined contents
        (faster).
  Returns:
    an InlinedData holding the inlined data as a string and the set of
    filenames of all the inlined files
  Raises:
    IOError: if input_filename or a referenced CSS/image file cannot be read
    Exception: if an included file cannot be read or an <if> is unmatched
  """
  input_filepath = os.path.dirname(input_filename)

  distribution = DIST_DEFAULT
  if DIST_ENV_VAR in os.environ:
    distribution = os.environ[DIST_ENV_VAR]
    if len(distribution) > 1 and distribution[0] == '_':
      distribution = distribution[1:].lower()

  # Keep track of all the files we inline.
  inlined_files = set()

  def SrcReplace(src_match, filepath=input_filepath,
                 inlined_files=inlined_files):
    """Helper function to provide SrcInlineAsDataURL with the base file path"""
    return SrcInlineAsDataURL(
        src_match, filepath, distribution, inlined_files, names_only=names_only)

  def SrcImageSet(src_match, filepath=input_filepath,
                  inlined_files=inlined_files):
    """Helper function to provide InsertImageSet with the base file path"""
    return InsertImageSet(
        src_match, filepath, distribution, inlined_files, names_only=names_only)

  def GetFilepath(src_match):
    """Returns the path of the matched file, or None if it looks like a URL"""
    filename = src_match.group('filename')

    if ':' in filename:
      # filename is probably a URL, which we don't want to bother inlining
      return None

    filename = filename.replace(DIST_SUBSTR, distribution)
    return os.path.join(input_filepath, filename)

  def IsConditionSatisfied(src_match):
    """Evaluates the expression of a matched <if> against grd_node"""
    expression = src_match.group('expression')
    return grd_node is None or grd_node.EvaluateCondition(expression)

  def CheckConditionalElements(text):
    """Helper function to conditionally inline inner elements"""
    while True:
      begin_if = _BEGIN_IF_BLOCK.search(text)
      if begin_if is None:
        return text

      condition_satisfied = IsConditionSatisfied(begin_if)
      leading = text[0:begin_if.start()]
      content_start = begin_if.end()

      # Find matching "if" block end. |count| is the nesting depth of the
      # <if> blocks that are still open at |pos|.
      count = 1
      pos = begin_if.end()
      while True:
        end_if = _END_IF_BLOCK.search(text, pos)
        if end_if is None:
          raise Exception('Unmatched <if>')

        next_if = _BEGIN_IF_BLOCK.search(text, pos)
        if next_if is None or next_if.start() >= end_if.end():
          count = count - 1
          if count == 0:
            break
          pos = end_if.end()
        else:
          count = count + 1
          pos = next_if.end()

      content = text[content_start:end_if.start()]
      trailing = text[end_if.end():]

      if condition_satisfied:
        text = leading + CheckConditionalElements(content) + trailing
      else:
        text = leading + trailing

  def InlineFileContents(src_match, pattern, inlined_files=inlined_files):
    """Helper function to inline external files of various types"""
    filepath = GetFilepath(src_match)
    if filepath is None:
      return src_match.group(0)
    inlined_files.add(filepath)

    # Even if names_only is set, html files needs to be opened, because it
    # can link to images that need to be added to the file set.
    if names_only and not filepath.endswith('.html'):
      return ""

    # Recurse directly (rather than through InlineToString) so the files
    # referenced by the nested file can be merged into our own file set.
    try:
      nested = DoInline(filepath,
                        grd_node,
                        allow_external_script=allow_external_script,
                        names_only=names_only)
    except IOError as e:
      raise Exception("Failed to open %s while trying to flatten %s. (%s)" %
                      (e.filename, filepath, e.strerror))
    inlined_files.update(nested.inlined_files)

    if names_only:
      # The nested contents are None in this mode; only the names matter.
      return ""

    return pattern % nested.inlined_data

  def InlineIncludeFiles(src_match):
    """Helper function to directly inline generic external files (without
       wrapping them with any kind of tags).
    """
    return InlineFileContents(src_match, '%s')

  def InlineScript(match):
    """Helper function to inline external script files"""
    attrs = (match.group('attrs1') + match.group('attrs2')).strip()
    if attrs:
      attrs = ' ' + attrs
    return InlineFileContents(match, '<script' + attrs + '>%s</script>')

  def InlineCSSText(text, css_filepath):
    """Helper function that inlines external resources in CSS text"""
    filepath = os.path.dirname(css_filepath)
    return InlineCSSImages(text, filepath)

  def InlineCSSFile(src_match, inlined_files=inlined_files):
    """Helper function to inline external css files.

    Args:
      src_match: A regular expression match with a named group named "filename".

    Returns:
      The text that should replace the reference to the CSS file.
    """
    filepath = GetFilepath(src_match)
    if filepath is None:
      return src_match.group(0)

    # Even if names_only is set, the CSS file needs to be opened, because it
    # can link to images that need to be added to the file set.
    inlined_files.add(filepath)
    # When resolving CSS files we need to pass in the path so that relative URLs
    # can be resolved.
    return '<style>%s</style>' % InlineCSSText(ReadFile(filepath), filepath)

  def InlineCSSImages(text, filepath=input_filepath):
    """Helper function that inlines external images in CSS backgrounds."""
    # Replace contents of url() for css attributes: content, background,
    # or *-image.
    return re.sub(r'(?P<attribute>content|background|[\w-]*-image):[ ]*'
                  r'url\((?:\'|\")(?P<filename>[^"\'\)\(]*)(?:\'|\")',
                  lambda m: SrcImageSet(m, filepath),
                  text)

  flat_text = ReadFile(input_filename)

  if not allow_external_script:
    # We need to inline css and js before we inline images so that image
    # references gets inlined in the css and js
    flat_text = re.sub('<script (?P<attrs1>.*?)src="(?P<filename>[^"\']*)"' +
                       '(?P<attrs2>.*?)></script>',
                       InlineScript,
                       flat_text)

  flat_text = re.sub(
      '<link rel="stylesheet".+?href="(?P<filename>[^"]*)".*?>',
      InlineCSSFile,
      flat_text)

  flat_text = re.sub(
      r'<include\s+src="(?P<filename>[^"\']*)".*>',
      InlineIncludeFiles,
      flat_text)

  # Check conditional elements, remove unsatisfied ones from the file.
  flat_text = CheckConditionalElements(flat_text)

  flat_text = re.sub('<(?!script)[^>]+?src="(?P<filename>[^"\']*)"',
                     SrcReplace,
                     flat_text)

  # TODO(arv): Only do this inside <style> tags.
  flat_text = InlineCSSImages(flat_text)
  flat_text = re.sub('<link rel="icon".+?href="(?P<filename>[^"\']*)"',
                     SrcReplace,
                     flat_text)

  if names_only:
    flat_text = None  # Will contains garbage if the flag is set anyway.
  return InlinedData(flat_text, inlined_files)
+
+
def InlineToString(input_filename, grd_node, allow_external_script=False):
  """Inlines the resources in a specified file and returns it as a string.

  Args:
    input_filename: name of file to read in
    grd_node: html node from the grd file for this include tag
    allow_external_script: passed through to DoInline()
  Returns:
    the inlined data as a string
  Raises:
    Exception: if DoInline() raises an IOError; the message names both the
        file that failed and input_filename.
  """
  try:
    return DoInline(input_filename,
                    grd_node,
                    allow_external_script=allow_external_script).inlined_data
  except IOError as e:
    raise Exception("Failed to open %s while trying to flatten %s. (%s)" %
                    (e.filename, input_filename, e.strerror))
+
+
def InlineToFile(input_filename, output_filename, grd_node):
  """Inlines the resources in a specified file and writes it.

  Reads input_filename, finds all the src attributes and attempts to
  inline the files they are referring to, then writes the result
  to output_filename.

  Args:
    input_filename: name of file to read in
    output_filename: name of file to be written to
    grd_node: html node from the grd file for this include tag
  Returns:
    None
  """
  inlined_data = InlineToString(input_filename, grd_node)
  # The context manager closes the output file even when the write fails.
  with open(output_filename, 'wb') as out_file:
    out_file.write(inlined_data)
+
+
def GetResourceFilenames(filename, allow_external_script=False):
  """For a grd file, returns a set of all the files that would be inline.

  Args:
    filename: name of file to read in
    allow_external_script: passed through to DoInline()
  Returns:
    the set of file names collected by DoInline() in names_only mode
  Raises:
    Exception: if DoInline() raises an IOError; the message names both the
        file that failed and filename.
  """
  try:
    return DoInline(filename, None, names_only=True,
                    allow_external_script=allow_external_script).inlined_files
  except IOError as e:
    raise Exception("Failed to open %s while trying to flatten %s. (%s)" %
                    (e.filename, filename, e.strerror))
+
+
class ChromeHtml(interface.GathererBase):
  '''Represents an HTML document whose resources get inlined by Parse().'''

  def __init__(self, html):
    '''Creates a new object that represents the file 'html'.

    Args:
      html: 'filename.html'
    '''
    # Name the class explicitly: super(type(self), self) would recurse
    # forever as soon as this class is subclassed.
    super(ChromeHtml, self).__init__()
    self.filename_ = html
    # Filled in by Parse().
    self.inlined_text_ = None
    self.scale_factors_ = []

  def SetAttributes(self, attrs):
    '''Sets node attributes used by the gatherer.

    This checks the scale_factors attribute and, when present, stores its
    space separated values as a list.

    Args:
      attrs: The mapping of node attributes.
    '''
    if 'scale_factors' in attrs:
      self.scale_factors_ = attrs['scale_factors'].split(' ')

  def GetText(self):
    '''Returns the inlined text of the HTML document (None before Parse())'''
    return self.inlined_text_

  def GetData(self, lang, encoding):
    '''Return inlined text of the HTML document; the arguments are ignored'''
    return self.inlined_text_

  def Translate(self, lang, pseudo_if_not_available=True,
                skeleton_gatherer=None, fallback_to_english=False):
    '''Returns the inlined text of the document; no translation is applied.'''
    return self.inlined_text_

  def Parse(self):
    '''Inlines the resources of the document and stores the result.'''
    self.inlined_text_ = InlineToString(self.filename_, None)

  @staticmethod
  def FromFile(html, extkey=None, encoding='utf-8'):
    '''Creates a ChromeHtml object for the contents of 'html'.  Returns a new
    ChromeHtml object.

    Args:
      html: file('') | 'filename.html'
      extkey: ignored
      encoding: 'utf-8' (encoding is ignored)

    Return:
      ChromeHtml(filename)
    '''
    # Anything that is not a string is assumed to be a file-like object
    # exposing its path as .name.
    if not isinstance(html, types.StringTypes):
      html = html.name

    return ChromeHtml(html)
« no previous file with comments | « grit/format/data_pack.py ('k') | grit/node/structure.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698