grit/gather/chrome_html.py - Issue 10386189: Add chrome_html gatherer, which inlines html and automatically generates image set…

Unified Diff: grit/gather/chrome_html.py

Issue 10386189: Add chrome_html gatherer, which inlines html and automatically generates image set… (Closed) Base URL: http://git.chromium.org/external/grit-i18n.git@master

Patch Set: Created 8 years, 7 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: grit/gather/chrome_html.py

diff --git a/grit/gather/chrome_html.py b/grit/gather/chrome_html.py

new file mode 100755

index 0000000000000000000000000000000000000000..4fcabc6d5499691f5864ae35a2e18458ede1d208

--- /dev/null

+++ b/grit/gather/chrome_html.py

@@ -0,0 +1,433 @@

+#!/usr/bin/env python

+# Use of this source code is governed by a BSD-style license that can be

+# found in the LICENSE file.

+"""Prepares a Chrome HTML file by inlining resources and adding references to high DPI resources.

tony 2012/05/17 17:57:25 Why can't we reuse html_inline.py? Can we just ad

+This is a small script that takes an HTML file, looks for src attributes

+and inlines the specified file, producing one HTML file with no external

+dependencies. It recursively inlines the included files. When inlining CSS

+image files this script also checks for the existence of high DPI versions

+of the inlined file including those on relevant platforms.

+"""

+import os

+import re

+import sys

+import types

+import base64

+import mimetypes

+from grit.gather import interface

+from grit import lazy_re

+from grit import util

# Scale factors (in addition to the implicit 1x) for which high DPI variants
# of an image are looked up when building an image set.
scale_factors = ['2x']

# Distribution name used when the DIST_ENV_VAR environment variable is unset.
DIST_DEFAULT = 'chromium'
# Name of the environment variable that selects the distribution.
DIST_ENV_VAR = 'CHROMIUM_BUILD'
# Placeholder in file names that gets replaced by the distribution name.
DIST_SUBSTR = '%DISTRIBUTION%'

# Matches beginning of an "if" block with trailing spaces.
# Raw strings keep "\s" from being treated as a string escape sequence.
_BEGIN_IF_BLOCK = lazy_re.compile(
    r'<if [^>]*?expr="(?P<expression>[^"]*)"[^>]*?>\s*')

# Matches ending of an "if" block with preceding spaces.
_END_IF_BLOCK = lazy_re.compile(r'\s*</if>')

# Matches a chrome theme source URL.
_THEME_SOURCE = lazy_re.compile('chrome://theme/IDR_[A-Z0-9_]*')

def ReadFile(input_filename):
  """Helper function that returns the contents of input_filename.

  Args:
    input_filename: name of file to be read

  Returns:
    the contents of the file, read in binary mode

  Raises:
    IOError: if the file cannot be opened or read
  """
  # The context manager closes the handle even if read() raises.
  with open(input_filename, 'rb') as f:
    return f.read()

def FileDataUrl(path):
  """Returns a base64 "data:" URL holding the contents of the file at path.

  The mime type is guessed from the file name; 'text/plain' is used when no
  guess is available.
  """
  guessed_type, _ = mimetypes.guess_type(path)
  if not guessed_type:
    guessed_type = 'text/plain'
  encoded = base64.standard_b64encode(ReadFile(path))
  return "data:%s;base64,%s" % (guessed_type, encoded)

def SrcInlineAsDataURL(
    src_match, base_path, distribution, inlined_files, names_only=False):
  """regex replace function.

  Takes a regex match for src="filename", attempts to read the file
  at 'filename' and returns the src attribute with the file inlined
  as a data URI. If it finds DIST_SUBSTR string in file name, replaces
  it with distribution.

  Args:
    src_match: regex match object with 'filename' named capturing group
    base_path: path to look for files in
    distribution: string that should replace DIST_SUBSTR
    inlined_files: set to which the path of the referenced file is added
    names_only: If true, the function will not read the file but just return "".
        It will still add the filename to |inlined_files|.

  Returns:
    the replacement string; the matched text itself, unchanged, when the
    file name looks like a URL
  """
  filename = src_match.group('filename')

  if ':' in filename:
    # filename is probably a URL, which we don't want to bother inlining
    return src_match.group(0)

  filename = filename.replace(DIST_SUBSTR, distribution)
  filepath = os.path.join(base_path, filename)
  inlined_files.add(filepath)

  if names_only:
    return ""

  # Everything that was matched in front of the opening quote of the file
  # name; it is re-emitted unchanged ahead of the data URL.
  prefix = src_match.string[src_match.start():src_match.start('filename')-1]
  return "%s\"%s\"" % (prefix, FileDataUrl(filepath))

def InsertImageSet(
    src_match, base_path, distribution, inlined_files, names_only=False):
  """regex replace function that turns a CSS image reference into an image set.

  Takes a regex match for a CSS attribute whose value is a url("filename")
  reference and returns the start of an equivalent -webkit-image-set(...)
  declaration. The 1x entry is the referenced file; for every entry of
  scale_factors a file with the same name inside a sub-directory named after
  the scale factor (e.g. 2x/) is added when it exists. Local files are
  inlined as data URLs, chrome theme URLs are referenced by URL.

  Args:
    src_match: regex match object with 'attribute' and 'filename' named
        capturing groups
    base_path: path to look for files in
    distribution: string that should replace DIST_SUBSTR
    inlined_files: set to which the paths of all used files are added
    names_only: If true, no local file is read and "" is returned for local
        files. The file names are still added to |inlined_files|.

  Returns:
    the replacement string; the matched text itself, unchanged, when the
    file name looks like a non-theme URL
  """
  filename = src_match.group('filename')
  attr = src_match.group('attribute')

  # Any matches for which a chrome URL handler will serve all scale factors
  # can simply request all scale factors.
  if _THEME_SOURCE.match(filename):
    images = ["url(\"%s\") %s" % (filename, '1x')]
    for sc in scale_factors:
      images.append("url(\"%s@%s\") %s" % (filename, sc, sc))
    return "%s: -webkit-image-set(%s" % (attr, ', '.join(images))

  if ':' in filename:
    # filename is probably a URL, which we don't want to bother inlining
    return src_match.group(0)

  filename = filename.replace(DIST_SUBSTR, distribution)
  filepath = os.path.join(base_path, filename)
  inlined_files.add(filepath)

  # (path, scale) pairs, in the order they appear in the image set.
  image_paths = [(filepath, '1x')]
  image_dir, image_name = os.path.split(filepath)
  for sc in scale_factors:
    # check for existence of file and add to image set.
    # os.path.join keeps the path relative when image_dir is empty; plain
    # '/'-formatting would point at the filesystem root in that case.
    scale_image_path = os.path.join(image_dir, sc, image_name)
    if os.path.isfile(scale_image_path):
      inlined_files.add(scale_image_path)
      image_paths.append((scale_image_path, sc))

  if names_only:
    # Only the file names were requested; skip reading and encoding images.
    return ""

  images = ["url(\"%s\") %s" % (FileDataUrl(path), scale)
            for path, scale in image_paths]
  return "%s: -webkit-image-set(%s" % (attr, ', '.join(images))

class InlinedData(object):
  """Helper class holding the results from DoInline().

  Holds the inlined data and the set of filenames of all the inlined
  files.
  """

  def __init__(self, inlined_data, inlined_files):
    # The flattened text (DoInline() stores None here when run with
    # names_only).
    self.inlined_data = inlined_data
    # The set of paths of all the files that were inlined.
    self.inlined_files = inlined_files

def DoInline(
    input_filename, grd_node, allow_external_script=False, names_only=False):
  """Helper function that inlines the resources in a specified file.

  Reads input_filename, finds all the src attributes and attempts to
  inline the files they are referring to, then returns the result and
  the set of inlined files.

  Args:
    input_filename: name of file to read in
    grd_node: html node from the grd file for this include tag. May be None,
        in which case every <if> block is treated as satisfied.
    allow_external_script: if true, <script src="..."> tags are left as they
        are instead of being inlined
    names_only: if true, None is returned for the inlined contents (faster).

  Returns:
    an InlinedData object holding the inlined data as a string and the set
    of filenames of all the inlined files

  Raises:
    IOError: if input_filename or a directly referenced file cannot be read
    Exception: if an <if> block has no matching </if>, or if inlining a
        nested script/include file fails
  """
  input_filepath = os.path.dirname(input_filename)

  distribution = os.environ.get(DIST_ENV_VAR, DIST_DEFAULT)
  if len(distribution) > 1 and distribution[0] == '_':
    distribution = distribution[1:].lower()

  # Keep track of all the files we inline.
  inlined_files = set()

  def SrcReplace(src_match, filepath=input_filepath,
                 inlined_files=inlined_files):
    """Helper function to provide SrcInlineAsDataURL with the base file path"""
    return SrcInlineAsDataURL(
        src_match, filepath, distribution, inlined_files, names_only=names_only)

  def SrcImageSet(src_match, filepath=input_filepath,
                  inlined_files=inlined_files):
    """Helper function to provide InsertImageSet with the base file path"""
    return InsertImageSet(
        src_match, filepath, distribution, inlined_files, names_only=names_only)

  def GetFilepath(src_match):
    """Returns the path of the matched file, or None if it looks like a URL"""
    filename = src_match.group('filename')

    if ':' in filename:
      # filename is probably a URL, which we don't want to bother inlining
      return None

    filename = filename.replace(DIST_SUBSTR, distribution)
    return os.path.join(input_filepath, filename)

  def IsConditionSatisfied(src_match):
    """Evaluates the expression of a matched <if> tag against grd_node"""
    expression = src_match.group('expression')
    return grd_node is None or grd_node.EvaluateCondition(expression)

  def CheckConditionalElements(text):
    """Helper function to conditionally inline inner elements"""
    while True:
      begin_if = _BEGIN_IF_BLOCK.search(text)
      if begin_if is None:
        return text

      condition_satisfied = IsConditionSatisfied(begin_if)
      leading = text[0:begin_if.start()]
      content_start = begin_if.end()

      # Find matching "if" block end. |count| is the current nesting depth.
      count = 1
      pos = begin_if.end()
      while True:
        end_if = _END_IF_BLOCK.search(text, pos)
        if end_if is None:
          raise Exception('Unmatched <if>')

        next_if = _BEGIN_IF_BLOCK.search(text, pos)
        if next_if is None or next_if.start() >= end_if.end():
          # The next tag is a closing one.
          count = count - 1
          if count == 0:
            break
          pos = end_if.end()
        else:
          # A nested <if> opens before the next </if>.
          count = count + 1
          pos = next_if.end()

      content = text[content_start:end_if.start()]
      trailing = text[end_if.end():]

      if condition_satisfied:
        text = leading + CheckConditionalElements(content) + trailing
      else:
        text = leading + trailing

  def InlineFileContents(src_match, pattern, inlined_files=inlined_files):
    """Helper function to inline external files of various types"""
    filepath = GetFilepath(src_match)
    if filepath is None:
      return src_match.group(0)
    inlined_files.add(filepath)

    if names_only:
      # Even if names_only is set, html files need to be opened, because
      # they can link to files that need to be added to the file set.
      if filepath.endswith('.html'):
        try:
          nested = DoInline(filepath, grd_node,
                            allow_external_script=allow_external_script,
                            names_only=True)
        except IOError as e:
          raise Exception("Failed to open %s while trying to flatten %s. (%s)" %
                          (e.filename, filepath, e.strerror))
        inlined_files.update(nested.inlined_files)
      return ""

    return pattern % InlineToString(filepath, grd_node, allow_external_script)

  def InlineIncludeFiles(src_match):
    """Helper function to directly inline generic external files (without
       wrapping them with any kind of tags).
    """
    return InlineFileContents(src_match, '%s')

  def InlineScript(match):
    """Helper function to inline external script files"""
    attrs = (match.group('attrs1') + match.group('attrs2')).strip()
    if attrs:
      attrs = ' ' + attrs
    return InlineFileContents(match, '<script' + attrs + '>%s</script>')

  def InlineCSSText(text, css_filepath):
    """Helper function that inlines external resources in CSS text"""
    filepath = os.path.dirname(css_filepath)
    return InlineCSSImages(text, filepath)

  def InlineCSSFile(src_match, inlined_files=inlined_files):
    """Helper function to inline external css files.

    Args:
      src_match: A regular expression match with a named group named "filename".

    Returns:
      The text that should replace the reference to the CSS file.
    """
    filepath = GetFilepath(src_match)
    if filepath is None:
      return src_match.group(0)

    # Even if names_only is set, the CSS file needs to be opened, because it
    # can link to images that need to be added to the file set.
    inlined_files.add(filepath)
    # When resolving CSS files we need to pass in the path so that relative URLs
    # can be resolved.
    return '<style>%s</style>' % InlineCSSText(ReadFile(filepath), filepath)

  def InlineCSSImages(text, filepath=input_filepath):
    """Helper function that inlines external images in CSS backgrounds."""
    # Replace contents of url() for css attributes: content, background,
    # or *-image.
    return re.sub(r'(?P<attribute>content|background|[\w-]*-image):[ ]*' +
                  r'url\((?:\'|\")(?P<filename>[^"\'\)\(]*)(?:\'|\")',
                  lambda m: SrcImageSet(m, filepath),
                  text)

  flat_text = ReadFile(input_filename)

  if not allow_external_script:
    # We need to inline css and js before we inline images so that image
    # references gets inlined in the css and js
    flat_text = re.sub('<script (?P<attrs1>.*?)src="(?P<filename>[^"\']*)"' +
                       '(?P<attrs2>.*?)></script>',
                       InlineScript,
                       flat_text)

  flat_text = re.sub(
      '<link rel="stylesheet".+?href="(?P<filename>[^"]*)".*?>',
      InlineCSSFile,
      flat_text)

  flat_text = re.sub(
      r'<include\s+src="(?P<filename>[^"\']*)".*>',
      InlineIncludeFiles,
      flat_text)

  # Check conditional elements, remove unsatisfied ones from the file.
  flat_text = CheckConditionalElements(flat_text)

  flat_text = re.sub('<(?!script)[^>]+?src="(?P<filename>[^"\']*)"',
                     SrcReplace,
                     flat_text)

  # TODO(arv): Only do this inside <style> tags.
  flat_text = InlineCSSImages(flat_text)

  flat_text = re.sub('<link rel="icon".+?href="(?P<filename>[^"\']*)"',
                     SrcReplace,
                     flat_text)

  if names_only:
    flat_text = None  # Would contain garbage if the flag is set anyway.
  return InlinedData(flat_text, inlined_files)

def InlineToString(input_filename, grd_node, allow_external_script=False):
  """Inlines the resources in a specified file and returns it as a string.

  Args:
    input_filename: name of file to read in
    grd_node: html node from the grd file for this include tag
    allow_external_script: passed through to DoInline()

  Returns:
    the inlined data as a string

  Raises:
    Exception: if one of the files involved cannot be read
  """
  try:
    return DoInline(input_filename,
                    grd_node,
                    allow_external_script=allow_external_script).inlined_data
  except IOError as e:
    # Re-raise with the name of the file being flattened for context.
    raise Exception("Failed to open %s while trying to flatten %s. (%s)" %
                    (e.filename, input_filename, e.strerror))

def InlineToFile(input_filename, output_filename, grd_node):
  """Inlines the resources in a specified file and writes it.

  Reads input_filename, finds all the src attributes and attempts to
  inline the files they are referring to, then writes the result
  to output_filename.

  Args:
    input_filename: name of file to read in
    output_filename: name of file to be written to
    grd_node: html node from the grd file for this include tag

  Returns:
    None

  Raises:
    Exception: if one of the input files cannot be read
  """
  inlined_data = InlineToString(input_filename, grd_node)
  # The context manager closes the output file even if the write fails.
  with open(output_filename, 'wb') as out_file:
    out_file.write(inlined_data)

def GetResourceFilenames(filename, allow_external_script=False):
  """For a grd file, returns a set of all the files that would be inline.

  Args:
    filename: name of file to read in
    allow_external_script: passed through to DoInline()

  Returns:
    the set of filenames collected by DoInline() in names-only mode

  Raises:
    Exception: if one of the files involved cannot be read
  """
  try:
    return DoInline(filename, None, names_only=True,
                    allow_external_script=allow_external_script).inlined_files
  except IOError as e:
    # Re-raise with the name of the file being flattened for context.
    raise Exception("Failed to open %s while trying to flatten %s. (%s)" %
                    (e.filename, filename, e.strerror))

class ChromeHtml(interface.GathererBase):
  '''Represents an HTML document whose referenced resources get inlined.'''

  def __init__(self, html):
    '''Creates a new object that represents the file 'html'.

    Args:
      html: 'filename.html'
    '''
    # Name the class explicitly: super(type(self), self) would recurse
    # forever as soon as this class is subclassed.
    super(ChromeHtml, self).__init__()
    self.filename_ = html
    # Filled in by Parse().
    self.inlined_text_ = None
    # Filled in by SetAttributes(); not read anywhere else in this class.
    self.scale_factors_ = []

  def SetAttributes(self, attrs):
    '''Sets node attributes used by the gatherer.

    This checks the scale_factors attribute.

    Args:
      attrs: The mapping of node attributes.
    '''
    if 'scale_factors' in attrs:
      self.scale_factors_ = attrs['scale_factors'].split(' ')

  def GetText(self):
    '''Returns the inlined text of the HTML document (None before Parse()).'''
    return self.inlined_text_

  def GetData(self, lang, encoding):
    '''Return inlined text of the HTML document; the arguments are ignored.'''
    return self.inlined_text_

  def Translate(self, lang, pseudo_if_not_available=True,
                skeleton_gatherer=None, fallback_to_english=False):
    '''Returns the inlined text as is; no translation is performed.'''
    return self.inlined_text_

  def Parse(self):
    '''Inlines the document and stores the result for the getters.'''
    self.inlined_text_ = InlineToString(self.filename_, None)

  @staticmethod
  def FromFile(html, extkey=None, encoding='utf-8'):
    '''Creates a ChromeHtml object for the contents of 'html'.  Returns a new
    ChromeHtml object.

    Args:
      html: file('') | 'filename.html'
      extkey: ignored
      encoding: 'utf-8' (encoding is ignored)

    Return:
      ChromeHtml(filename)
    '''
    if not isinstance(html, types.StringTypes):
      # A file object was passed in; only its name is needed.
      html = html.name

    return ChromeHtml(html)

« no previous file with comments | « grit/format/data_pack.py ('k') | grit/node/structure.py » ('j') | no next file with comments »