grit/gather/chrome_html.py - Issue 10386189: Add chrome_html gatherer, which inlines html and automatically generates image set…

Side by Side Diff: grit/gather/chrome_html.py

Issue 10386189: Add chrome_html gatherer, which inlines html and automatically generates image set… (Closed) Base URL: http://git.chromium.org/external/grit-i18n.git@master

Patch Set: Created 8 years, 7 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
(Empty)
	1 #!/usr/bin/env python

	2 # Copyright (c) 2012 The Chromium Authors. All rights reserved.

	3 # Use of this source code is governed by a BSD-style license that can be

	4 # found in the LICENSE file.

	5

	6 """Prepares a Chrome HTML file by inlining resources and adding references to high DPI resources.
	tony 2012/05/17 17:57:25 Why can't we reuse html_inline.py? Can we just add the image-set handling to that file?
	7

	8 This is a small script that takes a HTML file, looks for src attributes

	9 and inlines the specified file, producing one HTML file with no external

	10 dependencies. It recursively inlines the included files. When inlining CSS

	11 image files this script also checks for the existence of high DPI versions

	12 of the inlined file including those on relevant platforms.

	13 """

	14

	15 import os

	16 import re

	17 import sys

	18 import types

	19 import base64

	20 import mimetypes

	21

	22 from grit.gather import interface

	23 from grit import lazy_re

	24 from grit import util

	25

	26 scale_factors = ['2x']

	27

	28 DIST_DEFAULT = 'chromium'

	29 DIST_ENV_VAR = 'CHROMIUM_BUILD'

	30 DIST_SUBSTR = '%DISTRIBUTION%'

	31

	32 # Matches beginning of an "if" block with trailing spaces.

	33 _BEGIN_IF_BLOCK = lazy_re.compile(

	34 '<if [^>]?expr="(?P<expression>[^"])"[^>]?>\s')

	35

	36 # Matches ending of an "if" block with preceding spaces.

	37 _END_IF_BLOCK = lazy_re.compile('\s*</if>')

	38

	39 # Matches a chrome theme source URL.

	40 _THEME_SOURCE = lazy_re.compile('chrome://theme/IDR_[A-Z0-9_]*')

	41

	42 def ReadFile(input_filename):

	43 """Helper function that returns input_filename as a string.

	44

	45 Args:

	46 input_filename: name of file to be read

	47

	48 Returns:

	49 string

	50 """

	51 f = open(input_filename, 'rb')

	52 file_contents = f.read()

	53 f.close()

	54 return file_contents

	55

	56 def FileDataUrl(path):

	57 mimetype = mimetypes.guess_type(path)[0] or 'text/plain'

	58 inline_data = base64.standard_b64encode(ReadFile(path))

	59 return "data:%s;base64,%s" % (mimetype, inline_data)

	60

	61 def SrcInlineAsDataURL(

	62 src_match, base_path, distribution, inlined_files, names_only=False):

	63 """regex replace function.

	64

	65 Takes a regex match for src="filename", attempts to read the file

	66 at 'filename' and returns the src attribute with the file inlined

	67 as a data URI. If it finds DIST_SUBSTR string in file name, replaces

	68 it with distribution.

	69

	70 Args:

	71 src_match: regex match object with 'filename' named capturing group

	72 base_path: path that to look for files in

	73 distribution: string that should replace DIST_SUBSTR

	74 inlined_files: The name of the opened file is appended to this list.

	75 names_only: If true, the function will not read the file but just return "".

	76 It will still add the filename to \|inlined_files\|.

	77

	78 Returns:

	79 string

	80 """

	81 filename = src_match.group('filename')

	82

	83 if filename.find(':') != -1:

	84 # filename is probably a URL, which we don't want to bother inlining

	85 return src_match.group(0)

	86

	87 filename = filename.replace('%DISTRIBUTION%', distribution)

	88 filepath = os.path.join(base_path, filename)

	89 inlined_files.add(filepath)

	90

	91 if names_only:

	92 return ""

	93

	94 prefix = src_match.string[src_match.start():src_match.start('filename')-1]

	95 return "%s\"%s\"" % (prefix, FileDataUrl(filepath))

	96

	97 def InsertImageSet(

	98 src_match, base_path, distribution, inlined_files, names_only=False):

	99 filename = src_match.group('filename')

	100 attr = src_match.group('attribute')

	101 prefix = src_match.string[src_match.start():src_match.start('filename')-1]

	102

	103 # Any matches for which a chrome URL handler will serve all scale factors

	104 # can simply request all scale factors.

	105 if _THEME_SOURCE.match(filename):

	106 images = ["url(\"%s\") %s" % (filename, '1x')]

	107 for sc in scale_factors:

	108 images.append("url(\"%s@%s\") %s" % (filename, sc, sc))

	109 return "%s: -webkit-image-set(%s" % (attr, ', '.join(images))

	110

	111 if filename.find(':') != -1:

	112 # filename is probably a URL, which we don't want to bother inlining

	113 return src_match.group(0)

	114

	115 filename = filename.replace('%DISTRIBUTION%', distribution)

	116 filepath = os.path.join(base_path, filename)

	117 inlined_files.add(filepath)

	118 images = ["url(\"%s\") %s" % (FileDataUrl(filepath), '1x')]

	119

	120 for sc in scale_factors:

	121 # check for existence of file and add to image set.

	122 scale_image = filename.replace('%DISTRIBUTION%', distribution)

	123 scale_path = os.path.split(os.path.join(base_path, scale_image))

	124 scale_image_path = "%s/%s/%s" % (scale_path[0], sc, scale_path[1])

	125 if os.path.isfile(scale_image_path):

	126 inlined_files.add(scale_image_path)

	127 images.append("url(\"%s\") %s" % (FileDataUrl(scale_image_path), sc))

	128 return "%s: -webkit-image-set(%s" % (attr, ', '.join(images))

	129

	130 class InlinedData:

	131 """Helper class holding the results from DoInline().

	132

	133 Holds the inlined data and the set of filenames of all the inlined

	134 files.

	135 """

	136 def __init__(self, inlined_data, inlined_files):

	137 self.inlined_data = inlined_data

	138 self.inlined_files = inlined_files

	139

	140 def DoInline(

	141 input_filename, grd_node, allow_external_script=False, names_only=False):

	142 """Helper function that inlines the resources in a specified file.

	143

	144 Reads input_filename, finds all the src attributes and attempts to

	145 inline the files they are referring to, then returns the result and

	146 the set of inlined files.

	147

	148 Args:

	149 input_filename: name of file to read in

	150 grd_node: html node from the grd file for this include tag

	151 names_only: \|nil\| will be returned for the inlined contents (faster).

	152 Returns:

	153 a tuple of the inlined data as a string and the set of filenames

	154 of all the inlined files

	155 """

	156 input_filepath = os.path.dirname(input_filename)

	157

	158 distribution = DIST_DEFAULT

	159 if DIST_ENV_VAR in os.environ.keys():

	160 distribution = os.environ[DIST_ENV_VAR]

	161 if len(distribution) > 1 and distribution[0] == '_':

	162 distribution = distribution[1:].lower()

	163

	164 # Keep track of all the files we inline.

	165 inlined_files = set()

	166

	167 def SrcReplace(src_match, filepath=input_filepath,

	168 inlined_files=inlined_files):

	169 """Helper function to provide SrcInlineAsDataURL with the base file path"""

	170 return SrcInlineAsDataURL(

	171 src_match, filepath, distribution, inlined_files, names_only=names_only)

	172

	173 def SrcImageSet(src_match, filepath=input_filepath,

	174 inlined_files=inlined_files):

	175 """Helper function to provide InsertImageSet with the base file path"""

	176 return InsertImageSet(

	177 src_match, filepath, distribution, inlined_files, names_only=names_only)

	178

	179 def GetFilepath(src_match):

	180 filename = src_match.group('filename')

	181

	182 if filename.find(':') != -1:

	183 # filename is probably a URL, which we don't want to bother inlining

	184 return None

	185

	186 filename = filename.replace('%DISTRIBUTION%', distribution)

	187 return os.path.join(input_filepath, filename)

	188

	189 def IsConditionSatisfied(src_match):

	190 expression = src_match.group('expression')

	191 return grd_node is None or grd_node.EvaluateCondition(expression)

	192

	193 def CheckConditionalElements(str):

	194 """Helper function to conditionally inline inner elements"""

	195 while True:

	196 begin_if = _BEGIN_IF_BLOCK.search(str)

	197 if begin_if is None:

	198 return str

	199

	200 condition_satisfied = IsConditionSatisfied(begin_if)

	201 leading = str[0:begin_if.start()]

	202 content_start = begin_if.end()

	203

	204 # Find matching "if" block end.

	205 count = 1

	206 pos = begin_if.end()

	207 while True:

	208 end_if = _END_IF_BLOCK.search(str, pos)

	209 if end_if is None:

	210 raise Exception('Unmatched <if>')

	211

	212 next_if = _BEGIN_IF_BLOCK.search(str, pos)

	213 if next_if is None or next_if.start() >= end_if.end():

	214 count = count - 1

	215 if count == 0:

	216 break

	217 pos = end_if.end()

	218 else:

	219 count = count + 1

	220 pos = next_if.end()

	221

	222 content = str[content_start:end_if.start()]

	223 trailing = str[end_if.end():]

	224

	225 if condition_satisfied:

	226 str = leading + CheckConditionalElements(content) + trailing

	227 else:

	228 str = leading + trailing

	229

	230 def InlineFileContents(src_match, pattern, inlined_files=inlined_files):

	231 """Helper function to inline external files of various types"""

	232 filepath = GetFilepath(src_match)

	233 if filepath is None:

	234 return src_match.group(0)

	235 inlined_files.add(filepath)

	236

	237 # Even if names_only is set, html files needs to be opened, because it

	238 # can link to images that need to be added to the file set.

	239 if names_only and not filepath.endswith('.html'):

	240 return ""

	241

	242 return pattern % InlineToString(filepath, grd_node, allow_external_script)

	243

	244 def InlineIncludeFiles(src_match):

	245 """Helper function to directly inline generic external files (without

	246 wrapping them with any kind of tags).

	247 """

	248 return InlineFileContents(src_match, '%s')

	249

	250 def InlineScript(match):

	251 """Helper function to inline external script files"""

	252 attrs = (match.group('attrs1') + match.group('attrs2')).strip()

	253 if attrs:

	254 attrs = ' ' + attrs

	255 return InlineFileContents(match, '<script' + attrs + '>%s</script>')

	256

	257 def InlineCSSText(text, css_filepath):

	258 """Helper function that inlines external resources in CSS text"""

	259 filepath = os.path.dirname(css_filepath)

	260 return InlineCSSImages(text, filepath)

	261

	262 def InlineCSSFile(src_match, inlined_files=inlined_files):

	263 """Helper function to inline external css files.

	264

	265 Args:

	266 src_match: A regular expression match with a named group named "filename".

	267

	268 Returns:

	269 The text that should replace the reference to the CSS file.

	270 """

	271 filepath = GetFilepath(src_match)

	272 if filepath is None:

	273 return src_match.group(0)

	274

	275 # Even if names_only is set, the CSS file needs to be opened, because it

	276 # can link to images that need to be added to the file set.

	277 inlined_files.add(filepath)

	278 # When resolving CSS files we need to pass in the path so that relative URLs

	279 # can be resolved.

	280 return '<style>%s</style>' % InlineCSSText(ReadFile(filepath), filepath)

	281

	282 def InlineCSSImages(text, filepath=input_filepath):

	283 """Helper function that inlines external images in CSS backgrounds."""

	284 # Replace contents of url() for css attributes: content, background,

	285 # or *-image.

	286 return re.sub('(?P<attribute>content\|background\|[\w-]-image):[ ]' +

	287 'url\((?:\'\|\")(?P<filename>[^"\'\)\(]*)(?:\'\|\")',

	288 lambda m: SrcImageSet(m, filepath),

	289 text)

	290

	291 flat_text = ReadFile(input_filename)

	292

	293 if not allow_external_script:

	294 # We need to inline css and js before we inline images so that image

	295 # references gets inlined in the css and js

	296 flat_text = re.sub('<script (?P<attrs1>.?)src="(?P<filename>[^"\'])"' +

	297 '(?P<attrs2>.*?)></script>',

	298 InlineScript,

	299 flat_text)

	300

	301 flat_text = re.sub(

	302 '<link rel="stylesheet".+?href="(?P<filename>[^"])".?>',

	303 InlineCSSFile,

	304 flat_text)

	305

	306 flat_text = re.sub(

	307 '<include\s+src="(?P<filename>[^"\'])".>',

	308 InlineIncludeFiles,

	309 flat_text)

	310

	311 # Check conditional elements, remove unsatisfied ones from the file.

	312 flat_text = CheckConditionalElements(flat_text)

	313

	314 flat_text = re.sub('<(?!script)[^>]+?src="(?P<filename>[^"\']*)"',

	315 SrcReplace,

	316 flat_text)

	317

	318 # TODO(arv): Only do this inside <style> tags.

	319 flat_text = InlineCSSImages(flat_text)

	320 flat_text = re.sub('<link rel="icon".+?href="(?P<filename>[^"\']*)"',

	321 SrcReplace,

	322 flat_text)

	323

	324 if names_only:

	325 flat_text = None # Will contains garbage if the flag is set anyway.

	326 return InlinedData(flat_text, inlined_files)

	327

	328

	329 def InlineToString(input_filename, grd_node, allow_external_script=False):

	330 """Inlines the resources in a specified file and returns it as a string.

	331

	332 Args:

	333 input_filename: name of file to read in

	334 grd_node: html node from the grd file for this include tag

	335 Returns:

	336 the inlined data as a string

	337 """

	338 try:

	339 return DoInline(input_filename,

	340 grd_node,

	341 allow_external_script=allow_external_script).inlined_data

	342 except IOError, e:

	343 raise Exception("Failed to open %s while trying to flatten %s. (%s)" %

	344 (e.filename, input_filename, e.strerror))

	345

	346

	347 def InlineToFile(input_filename, output_filename, grd_node):

	348 """Inlines the resources in a specified file and writes it.

	349

	350 Reads input_filename, finds all the src attributes and attempts to

	351 inline the files they are referring to, then writes the result

	352 to output_filename.

	353

	354 Args:

	355 input_filename: name of file to read in

	356 output_filename: name of file to be written to

	357 grd_node: html node from the grd file for this include tag

	358 Returns:

	359 a set of filenames of all the inlined files

	360 """

	361 inlined_data = InlineToString(input_filename, grd_node)

	362 out_file = open(output_filename, 'wb')

	363 out_file.writelines(inlined_data)

	364 out_file.close()

	365

	366

	367 def GetResourceFilenames(filename, allow_external_script=False):

	368 """For a grd file, returns a set of all the files that would be inline."""

	369 try:

	370 return DoInline(filename, None, names_only=True,

	371 allow_external_script=allow_external_script).inlined_files

	372 except IOError, e:

	373 raise Exception("Failed to open %s while trying to flatten %s. (%s)" %

	374 (e.filename, filename, e.strerror))

	375

	376

	377 class ChromeHtml(interface.GathererBase):

	378 '''Represents an HTML document.'''

	379

	380 def __init__(self, html):

	381 '''Creates a new object that represents 'text'.

	382 Args:

	383 html: 'filename.html'

	384 '''

	385 super(type(self), self).__init__()

	386 self.filename_ = html

	387 self.inlined_text_ = None

	388 self.scale_factors_ = []

	389

	390 def SetAttributes(self, attrs):

	391 '''Sets node attributes used by the gatherer.

	392

	393 This checks the scale_factors attribute.

	394

	395 Args:

	396 attrs: The mapping of node attributes.

	397 '''

	398 if 'scale_factors' in attrs:

	399 self.scale_factors_ = attrs['scale_factors'].split(' ')

	400

	401 def GetText(self):

	402 '''Returns the original text of the HTML document'''

	403 return self.inlined_text_

	404

	405 def GetData(self, lang, encoding):

	406 '''Return inlined text of the HTML document'''

	407 return self.inlined_text_

	408

	409 def Translate(self, lang, pseudo_if_not_available=True,

	410 skeleton_gatherer=None, fallback_to_english=False):

	411 '''Returns this document translated.'''

	412 return self.inlined_text_

	413

	414 def Parse(self):

	415 self.inlined_text_ = InlineToString(self.filename_, None)

	416

	417 @staticmethod

	418 def FromFile(html, extkey=None, encoding = 'utf-8'):

	419 '''Creates a ChromeHtml object for the contents of 'html'. Returns a new

	420 ChromeHtml object.

	421

	422 Args:

	423 html: file('') \| 'filename.html'

	424 extkey: ignored

	425 encoding: 'utf-8' (encoding is ignored)

	426

	427 Return:

	428 ChromeHtml(text_of_file)

	429 '''

	430 if not isinstance(html, types.StringTypes):

	431 html = html.name

	432

	433 return ChromeHtml(html)

OLD	NEW

« no previous file with comments | « grit/format/data_pack.py ('k') | grit/node/structure.py » ('j') | no next file with comments »