Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(173)

Side by Side Diff: grit/gather/chrome_html.py

Issue 10386189: Add chrome_html gatherer, which inlines html and automatically generates image set… (Closed) Base URL: http://git.chromium.org/external/grit-i18n.git@master
Patch Set: Created 8 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « grit/format/data_pack.py ('k') | grit/node/structure.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 #!/usr/bin/env python
2 # Copyright (c) 2012 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file.
5
6 """Prepares a Chrome HTML file by inlining resources and adding references to high DPI resources.
tony 2012/05/17 17:57:25 Why can't we reuse html_inline.py? Can we just ad
7
8 This is a small script that takes a HTML file, looks for src attributes
9 and inlines the specified file, producing one HTML file with no external
10 dependencies. It recursively inlines the included files. When inlining CSS
11 image files this script also checks for the existence of high DPI versions
12 of the inlined file including those on relevant platforms.
13 """
14
15 import os
16 import re
17 import sys
18 import types
19 import base64
20 import mimetypes
21
22 from grit.gather import interface
23 from grit import lazy_re
24 from grit import util
25
# Scale factors (in addition to the implicit '1x') that InsertImageSet() looks
# for when it builds a -webkit-image-set for a CSS image.
scale_factors = ['2x']

# Distribution name used by DoInline() when DIST_ENV_VAR is not set in the
# environment.
DIST_DEFAULT = 'chromium'
# Name of the environment variable DoInline() reads the distribution from.
DIST_ENV_VAR = 'CHROMIUM_BUILD'
# Placeholder in file names that stands for the distribution name.
# NOTE(review): the functions below replace the literal '%DISTRIBUTION%'
# rather than referencing this constant; keep the two in sync.
DIST_SUBSTR = '%DISTRIBUTION%'
31
# Matches beginning of an "if" block with trailing spaces. The condition text
# is captured in the 'expression' group.
# (Raw strings are used so that '\s' reaches the regex engine as written
# instead of relying on an unrecognized string escape.)
_BEGIN_IF_BLOCK = lazy_re.compile(
    r'<if [^>]*?expr="(?P<expression>[^"]*)"[^>]*?>\s*')

# Matches ending of an "if" block with preceding spaces.
_END_IF_BLOCK = lazy_re.compile(r'\s*</if>')

# Matches a chrome theme source URL.
_THEME_SOURCE = lazy_re.compile('chrome://theme/IDR_[A-Z0-9_]*')
41
def ReadFile(input_filename):
  """Helper function that returns the contents of input_filename.

  The file is opened in binary mode and is always closed again, even when
  reading fails part way through.

  Args:
    input_filename: name of file to be read

  Returns:
    the raw contents of the file

  Raises:
    IOError: if the file cannot be opened or read
  """
  with open(input_filename, 'rb') as f:
    return f.read()
55
def FileDataUrl(path):
  """Returns a base64 "data:" URL holding the contents of the file at path.

  The MIME type is guessed from the file name; 'text/plain' is used when no
  type can be guessed.

  Args:
    path: name of the file to encode

  Returns:
    string of the form data:<mimetype>;base64,<encoded contents>
  """
  guessed_type, _ = mimetypes.guess_type(path)
  if not guessed_type:
    guessed_type = 'text/plain'
  encoded = base64.standard_b64encode(ReadFile(path))
  return "data:%s;base64,%s" % (guessed_type, encoded)
60
def SrcInlineAsDataURL(
    src_match, base_path, distribution, inlined_files, names_only=False):
  """regex replace function for src="filename" style attributes.

  Resolves the matched file name against base_path (after substituting
  distribution for the '%DISTRIBUTION%' placeholder), records the resulting
  path and returns the attribute rewritten to carry the file as a data URL.
  References that contain a ':' are treated as URLs and returned unchanged
  without being recorded.

  Args:
    src_match: regex match object with 'filename' named capturing group
    base_path: directory the file name is resolved against
    distribution: string that replaces the '%DISTRIBUTION%' placeholder
    inlined_files: set that the resolved file path is added to
    names_only: If true, the file is not read and "" is returned. The path
                is still added to |inlined_files|.

  Returns:
    string
  """
  name = src_match.group('filename')

  # A colon means this is probably a URL, which we don't want to bother
  # inlining.
  if ':' in name:
    return src_match.group(0)

  resolved = os.path.join(
      base_path, name.replace('%DISTRIBUTION%', distribution))
  inlined_files.add(resolved)

  if names_only:
    return ""

  # Everything from the start of the match up to, but not including, the
  # quote that opens the file name.
  match_start = src_match.start()
  quote_pos = src_match.start('filename') - 1
  attr_prefix = src_match.string[match_start:quote_pos]
  return "%s\"%s\"" % (attr_prefix, FileDataUrl(resolved))
96
def InsertImageSet(
    src_match, base_path, distribution, inlined_files, names_only=False):
  """regex replace function that turns a CSS url() into a -webkit-image-set.

  Takes a regex match for 'attribute: url("filename"' and returns the start
  of an 'attribute: -webkit-image-set(...' declaration that lists the 1x
  image followed by every entry of scale_factors for which a file named
  <directory>/<scale factor>/<file name> exists. The closing parenthesis is
  not part of the match, so the one already present in the text closes the
  image set.

  Args:
    src_match: regex match object with 'attribute' and 'filename' named
               capturing groups
    base_path: directory the file name is resolved against
    distribution: string that replaces the '%DISTRIBUTION%' placeholder
    inlined_files: set that the path of every used file is added to
    names_only: If true, no file is read and "" is returned for local files.
                The file paths are still added to |inlined_files|.

  Returns:
    string
  """
  filename = src_match.group('filename')
  attr = src_match.group('attribute')

  # Any matches for which a chrome URL handler will serve all scale factors
  # can simply request all scale factors.
  if _THEME_SOURCE.match(filename):
    images = ["url(\"%s\") %s" % (filename, '1x')]
    for sc in scale_factors:
      images.append("url(\"%s@%s\") %s" % (filename, sc, sc))
    return "%s: -webkit-image-set(%s" % (attr, ', '.join(images))

  if ':' in filename:
    # filename is probably a URL, which we don't want to bother inlining
    return src_match.group(0)

  filename = filename.replace('%DISTRIBUTION%', distribution)
  filepath = os.path.join(base_path, filename)
  inlined_files.add(filepath)

  images = []
  if not names_only:
    images.append("url(\"%s\") %s" % (FileDataUrl(filepath), '1x'))

  # os.path.join() keeps the hi-DPI path relative when the directory part is
  # empty; joining with '/' by hand would yield '/<scale factor>/<name>'.
  directory, basename = os.path.split(filepath)
  for sc in scale_factors:
    # check for existence of file and add to image set.
    scale_image_path = os.path.join(directory, sc, basename)
    if os.path.isfile(scale_image_path):
      inlined_files.add(scale_image_path)
      if not names_only:
        images.append("url(\"%s\") %s" % (FileDataUrl(scale_image_path), sc))

  if names_only:
    # Only the file names are wanted; skip reading and encoding the images.
    return ""
  return "%s: -webkit-image-set(%s" % (attr, ', '.join(images))
129
class InlinedData:
  """Result container returned by DoInline().

  Attributes are the inlined text (inlined_data) and the set of names of the
  files that were inlined (inlined_files).
  """
  def __init__(self, inlined_data, inlined_files):
    # Store both results as given; no copy is made.
    self.inlined_data, self.inlined_files = inlined_data, inlined_files
139
def DoInline(
    input_filename, grd_node, allow_external_script=False, names_only=False):
  """Helper function that inlines the resources in a specified file.

  Reads input_filename, finds all the src attributes and attempts to
  inline the files they are referring to, then returns the result and
  the set of inlined files.

  The passes run in this order: external scripts (unless
  allow_external_script is set), stylesheets, <include> tags, <if> blocks,
  remaining src attributes, CSS images and finally <link rel="icon">.

  Args:
    input_filename: name of file to read in
    grd_node: html node from the grd file for this include tag. Its
              EvaluateCondition() decides which <if> blocks are kept; when it
              is None every <if> block is kept.
    allow_external_script: If true, <script src="..."> tags are left alone
                           instead of being inlined.
    names_only: If true, only the set of file names is collected and the
                inlined_data of the result is None (faster).

  Returns:
    an InlinedData object holding the inlined data as a string and the set
    of filenames of all the inlined files

  Raises:
    Exception: if an <if> block has no matching </if>, or if a nested file
               cannot be opened.
    IOError: if input_filename or a directly referenced file cannot be read.
  """
  input_filepath = os.path.dirname(input_filename)

  distribution = DIST_DEFAULT
  if DIST_ENV_VAR in os.environ:
    distribution = os.environ[DIST_ENV_VAR]
    # Values such as '_google_chrome' become 'google_chrome'.
    if len(distribution) > 1 and distribution[0] == '_':
      distribution = distribution[1:].lower()

  # Keep track of all the files we inline.
  inlined_files = set()

  def SrcReplace(src_match, filepath=input_filepath,
                 inlined_files=inlined_files):
    """Helper function to provide SrcInlineAsDataURL with the base file path"""
    return SrcInlineAsDataURL(
        src_match, filepath, distribution, inlined_files, names_only=names_only)

  def SrcImageSet(src_match, filepath=input_filepath,
                  inlined_files=inlined_files):
    """Helper function to provide InsertImageSet with the base file path"""
    return InsertImageSet(
        src_match, filepath, distribution, inlined_files, names_only=names_only)

  def GetFilepath(src_match):
    """Returns the path of the matched file, or None if it looks like a URL"""
    filename = src_match.group('filename')

    if ':' in filename:
      # filename is probably a URL, which we don't want to bother inlining
      return None

    filename = filename.replace('%DISTRIBUTION%', distribution)
    return os.path.join(input_filepath, filename)

  def IsConditionSatisfied(src_match):
    """Returns whether the 'expression' group of an <if> match holds"""
    expression = src_match.group('expression')
    return grd_node is None or grd_node.EvaluateCondition(expression)

  def CheckConditionalElements(text):
    """Helper function to conditionally inline inner elements"""
    while True:
      begin_if = _BEGIN_IF_BLOCK.search(text)
      if begin_if is None:
        return text

      condition_satisfied = IsConditionSatisfied(begin_if)
      leading = text[0:begin_if.start()]
      content_start = begin_if.end()

      # Find matching "if" block end. |count| is the current nesting depth.
      count = 1
      pos = begin_if.end()
      while True:
        end_if = _END_IF_BLOCK.search(text, pos)
        if end_if is None:
          raise Exception('Unmatched <if>')

        next_if = _BEGIN_IF_BLOCK.search(text, pos)
        if next_if is None or next_if.start() >= end_if.end():
          # The next tag is a closing one.
          count = count - 1
          if count == 0:
            break
          pos = end_if.end()
        else:
          # Another <if> opens before the next </if>.
          count = count + 1
          pos = next_if.end()

      content = text[content_start:end_if.start()]
      trailing = text[end_if.end():]

      if condition_satisfied:
        text = leading + CheckConditionalElements(content) + trailing
      else:
        text = leading + trailing

  def InlineFileContents(src_match, pattern, inlined_files=inlined_files):
    """Helper function to inline external files of various types"""
    filepath = GetFilepath(src_match)
    if filepath is None:
      return src_match.group(0)
    inlined_files.add(filepath)

    if names_only:
      # Even if names_only is set, html files needs to be opened, because it
      # can link to images that need to be added to the file set. Recurse in
      # names_only mode and merge the nested set so those files are reported.
      if filepath.endswith('.html'):
        try:
          nested = DoInline(filepath, grd_node,
                            allow_external_script=allow_external_script,
                            names_only=True)
        except IOError as e:
          raise Exception("Failed to open %s while trying to flatten %s. (%s)" %
                          (e.filename, filepath, e.strerror))
        inlined_files.update(nested.inlined_files)
      return ""

    return pattern % InlineToString(filepath, grd_node, allow_external_script)

  def InlineIncludeFiles(src_match):
    """Helper function to directly inline generic external files (without
       wrapping them with any kind of tags).
    """
    return InlineFileContents(src_match, '%s')

  def InlineScript(match):
    """Helper function to inline external script files"""
    attrs = (match.group('attrs1') + match.group('attrs2')).strip()
    if attrs:
      attrs = ' ' + attrs
    return InlineFileContents(match, '<script' + attrs + '>%s</script>')

  def InlineCSSText(text, css_filepath):
    """Helper function that inlines external resources in CSS text"""
    filepath = os.path.dirname(css_filepath)
    return InlineCSSImages(text, filepath)

  def InlineCSSFile(src_match, inlined_files=inlined_files):
    """Helper function to inline external css files.

    Args:
      src_match: A regular expression match with a named group named "filename".

    Returns:
      The text that should replace the reference to the CSS file.
    """
    filepath = GetFilepath(src_match)
    if filepath is None:
      return src_match.group(0)

    # Even if names_only is set, the CSS file needs to be opened, because it
    # can link to images that need to be added to the file set.
    inlined_files.add(filepath)
    # When resolving CSS files we need to pass in the path so that relative URLs
    # can be resolved.
    return '<style>%s</style>' % InlineCSSText(ReadFile(filepath), filepath)

  def InlineCSSImages(text, filepath=input_filepath):
    """Helper function that inlines external images in CSS backgrounds."""
    # Replace contents of url() for css attributes: content, background,
    # or *-image.
    return re.sub(r'(?P<attribute>content|background|[\w-]*-image):[ ]*' +
                  r'url\((?:\'|\")(?P<filename>[^"\'\)\(]*)(?:\'|\")',
                  lambda m: SrcImageSet(m, filepath),
                  text)

  flat_text = ReadFile(input_filename)

  if not allow_external_script:
    # We need to inline css and js before we inline images so that image
    # references gets inlined in the css and js
    flat_text = re.sub('<script (?P<attrs1>.*?)src="(?P<filename>[^"\']*)"' +
                       '(?P<attrs2>.*?)></script>',
                       InlineScript,
                       flat_text)

  flat_text = re.sub(
      '<link rel="stylesheet".+?href="(?P<filename>[^"]*)".*?>',
      InlineCSSFile,
      flat_text)

  flat_text = re.sub(
      r'<include\s+src="(?P<filename>[^"\']*)".*>',
      InlineIncludeFiles,
      flat_text)

  # Check conditional elements, remove unsatisfied ones from the file.
  flat_text = CheckConditionalElements(flat_text)

  flat_text = re.sub('<(?!script)[^>]+?src="(?P<filename>[^"\']*)"',
                     SrcReplace,
                     flat_text)

  # TODO(arv): Only do this inside <style> tags.
  flat_text = InlineCSSImages(flat_text)
  flat_text = re.sub('<link rel="icon".+?href="(?P<filename>[^"\']*)"',
                     SrcReplace,
                     flat_text)

  if names_only:
    flat_text = None  # Would contain garbage if the flag is set anyway.
  return InlinedData(flat_text, inlined_files)
327
328
def InlineToString(input_filename, grd_node, allow_external_script=False):
  """Inlines the resources in a specified file and returns it as a string.

  Args:
    input_filename: name of file to read in
    grd_node: html node from the grd file for this include tag
    allow_external_script: passed through to DoInline()

  Returns:
    the inlined data as a string

  Raises:
    Exception: if DoInline() fails with an IOError; the message names the
               file that could not be opened and input_filename.
  """
  try:
    return DoInline(input_filename,
                    grd_node,
                    allow_external_script=allow_external_script).inlined_data
  except IOError as e:
    raise Exception("Failed to open %s while trying to flatten %s. (%s)" %
                    (e.filename, input_filename, e.strerror))
345
346
def InlineToFile(input_filename, output_filename, grd_node):
  """Inlines the resources in a specified file and writes it.

  Reads input_filename, finds all the src attributes and attempts to
  inline the files they are referring to, then writes the result
  to output_filename.

  Args:
    input_filename: name of file to read in
    output_filename: name of file to be written to
    grd_node: html node from the grd file for this include tag

  Returns:
    None. The set of inlined file names is not reported by this function.

  Raises:
    Exception: if InlineToString() fails to open one of the input files.
    IOError: if output_filename cannot be opened or written.
  """
  inlined_data = InlineToString(input_filename, grd_node)
  # The data is one string, so write it in a single call; the with-block
  # closes the file even if the write fails.
  with open(output_filename, 'wb') as out_file:
    out_file.write(inlined_data)
365
366
def GetResourceFilenames(filename, allow_external_script=False):
  """For a grd file, returns a set of all the files that would be inline.

  Runs DoInline() in names_only mode with no grd node.

  Args:
    filename: name of file to read in
    allow_external_script: passed through to DoInline()

  Returns:
    the set of names of the files that would be inlined

  Raises:
    Exception: if DoInline() fails with an IOError; the message names the
               file that could not be opened and filename.
  """
  try:
    return DoInline(filename, None, names_only=True,
                    allow_external_script=allow_external_script).inlined_files
  except IOError as e:
    raise Exception("Failed to open %s while trying to flatten %s. (%s)" %
                    (e.filename, filename, e.strerror))
375
376
class ChromeHtml(interface.GathererBase):
  '''Represents an HTML document whose resources are inlined by Parse().'''

  def __init__(self, html):
    '''Creates a new object that represents the file 'html'.

    Args:
      html: 'filename.html'
    '''
    # Name the class explicitly: super(type(self), self) would resolve to this
    # same __init__ again when called on an instance of a subclass.
    super(ChromeHtml, self).__init__()
    self.filename_ = html
    # Set by Parse(); None until then.
    self.inlined_text_ = None
    # Set by SetAttributes().
    # NOTE(review): nothing in this class reads this value yet.
    self.scale_factors_ = []

  def SetAttributes(self, attrs):
    '''Sets node attributes used by the gatherer.

    This checks the scale_factors attribute, which is split on single spaces.

    Args:
      attrs: The mapping of node attributes.
    '''
    if 'scale_factors' in attrs:
      self.scale_factors_ = attrs['scale_factors'].split(' ')

  def GetText(self):
    '''Returns the inlined text of the HTML document (None before Parse()).'''
    return self.inlined_text_

  def GetData(self, lang, encoding):
    '''Returns the inlined text of the HTML document.

    Args:
      lang: ignored
      encoding: ignored
    '''
    return self.inlined_text_

  def Translate(self, lang, pseudo_if_not_available=True,
                skeleton_gatherer=None, fallback_to_english=False):
    '''Returns the inlined text of the document; all arguments are ignored and
    no translation is applied.
    '''
    return self.inlined_text_

  def Parse(self):
    '''Inlines the resources of the file and stores the resulting text.'''
    # No grd node is passed, so every <if> block in the file is kept.
    self.inlined_text_ = InlineToString(self.filename_, None)

  @staticmethod
  def FromFile(html, extkey=None, encoding='utf-8'):
    '''Creates a ChromeHtml object for the contents of 'html'.  Returns a new
    ChromeHtml object.

    Args:
      html: file('') | 'filename.html'
      extkey: ignored
      encoding: 'utf-8' (encoding is ignored)

    Return:
      ChromeHtml(filename)
    '''
    # A file object was passed; only its name is needed.
    if not isinstance(html, types.StringTypes):
      html = html.name

    return ChromeHtml(html)
OLDNEW
« no previous file with comments | « grit/format/data_pack.py ('k') | grit/node/structure.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698