Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(256)

Side by Side Diff: grit/format/html_inline.py

Issue 10386189: Add chrome_html gatherer, which inlines html and automatically generates image set… (Closed) Base URL: http://git.chromium.org/external/grit-i18n.git@master
Patch Set: Add chrome_html_unittest to test_suite_all. Created 8 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « grit/format/data_pack.py ('k') | grit/gather/chrome_html.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 #!/usr/bin/env python 1 #!/usr/bin/env python
2 # Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 # Copyright (c) 2012 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be 3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file. 4 # found in the LICENSE file.
5 5
6 """Flattens a HTML file by inlining its external resources. 6 """Flattens a HTML file by inlining its external resources.
7 7
8 This is a small script that takes a HTML file, looks for src attributes 8 This is a small script that takes a HTML file, looks for src attributes
9 and inlines the specified file, producing one HTML file with no external 9 and inlines the specified file, producing one HTML file with no external
10 dependencies. It recursively inlines the included files. 10 dependencies. It recursively inlines the included files.
(...skipping 77 matching lines...) Expand 10 before | Expand all | Expand 10 after
88 """Helper class holding the results from DoInline(). 88 """Helper class holding the results from DoInline().
89 89
90 Holds the inlined data and the set of filenames of all the inlined 90 Holds the inlined data and the set of filenames of all the inlined
91 files. 91 files.
92 """ 92 """
93 def __init__(self, inlined_data, inlined_files): 93 def __init__(self, inlined_data, inlined_files):
94 self.inlined_data = inlined_data 94 self.inlined_data = inlined_data
95 self.inlined_files = inlined_files 95 self.inlined_files = inlined_files
96 96
97 def DoInline( 97 def DoInline(
98 input_filename, grd_node, allow_external_script=False, names_only=False): 98 input_filename, grd_node, allow_external_script=False, names_only=False,
99 rewrite_function=None):
99 """Helper function that inlines the resources in a specified file. 100 """Helper function that inlines the resources in a specified file.
100 101
101 Reads input_filename, finds all the src attributes and attempts to 102 Reads input_filename, finds all the src attributes and attempts to
102 inline the files they are referring to, then returns the result and 103 inline the files they are referring to, then returns the result and
103 the set of inlined files. 104 the set of inlined files.
104 105
105 Args: 106 Args:
106 input_filename: name of file to read in 107 input_filename: name of file to read in
107 grd_node: html node from the grd file for this include tag 108 grd_node: html node from the grd file for this include tag
108 names_only: |None| will be returned for the inlined contents (faster). 109 names_only: |None| will be returned for the inlined contents (faster).
110 rewrite_function: function(filepath, text, distribution) which will be
111 called to rewrite html content before inlining images.
109 Returns: 112 Returns:
110 a tuple of the inlined data as a string and the set of filenames 113 a tuple of the inlined data as a string and the set of filenames
111 of all the inlined files 114 of all the inlined files
112 """ 115 """
113 input_filepath = os.path.dirname(input_filename) 116 input_filepath = os.path.dirname(input_filename)
114 117
115 distribution = DIST_DEFAULT 118 distribution = DIST_DEFAULT
116 if DIST_ENV_VAR in os.environ.keys(): 119 if DIST_ENV_VAR in os.environ.keys():
117 distribution = os.environ[DIST_ENV_VAR] 120 distribution = os.environ[DIST_ENV_VAR]
118 if len(distribution) > 1 and distribution[0] == '_': 121 if len(distribution) > 1 and distribution[0] == '_':
(...skipping 82 matching lines...) Expand 10 before | Expand all | Expand 10 after
201 def InlineScript(match): 204 def InlineScript(match):
202 """Helper function to inline external script files""" 205 """Helper function to inline external script files"""
203 attrs = (match.group('attrs1') + match.group('attrs2')).strip() 206 attrs = (match.group('attrs1') + match.group('attrs2')).strip()
204 if attrs: 207 if attrs:
205 attrs = ' ' + attrs 208 attrs = ' ' + attrs
206 return InlineFileContents(match, '<script' + attrs + '>%s</script>') 209 return InlineFileContents(match, '<script' + attrs + '>%s</script>')
207 210
208 def InlineCSSText(text, css_filepath): 211 def InlineCSSText(text, css_filepath):
209 """Helper function that inlines external resources in CSS text""" 212 """Helper function that inlines external resources in CSS text"""
210 filepath = os.path.dirname(css_filepath) 213 filepath = os.path.dirname(css_filepath)
214 # Allow custom modifications before inlining images.
215 if rewrite_function:
216 text = rewrite_function(filepath, text, distribution)
211 return InlineCSSImages(text, filepath) 217 return InlineCSSImages(text, filepath)
212 218
213 def InlineCSSFile(src_match, inlined_files=inlined_files): 219 def InlineCSSFile(src_match, inlined_files=inlined_files):
214 """Helper function to inline external css files. 220 """Helper function to inline external css files.
215 221
216 Args: 222 Args:
217 src_match: A regular expression match with a named group named "filename". 223 src_match: A regular expression match with a named group named "filename".
218 224
219 Returns: 225 Returns:
220 The text that should replace the reference to the CSS file. 226 The text that should replace the reference to the CSS file.
221 """ 227 """
222 filepath = GetFilepath(src_match) 228 filepath = GetFilepath(src_match)
223 if filepath is None: 229 if filepath is None:
224 return src_match.group(0) 230 return src_match.group(0)
225 231
226 # Even if names_only is set, the CSS file needs to be opened, because it 232 # Even if names_only is set, the CSS file needs to be opened, because it
227 # can link to images that need to be added to the file set. 233 # can link to images that need to be added to the file set.
228 inlined_files.add(filepath) 234 inlined_files.add(filepath)
229 # When resolving CSS files we need to pass in the path so that relative URLs 235 # When resolving CSS files we need to pass in the path so that relative URLs
230 # can be resolved. 236 # can be resolved.
231 return '<style>%s</style>' % InlineCSSText(ReadFile(filepath), filepath) 237 return '<style>%s</style>' % InlineCSSText(ReadFile(filepath), filepath)
232 238
233 def InlineCSSImages(text, filepath=input_filepath): 239 def InlineCSSImages(text, filepath=input_filepath):
234 """Helper function that inlines external images in CSS backgrounds.""" 240 """Helper function that inlines external images in CSS backgrounds."""
235 # Replace contents of url() for css attributes: content, background, 241 # Replace contents of url() for css attributes: content, background,
236 # or *-image. 242 # or *-image.
237 return re.sub('(?:content|background|[\w-]*-image):[ ]*' + 243 return re.sub('(?:content|background|[\w-]*-image):[^;]*' +
238 'url\((?:\'|\")(?P<filename>[^"\'\)\(]*)(?:\'|\")', 244 '(?:url\((?:\'|\")([^"\'\)\(]*)(?:\'|\")\)|' +
245 'image-set\(' +
246 '([ ]*url\((?:\'|\")([^"\'\)\(]*)(?:\'|\")\)' +
247 '[ ]*[0-9.]*x[ ]*(,[ ]*)?)*\))',
248 lambda m: InlineCSSUrls(m, filepath),
249 text)
250
251 def InlineCSSUrls(src_match, filepath=input_filepath):
252 """Helper function that inlines each url on a CSS image rule match."""
253 # Replace contents of url() references in matches.
254 return re.sub('url\((?:\'|\")(?P<filename>[^"\'\)\(]*)(?:\'|\")',
239 lambda m: SrcReplace(m, filepath), 255 lambda m: SrcReplace(m, filepath),
240 text) 256 src_match.group(0))
257
258
241 259
242 flat_text = ReadFile(input_filename) 260 flat_text = ReadFile(input_filename)
243 261
244 if not allow_external_script: 262 if not allow_external_script:
245 # We need to inline css and js before we inline images so that image 263 # We need to inline css and js before we inline images so that image
246 # references get inlined in the css and js 264 # references get inlined in the css and js
247 flat_text = re.sub('<script (?P<attrs1>.*?)src="(?P<filename>[^"\']*)"' + 265 flat_text = re.sub('<script (?P<attrs1>.*?)src="(?P<filename>[^"\']*)"' +
248 '(?P<attrs2>.*?)></script>', 266 '(?P<attrs2>.*?)></script>',
249 InlineScript, 267 InlineScript,
250 flat_text) 268 flat_text)
251 269
252 flat_text = re.sub( 270 flat_text = re.sub(
253 '<link rel="stylesheet".+?href="(?P<filename>[^"]*)".*?>', 271 '<link rel="stylesheet".+?href="(?P<filename>[^"]*)".*?>',
254 InlineCSSFile, 272 InlineCSSFile,
255 flat_text) 273 flat_text)
256 274
257 flat_text = re.sub( 275 flat_text = re.sub(
258 '<include\s+src="(?P<filename>[^"\']*)".*>', 276 '<include\s+src="(?P<filename>[^"\']*)".*>',
259 InlineIncludeFiles, 277 InlineIncludeFiles,
260 flat_text) 278 flat_text)
261 279
262 # Check conditional elements, remove unsatisfied ones from the file. 280 # Check conditional elements, remove unsatisfied ones from the file.
263 flat_text = CheckConditionalElements(flat_text) 281 flat_text = CheckConditionalElements(flat_text)
264 282
265 flat_text = re.sub('<(?!script)[^>]+?src="(?P<filename>[^"\']*)"', 283 flat_text = re.sub('<(?!script)[^>]+?src="(?P<filename>[^"\']*)"',
266 SrcReplace, 284 SrcReplace,
267 flat_text) 285 flat_text)
268 286
287 # Allow custom modifications before inlining images.
288 if rewrite_function:
289 flat_text = rewrite_function(input_filepath, flat_text, distribution)
290
269 # TODO(arv): Only do this inside <style> tags. 291 # TODO(arv): Only do this inside <style> tags.
270 flat_text = InlineCSSImages(flat_text) 292 flat_text = InlineCSSImages(flat_text)
271 293
272 flat_text = re.sub('<link rel="icon".+?href="(?P<filename>[^"\']*)"', 294 flat_text = re.sub('<link rel="icon".+?href="(?P<filename>[^"\']*)"',
273 SrcReplace, 295 SrcReplace,
274 flat_text) 296 flat_text)
275 297
276 if names_only: 298 if names_only:
277 flat_text = None # Will contain garbage if the flag is set anyway. 299 flat_text = None # Will contain garbage if the flag is set anyway.
278 return InlinedData(flat_text, inlined_files) 300 return InlinedData(flat_text, inlined_files)
279 301
280 302
281 def InlineToString(input_filename, grd_node, allow_external_script=False): 303 def InlineToString(input_filename, grd_node, allow_external_script=False,
304 rewrite_function=None):
282 """Inlines the resources in a specified file and returns it as a string. 305 """Inlines the resources in a specified file and returns it as a string.
283 306
284 Args: 307 Args:
285 input_filename: name of file to read in 308 input_filename: name of file to read in
286 grd_node: html node from the grd file for this include tag 309 grd_node: html node from the grd file for this include tag
287 Returns: 310 Returns:
288 the inlined data as a string 311 the inlined data as a string
289 """ 312 """
290 try: 313 try:
291 return DoInline(input_filename, 314 return DoInline(input_filename,
292 grd_node, 315 grd_node,
293 allow_external_script=allow_external_script).inlined_data 316 allow_external_script=allow_external_script,
317 rewrite_function=rewrite_function).inlined_data
294 except IOError, e: 318 except IOError, e:
295 raise Exception("Failed to open %s while trying to flatten %s. (%s)" % 319 raise Exception("Failed to open %s while trying to flatten %s. (%s)" %
296 (e.filename, input_filename, e.strerror)) 320 (e.filename, input_filename, e.strerror))
297 321
298 322
299 def InlineToFile(input_filename, output_filename, grd_node): 323 def InlineToFile(input_filename, output_filename, grd_node):
300 """Inlines the resources in a specified file and writes it. 324 """Inlines the resources in a specified file and writes it.
301 325
302 Reads input_filename, finds all the src attributes and attempts to 326 Reads input_filename, finds all the src attributes and attempts to
303 inline the files they are referring to, then writes the result 327 inline the files they are referring to, then writes the result
(...skipping 24 matching lines...) Expand all
328 352
329 def main(): 353 def main():
330 if len(sys.argv) <= 2: 354 if len(sys.argv) <= 2:
331 print "Flattens a HTML file by inlining its external resources.\n" 355 print "Flattens a HTML file by inlining its external resources.\n"
332 print "html_inline.py inputfile outputfile" 356 print "html_inline.py inputfile outputfile"
333 else: 357 else:
334 InlineToFile(sys.argv[1], sys.argv[2], None) 358 InlineToFile(sys.argv[1], sys.argv[2], None)
335 359
336 if __name__ == '__main__': 360 if __name__ == '__main__':
337 main() 361 main()
OLDNEW
« no previous file with comments | « grit/format/data_pack.py ('k') | grit/gather/chrome_html.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698