OLD | NEW |
1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
2 # Copyright (c) 2012 The Chromium Authors. All rights reserved. | 2 # Copyright (c) 2012 The Chromium Authors. All rights reserved. |
3 # Use of this source code is governed by a BSD-style license that can be | 3 # Use of this source code is governed by a BSD-style license that can be |
4 # found in the LICENSE file. | 4 # found in the LICENSE file. |
5 | 5 |
6 """Flattens a HTML file by inlining its external resources. | 6 """Flattens a HTML file by inlining its external resources. |
7 | 7 |
8 This is a small script that takes a HTML file, looks for src attributes | 8 This is a small script that takes a HTML file, looks for src attributes |
9 and inlines the specified file, producing one HTML file with no external | 9 and inlines the specified file, producing one HTML file with no external |
10 dependencies. It recursively inlines the included files. | 10 dependencies. It recursively inlines the included files. |
(...skipping 77 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
88 """Helper class holding the results from DoInline(). | 88 """Helper class holding the results from DoInline(). |
89 | 89 |
90 Holds the inlined data and the set of filenames of all the inlined | 90 Holds the inlined data and the set of filenames of all the inlined |
91 files. | 91 files. |
92 """ | 92 """ |
93 def __init__(self, inlined_data, inlined_files): | 93 def __init__(self, inlined_data, inlined_files): |
94 self.inlined_data = inlined_data | 94 self.inlined_data = inlined_data |
95 self.inlined_files = inlined_files | 95 self.inlined_files = inlined_files |
96 | 96 |
97 def DoInline( | 97 def DoInline( |
98 input_filename, grd_node, allow_external_script=False, names_only=False): | 98 input_filename, grd_node, allow_external_script=False, names_only=False, |
| 99 rewrite_function=None): |
99 """Helper function that inlines the resources in a specified file. | 100 """Helper function that inlines the resources in a specified file. |
100 | 101 |
101 Reads input_filename, finds all the src attributes and attempts to | 102 Reads input_filename, finds all the src attributes and attempts to |
102 inline the files they are referring to, then returns the result and | 103 inline the files they are referring to, then returns the result and |
103 the set of inlined files. | 104 the set of inlined files. |
104 | 105 |
105 Args: | 106 Args: |
106 input_filename: name of file to read in | 107 input_filename: name of file to read in |
107 grd_node: html node from the grd file for this include tag | 108 grd_node: html node from the grd file for this include tag |
108 names_only: |nil| will be returned for the inlined contents (faster). | 109 names_only: |nil| will be returned for the inlined contents (faster). |
| 110 rewrite_function: function(filepath, text, distribution) which will be |
| 111 called to rewrite html content before inlining images. |
109 Returns: | 112 Returns: |
110 a tuple of the inlined data as a string and the set of filenames | 113 a tuple of the inlined data as a string and the set of filenames |
111 of all the inlined files | 114 of all the inlined files |
112 """ | 115 """ |
113 input_filepath = os.path.dirname(input_filename) | 116 input_filepath = os.path.dirname(input_filename) |
114 | 117 |
115 distribution = DIST_DEFAULT | 118 distribution = DIST_DEFAULT |
116 if DIST_ENV_VAR in os.environ.keys(): | 119 if DIST_ENV_VAR in os.environ.keys(): |
117 distribution = os.environ[DIST_ENV_VAR] | 120 distribution = os.environ[DIST_ENV_VAR] |
118 if len(distribution) > 1 and distribution[0] == '_': | 121 if len(distribution) > 1 and distribution[0] == '_': |
(...skipping 82 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
201 def InlineScript(match): | 204 def InlineScript(match): |
202 """Helper function to inline external script files""" | 205 """Helper function to inline external script files""" |
203 attrs = (match.group('attrs1') + match.group('attrs2')).strip() | 206 attrs = (match.group('attrs1') + match.group('attrs2')).strip() |
204 if attrs: | 207 if attrs: |
205 attrs = ' ' + attrs | 208 attrs = ' ' + attrs |
206 return InlineFileContents(match, '<script' + attrs + '>%s</script>') | 209 return InlineFileContents(match, '<script' + attrs + '>%s</script>') |
207 | 210 |
208 def InlineCSSText(text, css_filepath): | 211 def InlineCSSText(text, css_filepath): |
209 """Helper function that inlines external resources in CSS text""" | 212 """Helper function that inlines external resources in CSS text""" |
210 filepath = os.path.dirname(css_filepath) | 213 filepath = os.path.dirname(css_filepath) |
| 214 # Allow custom modifications before inlining images. |
| 215 if rewrite_function: |
| 216 text = rewrite_function(filepath, text, distribution) |
211 return InlineCSSImages(text, filepath) | 217 return InlineCSSImages(text, filepath) |
212 | 218 |
213 def InlineCSSFile(src_match, inlined_files=inlined_files): | 219 def InlineCSSFile(src_match, inlined_files=inlined_files): |
214 """Helper function to inline external css files. | 220 """Helper function to inline external css files. |
215 | 221 |
216 Args: | 222 Args: |
217 src_match: A regular expression match with a named group named "filename". | 223 src_match: A regular expression match with a named group named "filename". |
218 | 224 |
219 Returns: | 225 Returns: |
220 The text that should replace the reference to the CSS file. | 226 The text that should replace the reference to the CSS file. |
221 """ | 227 """ |
222 filepath = GetFilepath(src_match) | 228 filepath = GetFilepath(src_match) |
223 if filepath is None: | 229 if filepath is None: |
224 return src_match.group(0) | 230 return src_match.group(0) |
225 | 231 |
226 # Even if names_only is set, the CSS file needs to be opened, because it | 232 # Even if names_only is set, the CSS file needs to be opened, because it |
227 # can link to images that need to be added to the file set. | 233 # can link to images that need to be added to the file set. |
228 inlined_files.add(filepath) | 234 inlined_files.add(filepath) |
229 # When resolving CSS files we need to pass in the path so that relative URLs | 235 # When resolving CSS files we need to pass in the path so that relative URLs |
230 # can be resolved. | 236 # can be resolved. |
231 return '<style>%s</style>' % InlineCSSText(ReadFile(filepath), filepath) | 237 return '<style>%s</style>' % InlineCSSText(ReadFile(filepath), filepath) |
232 | 238 |
233 def InlineCSSImages(text, filepath=input_filepath): | 239 def InlineCSSImages(text, filepath=input_filepath): |
234 """Helper function that inlines external images in CSS backgrounds.""" | 240 """Helper function that inlines external images in CSS backgrounds.""" |
235 # Replace contents of url() for css attributes: content, background, | 241 # Replace contents of url() for css attributes: content, background, |
236 # or *-image. | 242 # or *-image. |
237 return re.sub('(?:content|background|[\w-]*-image):[ ]*' + | 243 return re.sub('(?:content|background|[\w-]*-image):[^;]*' + |
238 'url\((?:\'|\")(?P<filename>[^"\'\)\(]*)(?:\'|\")', | 244 '(?:url\((?:\'|\")([^"\'\)\(]*)(?:\'|\")\)|' + |
| 245 'image-set\(' + |
| 246 '([ ]*url\((?:\'|\")([^"\'\)\(]*)(?:\'|\")\)' + |
| 247 '[ ]*[0-9.]*x[ ]*(,[ ]*)?)*\))', |
| 248 lambda m: InlineCSSUrls(m, filepath), |
| 249 text) |
| 250 |
| 251 def InlineCSSUrls(src_match, filepath=input_filepath): |
| 252 """Helper function that inlines each url on a CSS image rule match.""" |
| 253 # Replace contents of url() references in matches. |
| 254 return re.sub('url\((?:\'|\")(?P<filename>[^"\'\)\(]*)(?:\'|\")', |
239 lambda m: SrcReplace(m, filepath), | 255 lambda m: SrcReplace(m, filepath), |
240 text) | 256 src_match.group(0)) |
| 257 |
| 258 |
241 | 259 |
242 flat_text = ReadFile(input_filename) | 260 flat_text = ReadFile(input_filename) |
243 | 261 |
244 if not allow_external_script: | 262 if not allow_external_script: |
245 # We need to inline css and js before we inline images so that image | 263 # We need to inline css and js before we inline images so that image |
246 # references gets inlined in the css and js | 264 # references gets inlined in the css and js |
247 flat_text = re.sub('<script (?P<attrs1>.*?)src="(?P<filename>[^"\']*)"' + | 265 flat_text = re.sub('<script (?P<attrs1>.*?)src="(?P<filename>[^"\']*)"' + |
248 '(?P<attrs2>.*?)></script>', | 266 '(?P<attrs2>.*?)></script>', |
249 InlineScript, | 267 InlineScript, |
250 flat_text) | 268 flat_text) |
251 | 269 |
252 flat_text = re.sub( | 270 flat_text = re.sub( |
253 '<link rel="stylesheet".+?href="(?P<filename>[^"]*)".*?>', | 271 '<link rel="stylesheet".+?href="(?P<filename>[^"]*)".*?>', |
254 InlineCSSFile, | 272 InlineCSSFile, |
255 flat_text) | 273 flat_text) |
256 | 274 |
257 flat_text = re.sub( | 275 flat_text = re.sub( |
258 '<include\s+src="(?P<filename>[^"\']*)".*>', | 276 '<include\s+src="(?P<filename>[^"\']*)".*>', |
259 InlineIncludeFiles, | 277 InlineIncludeFiles, |
260 flat_text) | 278 flat_text) |
261 | 279 |
262 # Check conditional elements, remove unsatisfied ones from the file. | 280 # Check conditional elements, remove unsatisfied ones from the file. |
263 flat_text = CheckConditionalElements(flat_text) | 281 flat_text = CheckConditionalElements(flat_text) |
264 | 282 |
265 flat_text = re.sub('<(?!script)[^>]+?src="(?P<filename>[^"\']*)"', | 283 flat_text = re.sub('<(?!script)[^>]+?src="(?P<filename>[^"\']*)"', |
266 SrcReplace, | 284 SrcReplace, |
267 flat_text) | 285 flat_text) |
268 | 286 |
| 287 # Allow custom modifications before inlining images. |
| 288 if rewrite_function: |
| 289 flat_text = rewrite_function(input_filepath, flat_text, distribution) |
| 290 |
269 # TODO(arv): Only do this inside <style> tags. | 291 # TODO(arv): Only do this inside <style> tags. |
270 flat_text = InlineCSSImages(flat_text) | 292 flat_text = InlineCSSImages(flat_text) |
271 | 293 |
272 flat_text = re.sub('<link rel="icon".+?href="(?P<filename>[^"\']*)"', | 294 flat_text = re.sub('<link rel="icon".+?href="(?P<filename>[^"\']*)"', |
273 SrcReplace, | 295 SrcReplace, |
274 flat_text) | 296 flat_text) |
275 | 297 |
276 if names_only: | 298 if names_only: |
277 flat_text = None # Will contains garbage if the flag is set anyway. | 299 flat_text = None # Will contains garbage if the flag is set anyway. |
278 return InlinedData(flat_text, inlined_files) | 300 return InlinedData(flat_text, inlined_files) |
279 | 301 |
280 | 302 |
281 def InlineToString(input_filename, grd_node, allow_external_script=False): | 303 def InlineToString(input_filename, grd_node, allow_external_script=False, |
| 304 rewrite_function=None): |
282 """Inlines the resources in a specified file and returns it as a string. | 305 """Inlines the resources in a specified file and returns it as a string. |
283 | 306 |
284 Args: | 307 Args: |
285 input_filename: name of file to read in | 308 input_filename: name of file to read in |
286 grd_node: html node from the grd file for this include tag | 309 grd_node: html node from the grd file for this include tag |
287 Returns: | 310 Returns: |
288 the inlined data as a string | 311 the inlined data as a string |
289 """ | 312 """ |
290 try: | 313 try: |
291 return DoInline(input_filename, | 314 return DoInline(input_filename, |
292 grd_node, | 315 grd_node, |
293 allow_external_script=allow_external_script).inlined_data | 316 allow_external_script=allow_external_script, |
| 317 rewrite_function=rewrite_function).inlined_data |
294 except IOError, e: | 318 except IOError, e: |
295 raise Exception("Failed to open %s while trying to flatten %s. (%s)" % | 319 raise Exception("Failed to open %s while trying to flatten %s. (%s)" % |
296 (e.filename, input_filename, e.strerror)) | 320 (e.filename, input_filename, e.strerror)) |
297 | 321 |
298 | 322 |
299 def InlineToFile(input_filename, output_filename, grd_node): | 323 def InlineToFile(input_filename, output_filename, grd_node): |
300 """Inlines the resources in a specified file and writes it. | 324 """Inlines the resources in a specified file and writes it. |
301 | 325 |
302 Reads input_filename, finds all the src attributes and attempts to | 326 Reads input_filename, finds all the src attributes and attempts to |
303 inline the files they are referring to, then writes the result | 327 inline the files they are referring to, then writes the result |
(...skipping 24 matching lines...) Expand all Loading... |
328 | 352 |
329 def main(): | 353 def main(): |
330 if len(sys.argv) <= 2: | 354 if len(sys.argv) <= 2: |
331 print "Flattens a HTML file by inlining its external resources.\n" | 355 print "Flattens a HTML file by inlining its external resources.\n" |
332 print "html_inline.py inputfile outputfile" | 356 print "html_inline.py inputfile outputfile" |
333 else: | 357 else: |
334 InlineToFile(sys.argv[1], sys.argv[2], None) | 358 InlineToFile(sys.argv[1], sys.argv[2], None) |
335 | 359 |
336 if __name__ == '__main__': | 360 if __name__ == '__main__': |
337 main() | 361 main() |
OLD | NEW |