Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(272)

Side by Side Diff: client/tools/htmlconverter.py

Issue 9977011: get rid of dependency on htmlconverter for everything but swarm. (Closed) Base URL: http://dart.googlecode.com/svn/branches/bleeding_edge/dart/
Patch Set: Created 8 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « no previous file | client/tools/htmlconverter_test.py » ('j') | samples/dartcombat/README » ('J')
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 # Copyright (c) 2011, the Dart project authors. Please see the AUTHORS file
2 # for details. All rights reserved. Use of this source code is governed by a
3 # BSD-style license that can be found in the LICENSE file.
4
5 #!/usr/bin/env python
6 #
7
8 """Rewrites HTML files, converting Dart script sections into JavaScript.
9
10 Process HTML files, and internally changes script sections that use Dart code
11 into JavaScript sections. It also can optimize the HTML to inline code.
12 """
13
14 from HTMLParser import HTMLParser
15 import os.path
16 from os.path import abspath, basename, dirname, exists, isabs, join
17 import base64, re, optparse, os, shutil, subprocess, sys, tempfile, codecs
18 import urllib2
19
20 CLIENT_PATH = dirname(dirname(abspath(__file__)))
21 DART_PATH = dirname(CLIENT_PATH)
22 TOOLS_PATH = join(DART_PATH, 'tools')
23
24 sys.path.append(TOOLS_PATH)
25 import utils
26
# MIME type that marks a <script> tag as containing Dart code.
DART_MIME_TYPE = "application/dart"

# Matches a #library directive at the start of a line (callers pass
# re.MULTILINE). Raw string: "\(" is not a valid escape in a plain literal.
LIBRARY_PATTERN = r"^#library\(.*\);"

# Matches #import/#source directives, optionally preceded by spaces.
# Groups: (1) directive, (2) opening paren + quote, (3) path, (4) remainder.
IMPORT_SOURCE_MATCHER = re.compile(
    r"^ *(#import|#source)(\(['\"])([^'\"]*)(.*\);)", re.MULTILINE)

# Matches an import of dart:dom at the start of a line.
DOM_IMPORT_MATCHER = re.compile(
    r"^#import\(['\"]dart\:dom['\"].*\);", re.MULTILINE)

# Matches an import of dart:html at the start of a line.
HTML_IMPORT_MATCHER = re.compile(
    r"^#import\(['\"]dart\:html['\"].*\);", re.MULTILINE)
35
# Error text raised when the frog compiler binary is missing. The single %s
# is filled with the root of the Dart checkout.
FROG_NOT_FOUND_ERROR = """Couldn't find compiler: please run the following commands:
$ cd %s/frog
$ ./tools/build.py -m release"""

# Dart wrapper library written next to the code being compiled. The %s is the
# path of the library to import; its main() becomes the entry point.
ENTRY_POINT = """
#library('entry');
#import('%s', prefix: 'original');
main() => original.main();
"""

# Wrappers for inlined stylesheets and for compiled JavaScript.
CSS_TEMPLATE = '<style type="text/css">%s</style>'
CHROMIUM_SCRIPT_TEMPLATE = '<script type="application/javascript">%s</script>'

# Script appended to pages that keep their Dart code: starts Dart when the
# browser supports it, otherwise offers to redirect from the "-dart.html"
# page to its "-js.html" counterpart.
DARTIUM_TO_JS_SCRIPT = """
<script type="text/javascript">
(function() {
// Let the user know that Dart is required.
if (!window.navigator.webkitStartDart) {
if (confirm(
"You are trying to run Dart code on a browser " +
"that doesn't support Dart. Do you want to redirect to " +
"a version compiled to JavaScript instead?")) {
var addr = window.location;
window.location = addr.toString().replace('-dart.html', '-js.html');
}
} else {
window.navigator.webkitStartDart();
}
})();
</script>
"""
68
def adjustImports(contents):
  """ Makes the paths of #import/#source directives absolute.

  Paths that start with 'dart:' are kept untouched; any other path is passed
  through abspath. Spaces in front of a directive are dropped, because they
  are matched outside of the groups that get written back.

  Args:
    contents: a string containing Dart code.

  Returns:
    The code with its directives rewritten.
  """
  def absolutize(match):
    directive, opener, target, rest = match.group(1, 2, 3, 4)
    if not target.startswith('dart:'):
      target = abspath(target)
    return ''.join([directive, opener, target, rest])

  return IMPORT_SOURCE_MATCHER.sub(absolutize, contents)
76
class DartCompiler(object):
  """ Common code for compiling Dart script tags in an HTML file.

  Args:
    verbose: when true, the compiler command line and its output are printed.
    extra_flags: a string appended to the compiler command line as a single
        argument (nothing is appended when it is empty).
  """

  def __init__(self, verbose=False,
               extra_flags=""):
    self.verbose = verbose
    self.extra_flags = extra_flags

  def compileCode(self, src=None, body=None):
    """ Compile the given source code.

    Either the script tag has a src attribute or a non-empty body (one of the
    arguments will be none, the other is not).

    Args:
      src: a string pointing to a Dart script file.
      body: a string containing Dart code.

    Returns:
      A <script> tag with the compiled JavaScript inlined, or '' when there is
      neither a src nor a non-blank body.

    Raises:
      ConverterException: if both src and a non-blank body are given, if src
          does not end in '.dart', or if the compiler reports a failure.
    """
    # Validate the arguments before touching the file system, so that the
    # early exits below cannot leave temporary directories behind.
    if src is not None:
      if body is not None and body.strip() != '':
        raise ConverterException(
            "The script body should be empty if src is specified")
      if not src.endswith('.dart'):
        raise ConverterException("invalid file type:" + src)
    elif body is None or body.strip() == '':
      # nothing to do
      print('Warning: empty script tag with no src attribute')
      return ''

    workdirs = []
    try:
      indir = tempfile.mkdtemp()
      workdirs.append(indir)
      outdir = tempfile.mkdtemp()
      workdirs.append(outdir)

      if src is not None:
        inputfile = abspath(src)
        with open(inputfile, 'r') as f:
          contents = f.read()

        # We will import the source file to emulate in JS that code is run
        # after DOMContentLoaded. We need a #library to ensure #import won't
        # fail:
        if not re.search(LIBRARY_PATTERN, contents, re.MULTILINE):
          inputfile = join(indir, 'code.dart')
          with open(inputfile, 'w') as f:
            f.write("#library('code');")
            f.write(adjustImports(contents))
      else:
        inputfile = join(indir, 'code.dart')
        with open(inputfile, 'w') as f:
          f.write("#library('inlinedcode');\n")
          # adjustImports also eliminates leading spaces in front of
          # directives
          f.write(adjustImports(body))

      wrappedfile = join(indir, 'entry.dart')
      with open(wrappedfile, 'w') as f:
        f.write(ENTRY_POINT % inputfile)

      status, out, err = execute(self.compileCommand(wrappedfile, outdir),
                                 self.verbose)
      if status:
        raise ConverterException('compilation errors')

      # Inline the compiled code in the page
      with open(self.outputFileName(wrappedfile, outdir), 'r') as f:
        res = f.read()
      return CHROMIUM_SCRIPT_TEMPLATE % res
    finally:
      # Cleanup, on success and on failure alike. Errors are ignored so that
      # a failed cleanup never hides the exception that is propagating.
      for workdir in workdirs:
        shutil.rmtree(workdir, ignore_errors=True)

  def compileCommand(self, inputfile, outdir):
    """ Builds the frog command line that compiles inputfile into outdir.

    Args:
      inputfile: path of the Dart file to compile.
      outdir: directory that will receive the generated JavaScript.

    Returns:
      The command as a list of strings.

    Raises:
      ConverterException: if the frogsh binary is not found in the release
          build output.
    """
    binary = abspath(join(DART_PATH,
                          utils.GetBuildRoot(utils.GuessOS(),
                                             'release', 'ia32'),
                          'frog', 'bin', 'frogsh'))
    if not exists(binary):
      raise ConverterException(FROG_NOT_FOUND_ERROR % DART_PATH)

    cmd = [binary, '--compile-only',
           '--libdir=' + join(DART_PATH, 'frog', 'lib'),
           '--out=' + self.outputFileName(inputfile, outdir)]
    if self.extra_flags != "":
      cmd.append(self.extra_flags)
    cmd.append(inputfile)
    return cmd

  def outputFileName(self, inputfile, outdir):
    """ Returns the path in outdir of the JavaScript made from inputfile. """
    return join(outdir, basename(inputfile) + '.js')
177
def execute(cmd, verbose=False):
  """Runs a command in a subprocess and captures what it writes.

  Args:
    cmd: the command line, as a list of strings.
    verbose: when true, the command is echoed and its output always printed.

  Returns:
    A (returncode, stdout, stderr) tuple, or (1, None, None) when an
    exception was raised while starting or running the command.
  """
  if verbose:
    print('Executing: ' + ' '.join(cmd))
  try:
    proc = subprocess.Popen(
        cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    output, err = proc.communicate()
    failed = proc.returncode != 0
    if failed:
      print('Execution failed: ' + output + '\n' + err)
    if verbose or failed:
      print(output)
      print(err)
    return proc.returncode, output, err
  except Exception as e:
    print('Exception when executing: ' + ' '.join(cmd))
    print(e)
    return 1, None, None
194
195
def convertPath(project_path, prefix_path):
  """ Maps a path found in the project to a path usable on this system.

  The root of the project corresponds to the current working directory.

  Args:
    - project_path: path in the project context.
    - prefix_path: prefix for relative paths.

  Returns:
    The absolute path without its first character, an http(s) URL unchanged,
    or prefix_path joined with the relative path.
  """
  if isabs(project_path):
    # TODO(sigmund): add a flag to pass in the root-level for absolute paths.
    return project_path[1:]

  is_remote = (project_path.startswith('http://') or
               project_path.startswith('https://'))
  if is_remote:
    return project_path
  return join(prefix_path, project_path)
211
def encodeImage(rootDir, filename):
  """ Returns a base64 url encoding for an image.

  Args:
    rootDir: directory that filename is relative to.
    filename: name of the image; its last three characters select the image
        type used in the data URL ('svg' becomes 'svg+xml').

  Returns:
    A CSS 'url(data:image/...;base64,...)' value holding the file contents.
  """
  filetype = filename[-3:]
  if filetype == 'svg': filetype = 'svg+xml'
  # Images are binary data: read them as bytes so that no newline translation
  # or text decoding can alter the payload.
  with open(join(rootDir, filename), 'rb') as f:
    return 'url(data:image/%s;charset=utf-8;base64,%s)' % (
        filetype,
        base64.b64encode(f.read()))

def processCss(filename):
  """ Reads and converts a css file by replacing all image references into
  base64 encoded images.

  Only unquoted url(...) values ending in .svg, .png, .jpg or .gif are
  considered. Remote (http/https) images and images that cannot be found
  next to the css file are left untouched.

  Args:
    filename: path of the css file.

  Returns:
    The css text with local images inlined as data URLs.
  """
  with open(filename, 'r') as f:
    css = f.read()
  cssDir = os.path.split(filename)[0]

  def transformUrl(match):
    imagefile = match.group(1)
    # if the image is not local or can't be found, leave the url alone:
    if (imagefile.startswith('http://')
        or imagefile.startswith('https://')
        or not exists(join(cssDir, imagefile))):
      return match.group(0)
    return encodeImage(cssDir, imagefile)

  # "[^)]*" keeps each match inside a single url(...): a greedy ".*" would
  # swallow everything up to the last image on a line holding several urls.
  pattern = r'url\(([^)]*\.(svg|png|jpg|gif))\)'
  return re.sub(pattern, transformUrl, css)
238
class DartHTMLConverter(HTMLParser):
  """ An HTML processor that inlines css and compiled dart code.

  The page is re-emitted piece by piece into self.output while it is parsed;
  getResult() returns the converted page.

  Args:
    - compiler: object whose compileCode(src=..., body=...) returns the markup
      that replaces a Dart script tag (e.g. a DartCompiler).
    - prefix_path: prefix for relative paths encountered in the HTML.
  """

  # True while the closing tag of a script that was already replaced by
  # compiled code is still pending and has to be dropped.
  skip_script_end = False

  def __init__(self, compiler, prefix_path):
    HTMLParser.__init__(self)
    self.in_dart_tag = False
    self.skip_script_end = False
    self.output = []
    self.dart_inline_code = []
    self.contains_dart = False
    self.compiler = compiler
    self.prefix_path = prefix_path

  def inlineCss(self, attrDic):
    """ Emits the stylesheet referenced by attrDic['href'] as a <style> tag. """
    path = convertPath(attrDic['href'], self.prefix_path)
    self.output.append(CSS_TEMPLATE % processCss(path))

  def compileScript(self, attrDic):
    """ Handles the start tag of a Dart script.

    With a src attribute the file is compiled and emitted right away.
    Otherwise the body of the tag is collected and compiled once the end tag
    is reached.

    Returns:
      True when the original start tag must not be emitted.
    """
    if 'src' in attrDic:
      self.output.append(self.compiler.compileCode(
          src=convertPath(attrDic.pop('src'), self.prefix_path),
          body=None))
    else:
      self.in_dart_tag = True
      # no tag is generated until we parse the body of the tag
      self.dart_inline_code = []
    return True

  def convertImage(self, attrDic):
    """ Hook for subclasses: may rewrite the attributes of an <img> tag. """
    pass

  def _formatAttrs(self, attrDic):
    """ Renders attributes as they are written inside a start tag. """
    parts = []
    for name in attrDic:
      value = attrDic[name]
      if value is None:
        # Attribute without a value (e.g. "disabled"): the parser reports
        # None, which must not be written out as the string "None".
        parts.append(name)
      else:
        # NOTE(review): values are emitted without re-escaping quotes or
        # ampersands - confirm whether the parser in use unescapes them.
        parts.append('%s="%s"' % (name, value))
    return ' '.join(parts)

  def starttagHelper(self, tag, attrs, isEnd):
    """ Shared implementation of handle_starttag and handle_startendtag.

    Args:
      tag: name of the tag.
      attrs: list of (name, value) pairs, as given by HTMLParser.
      isEnd: True for a self-closing tag (<tag ... />).
    """
    attrDic = dict(attrs)

    # compile dart scripts instead of emitting them
    if (tag == 'script' and 'type' in attrDic
        and (attrDic['type'] == DART_MIME_TYPE)):
      if self.compileScript(attrDic):
        if not self.in_dart_tag and not isEnd:
          # The script was replaced by a complete <script>...</script>
          # element, so the end tag that follows would be a stray one.
          self.skip_script_end = True
        return

    # convert css imports into inlined css
    elif (tag == 'link' and
          'rel' in attrDic and attrDic['rel'] == 'stylesheet' and
          'type' in attrDic and attrDic['type'] == 'text/css' and
          'href' in attrDic):
      self.inlineCss(attrDic)
      return

    elif tag == 'img' and 'src' in attrDic:
      self.convertImage(attrDic)

    # emit everything else as in the input
    self.output.append('<%s%s%s>' % (
        tag + (' ' if len(attrDic) else ''),
        self._formatAttrs(attrDic),
        '/' if isEnd else ''))

  def handle_starttag(self, tag, attrs):
    self.starttagHelper(tag, attrs, False)

  def handle_startendtag(self, tag, attrs):
    self.starttagHelper(tag, attrs, True)

  def handle_data(self, data):
    if self.in_dart_tag:
      # collect the body of the dart script; it is compiled all at once when
      # the end of the script tag is reached.
      self.dart_inline_code.append(data)
    else:
      self.output.append(data)

  def handle_endtag(self, tag):
    if tag == 'script' and self.in_dart_tag:
      self.in_dart_tag = False
      self.output.append(self.compiler.compileCode(
          src=None, body='\n'.join(self.dart_inline_code)))
    elif tag == 'script' and self.skip_script_end:
      # end tag of a script whose replacement was already emitted in full
      self.skip_script_end = False
    else:
      self.output.append('</%s>' % tag)

  def handle_charref(self, ref):
    self.output.append('&#%s;' % ref)

  def handle_entityref(self, name):
    self.output.append('&%s;' % name)

  def handle_comment(self, data):
    self.output.append('<!--%s-->' % data)

  def handle_decl(self, decl):
    self.output.append('<!%s>' % decl)

  def unknown_decl(self, data):
    self.output.append('<!%s>' % data)

  def handle_pi(self, data):
    self.output.append('<?%s>' % data)

  def getResult(self):
    """ Returns the converted page as a single string. """
    return ''.join(self.output)
342
343
class DartToDartHTMLConverter(DartHTMLConverter):
  """ Converter that keeps the Dart script tags of the page as they are.

  Referenced Dart files are copied into outdir with tools/copy_dart.py, and
  DARTIUM_TO_JS_SCRIPT is added before </body> when the page contains Dart.

  Args:
    - prefix_path: prefix for relative paths encountered in the HTML.
    - outdir: directory handed to copy_dart.py.
    - verbose: whether executed commands are echoed.
  """
  def __init__(self, prefix_path, outdir, verbose):
    # Note: can't use super calls because HTMLParser is not a subclass of object
    DartHTMLConverter.__init__(self, None, prefix_path)
    self.verbose = verbose
    self.outdir = outdir

  def compileScript(self, attrDic):
    """ Copies the script named by 'src', if any; always returns False so
    that the script tag itself is not rewritten.
    """
    self.contains_dart = True
    if 'src' not in attrDic:
      return False

    copy_cmd = [
        sys.executable,
        join(DART_PATH, 'tools', 'copy_dart.py'),
        self.outdir,
        convertPath(attrDic['src'], self.prefix_path)]
    status, out, err = execute(copy_cmd, self.verbose)
    if status:
      raise ConverterException('exception calling copy_dart.py')

    # do not rewrite the script tag
    return False

  def handle_endtag(self, tag):
    closes_dart_page = tag == 'body' and self.contains_dart
    if closes_dart_page:
      self.output.append(DARTIUM_TO_JS_SCRIPT)
    DartHTMLConverter.handle_endtag(self, tag)
371
# Data URL of a blank (fully transparent) 1x1 PNG, used in place of images
# that could not be downloaded. The payload was produced with:
#   convert -size 1x1 +set date:create +set date:modify \
#     xc:'rgba(0,0,0,0)' 1x1.png
#   base64.b64encode(open('1x1.png').read())
# (The +set options are there because "-strip" alone apparently doesn't work:
# it leaves several info chunks, resulting in a 224-byte PNG.)
BLANK_IMAGE_BASE64_URL = 'data:image/png;charset=utf-8;base64,' + (
    'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABEAQAAADljNBBAAAAAmJLR0T//xSrMc0AAAAJc'
    'EhZcwAAAEgAAABIAEbJaz4AAAAJdnBBZwAAAAEAAAABAMeVX+0AAAANSURBVAjXY2BgYG'
    'AAAAAFAAFe8yo6AAAAAElFTkSuQmCC')
382
class OfflineHTMLConverter(DartHTMLConverter):
  """ Converter that makes the images of a page available offline.

  Script tags are left untouched. Images are downloaded and either embedded
  as data URLs or stored as files in prefix_path.

  Args:
    - prefix_path: prefix for relative paths; downloaded files are stored here.
    - outdir: stored on the instance as self.outdir.
    - verbose: whether downloads are reported.
    - inline_images: embed images as data URLs instead of local files.
  """
  def __init__(self, prefix_path, outdir, verbose, inline_images):
    # Note: can't use super calls because HTMLParser is not a subclass of object
    DartHTMLConverter.__init__(self, None, prefix_path)
    self.outdir = outdir
    self.verbose = verbose
    self.inline_images = inline_images # Inline as data://, vs. use local file.

  def compileScript(self, attrDic):
    # do not rewrite the script tag
    return False

  def _fetch(self, url):
    """ Returns the content found at url, closing the connection afterwards. """
    response = urllib2.urlopen(url)
    try:
      return response.read()
    finally:
      response.close()

  def downloadImageUrlToEncode(self, url):
    """ Downloads an image and returns a base64 url encoding for it.
    May throw if the download fails.
    """
    # The image type is taken from the last three characters of the url.
    filetype = url[-3:]
    if filetype == 'svg': filetype = 'svg+xml'
    if self.verbose:
      print('Downloading ' + url)

    return 'data:image/%s;charset=utf-8;base64,%s' % (
        filetype,
        base64.b64encode(self._fetch(url)))

  def downloadImageUrlToFile(self, url):
    """Downloads an image and returns the filename. May throw if the
    download fails.
    """
    extension = os.path.splitext(url)[1]
    # mkstemp() happens to work to create a non-temporary, so we use it. It
    # also returns an open file descriptor, which has to be closed here since
    # the file is written through its name.
    fd, filename = tempfile.mkstemp(extension, 'img_', self.prefix_path)
    os.close(fd)
    try:
      if self.verbose:
        print('Downloading %s to %s' % (url, filename))
      writeOut(self._fetch(url), filename)
    except Exception:
      # Don't leave an empty placeholder file behind.
      if os.path.exists(filename):
        os.remove(filename)
      raise
    return os.path.join(self.prefix_path, os.path.basename(filename))

  def downloadImage(self, url):
    """Downloads an image either to file or to data://, and return the URL.

    A blank image is returned when the download fails.
    """
    # Don't try to re-encode an image that's already data://.
    if url.startswith('data:image/'):
      return url
    try:
      if self.inline_images:
        return self.downloadImageUrlToEncode(url)
      else:
        return self.downloadImageUrlToFile(url)
    except Exception:
      # Best effort: a missing image must not abort the conversion. Only
      # Exception is caught so that Ctrl-C and sys.exit() still work.
      print('*** Image download failed: %s' % url)
      return BLANK_IMAGE_BASE64_URL

  def convertImage(self, attrDic):
    attrDic['src'] = self.downloadImage(attrDic['src'])
437
def safeMakeDirs(dirname):
  """ Makes sure a directory exists, creating missing parents as needed.

  It is safe to call this while concurrent jobs create the same directory:
  a failure to create it is ignored if the directory exists afterwards.

  Args:
    dirname: path of the directory.
  """
  if exists(dirname):
    return
  try:
    os.makedirs(dirname)
  except Exception:
    # Another job may have created the directory in the meantime; that is
    # what allows invoking this script concurrently in many jobs.
    if not exists(dirname):
      raise
451
class ConverterException(Exception):
  """ Raised when a problem is found during the conversion process. """
455
def Flags():
  """ Builds the parser for the command line flags of this script.

  Returns:
    An optparse.OptionParser that knows --verbose, -o/--out, -t/--target and
    --extra-flags.
  """
  parser = optparse.OptionParser()
  parser.set_usage("htmlconverter.py input.html -o OUTDIR")
  parser.add_option(
      "--verbose",
      action="store_true",
      default=False,
      help="Print verbose output")
  parser.add_option(
      "-o", "--out",
      action="store",
      type="string",
      default=None,
      help="Output directory")
  parser.add_option(
      "-t", "--target",
      default='chromium',
      metavar="[js,chromium,dartium]",
      help="The target html to generate")
  parser.add_option(
      "--extra-flags",
      type="string",
      default="",
      help="Extra flags for dartc")
  return parser
478
def writeOut(contents, filepath):
  """ Stores contents in filepath, creating its directory when needed, and
  reports the absolute location of the generated file.

  Args:
    contents: the string to write.
    filepath: path of the file to (over)write.
  """
  target_dir = dirname(filepath)
  safeMakeDirs(target_dir)
  with open(filepath, 'w') as out_file:
    out_file.write(contents)
  print("Generated output in: " + abspath(filepath))
485
def convertForDartium(filename, outdirBase, outfile, verbose):
  """ Converts a file for a dartium target.

  Args:
    filename: path of the HTML file to convert.
    outdirBase: the directory to place all subdirectories for other dart
        files and resources.
    outfile: path of the HTML file to generate.
    verbose: whether executed commands are echoed.
  """
  with open(filename, 'r') as source:
    html = source.read()

  # Relative paths in the page are resolved against the page's directory.
  converter = DartToDartHTMLConverter(dirname(filename), outdirBase, verbose)
  converter.feed(html)
  converter.close()
  writeOut(converter.getResult(), outfile)
498
def convertForChromium(
    filename, extra_flags, outfile, verbose):
  """ Converts a file for a chromium target.

  Args:
    filename: path of the HTML file to convert.
    extra_flags: extra flags handed to the Dart compiler.
    outfile: path of the HTML file to generate.
    verbose: whether the compiler invocation is echoed.
  """
  with open(filename, 'r') as source:
    html = source.read()

  # Relative paths in the page are resolved against the page's directory.
  compiler = DartCompiler(verbose, extra_flags)
  converter = DartHTMLConverter(compiler, dirname(filename))
  converter.feed(html)
  converter.close()
  writeOut(converter.getResult(), outfile)
510
def convertForOffline(filename, outfile, verbose, encode_images):
  """ Converts a file for offline use.

  Input and output are read and written as UTF-8.

  Args:
    filename: path of the HTML file to convert.
    outfile: path of the HTML file to generate.
    verbose: whether downloads are reported.
    encode_images: embed images as data URLs instead of local files.
  """
  with codecs.open(filename, 'r', 'utf-8') as source:
    html = source.read()

  converter = OfflineHTMLConverter(
      dirname(filename), dirname(outfile), verbose, encode_images)
  converter.feed(html)
  converter.close()
  converted = converter.getResult()

  safeMakeDirs(dirname(outfile))
  with codecs.open(outfile, 'w', 'utf-8') as target:
    target.write(converted)
  print("Generated output in: " + abspath(outfile))
527
# ANSI escape sequences used to highlight errors on the terminal.
RED_COLOR = "\033[31m"
NO_COLOR = "\033[0m"

def main():
  """ Command line entry point.

  Converts the HTML file given as first argument into the output directory,
  once per selected target: "<name>-js.html" for the chromium/js targets and
  "<name>-dart.html" for the dartium target.

  Returns:
    0 on success, 1 on invalid usage or when the conversion fails.
  """
  parser = Flags()
  options, args = parser.parse_args()
  if len(args) < 1 or not options.out or not options.target:
    parser.print_help()
    return 1

  try:
    filename = args[0]
    extension = filename[filename.rfind('.'):]
    if extension != '.html' and extension != '.htm':
      print("Invalid input file extension: %s" % extension)
      return 1
    outfile = join(options.out, filename)
    # Insert the target suffix in front of the trailing extension only.
    # str.replace would also rewrite any other occurrence of the extension,
    # e.g. in a directory name such as "pages.html/".
    stem = outfile[:-len(extension)]
    if 'chromium' in options.target or 'js' in options.target:
      convertForChromium(filename,
                         options.extra_flags,
                         stem + '-js' + extension, options.verbose)
    if 'dartium' in options.target:
      convertForDartium(filename, options.out,
                        stem + '-dart' + extension, options.verbose)
  except Exception as e:
    # Top-level boundary: report any failure and signal it via the exit code.
    print("%sERROR%s: %s" % (RED_COLOR, NO_COLOR, str(e)))
    return 1
  return 0

if __name__ == '__main__':
  sys.exit(main())
OLDNEW
« no previous file with comments | « no previous file | client/tools/htmlconverter_test.py » ('j') | samples/dartcombat/README » ('J')

Powered by Google App Engine
This is Rietveld 408576698