OLD | NEW |
(Empty) | |
| 1 #!/usr/bin/python |
| 2 |
| 3 # Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 4 # Use of this source code is governed by a BSD-style license that can be |
| 5 # found in the LICENSE file. |
| 6 |
| 7 '''This utility cleans up the html files as emitted by doxygen so |
| 8 that they are suitable for publication on a Google documentation site. |
| 9 ''' |
| 10 |
| 11 import optparse |
| 12 import os |
| 13 import re |
| 14 import shutil |
| 15 import string |
| 16 import sys |
| 17 try: |
| 18 from BeautifulSoup import BeautifulSoup, Tag |
| 19 except (ImportError, NotImplementedError): |
| 20 print ("This tool requires the BeautifulSoup package " |
| 21 "(see http://www.crummy.com/software/BeautifulSoup/).\n" |
| 22 "Make sure that the file BeautifulSoup.py is either in this directory " |
| 23 "or is available in your PYTHON_PATH") |
| 24 raise |
| 25 |
| 26 |
class HTMLFixer(object):
  '''Cleans up the html strings produced by Doxygen.

  The html is parsed with BeautifulSoup at construction time. The Fix*
  methods modify that parse tree in place, and str() serializes the result.
  '''

  def __init__(self, html):
    '''Parses |html| (a string of markup) into self.soup.'''
    self.soup = BeautifulSoup(html)

  @staticmethod
  def _ChildIndex(parent, child):
    '''Returns the position of |child| in |parent.contents|, by identity.

    list.index() compares with ==, and BeautifulSoup 3 tags compare equal
    structurally (same name, attributes and contents), so index() can report
    the position of an earlier look-alike sibling instead of |child| itself.

    Raises:
      ValueError: if |child| is not in |parent.contents| (the same exception
          type list.index() raises).
    '''
    for index, node in enumerate(parent.contents):
      if node is child:
        return index
    raise ValueError('node is not a child of the given parent')

  def FixTableHeadings(self):
    '''Fixes the doxygen table headings.

    This includes:
      - Using bare <h2> title row instead of row embedded in <tr><td> in table
      - Splitting up tables into multiple separate tables if a table
        heading appears in the middle of a table.

    Copying the anchor's "name" attribute into the "id" attribute of the
    heading is currently disabled (see the NOTE in the body).

    For example, this html:
     <table>
      <tr><td colspan="2"><h2><a name="pub-attribs"></a>
      Data Fields List</h2></td></tr>
      ...
     </table>

    would be converted to this:
     <h2>Data Fields List</h2>
     <table>
      ...
     </table>
    '''

    table_headers = []
    for tag in self.soup.findAll('tr'):
      if not (tag.td and tag.td.h2 and tag.td.h2.a):
        continue
      anchor = tag.td.h2.a
      # Use .get() rather than ['name']: an anchor without a name attribute
      # must simply be skipped instead of raising KeyError.
      if not anchor.get('name'):
        continue
      # NOTE: copying the anchor name into the heading's id is disabled:
      #   tag['id'] = anchor['name']
      # Turn the whole <tr> into an <h2> holding whatever follows the anchor.
      tag.string = anchor.next
      tag.name = 'h2'
      table_headers.append(tag)

    # Walk the headings in reverse so that earlier tags don't delete later
    # tags when a table is split.
    # Split up tables that have multiple table header (th) rows
    for tag in reversed(table_headers):
      title = tag.string.strip()
      print("Header tag: %s is %s" % (tag.name, title))
      # Is this a heading in the middle of a table?
      if tag.findPreviousSibling('tr') and tag.parent.name == 'table':
        print("Splitting Table named %s" % title)
        table = tag.parent
        table_parent = table.parent
        table_index = self._ChildIndex(table_parent, table)
        new_table = Tag(self.soup, name='table', attrs=table.attrs)
        table_parent.insert(table_index + 1, new_table)
        tag_index = self._ChildIndex(table, tag)
        # Iterate over a copy (the slice): inserting a row into new_table
        # moves it out of table.contents while we loop.
        for index, row in enumerate(table.contents[tag_index:]):
          new_table.insert(index, row)
      # Now move the <h2> tag to be in front of the <table> tag
      assert tag.parent.name == 'table'
      table = tag.parent
      table_parent = table.parent
      table_index = self._ChildIndex(table_parent, table)
      table_parent.insert(table_index, tag)

  def RemoveTopHeadings(self):
    '''Removes <div> sections with a header, tabs, or navpath class attribute'''
    header_tags = self.soup.findAll(
        name='div',
        attrs={'class' : re.compile(r'^(header|tabs[0-9]*|navpath)$')})
    # A plain loop: extract() is called only for its side effect.
    for tag in header_tags:
      tag.extract()

  def FixAll(self):
    '''Applies every fix: table headings first, then top-heading removal.'''
    self.FixTableHeadings()
    self.RemoveTopHeadings()

  def __str__(self):
    '''Returns the current (possibly fixed) document as a string.'''
    return str(self.soup)
| 101 |
| 102 |
def main(argv=None):
  '''Main entry for the doxy_cleanup utility

  doxy_cleanup takes a list of html files and modifies them in place.

  Args:
    argv: optional list of command-line arguments, without the program name.
        When None (the default), optparse reads sys.argv[1:].

  Returns:
    0 on success, 1 if no files were given (after printing the usage).

  Raises:
    Any exception hit while processing a file is re-raised after the name of
    that file has been printed.
  '''

  parser = optparse.OptionParser(usage='Usage: %prog [options] files...')

  parser.add_option('-m', '--move', dest='move', action='store_true',
                    default=False, help='move html files to "original_html"')

  options, files = parser.parse_args(argv)

  if not files:
    parser.print_usage()
    return 1

  for filename in files:
    try:
      with open(filename, 'r') as html_file:
        html = html_file.read()

      print("Processing %s" % filename)
      fixer = HTMLFixer(html)
      fixer.FixAll()
      with open(filename, 'w') as html_file:
        html_file.write(str(fixer))
      if options.move:
        # The target is an "original_html" directory that sits next to the
        # directory containing the file.
        new_directory = os.path.join(
            os.path.dirname(os.path.dirname(filename)), 'original_html')
        if not os.path.exists(new_directory):
          os.mkdir(new_directory)
        shutil.move(filename, new_directory)
    except:
      # Deliberately bare: report which file was being processed for any
      # failure, then let the original exception propagate unchanged.
      print("Error while processing %s" % filename)
      raise

  return 0
| 140 |
# When run as a script, use main()'s return value as the process exit status.
if __name__ == '__main__':
  sys.exit(main())
OLD | NEW |