OLD | NEW |
1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
2 # coding=utf-8 | 2 # coding=utf-8 |
3 # Copyright (c) 2012 The Chromium Authors. All rights reserved. | 3 # Copyright (c) 2012 The Chromium Authors. All rights reserved. |
4 # Use of this source code is governed by a BSD-style license that can be | 4 # Use of this source code is governed by a BSD-style license that can be |
5 # found in the LICENSE file. | 5 # found in the LICENSE file. |
6 | 6 |
7 """Traces an executable and its child processes and extract the files accessed | 7 """Traces an executable and its child processes and extract the files accessed |
8 by them. | 8 by them. |
9 | 9 |
10 The implementation uses OS-specific API. The native Kernel logger and the ETL | 10 The implementation uses OS-specific API. The native Kernel logger and the ETL |
(...skipping 1376 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1387 logging.info('Running: %s' % cmd) | 1387 logging.info('Running: %s' % cmd) |
1388 signal = 'Go!' | 1388 signal = 'Go!' |
1389 logging.debug('Our pid: %d' % os.getpid()) | 1389 logging.debug('Our pid: %d' % os.getpid()) |
1390 | 1390 |
1391 # Part 1: start the child process. | 1391 # Part 1: start the child process. |
1392 stdout = stderr = None | 1392 stdout = stderr = None |
1393 if output: | 1393 if output: |
1394 stdout = subprocess.PIPE | 1394 stdout = subprocess.PIPE |
1395 stderr = subprocess.STDOUT | 1395 stderr = subprocess.STDOUT |
1396 child_cmd = [ | 1396 child_cmd = [ |
1397 sys.executable, os.path.join(BASE_DIR, 'trace_child_process.py'), | 1397 sys.executable, |
| 1398 os.path.join(BASE_DIR, 'trace_child_process.py'), |
| 1399 '--wait', |
1398 ] | 1400 ] |
1399 child = subprocess.Popen( | 1401 child = subprocess.Popen( |
1400 child_cmd + cmd, | 1402 child_cmd + cmd, |
1401 stdin=subprocess.PIPE, | 1403 stdin=subprocess.PIPE, |
1402 stdout=stdout, | 1404 stdout=stdout, |
1403 stderr=stderr, | 1405 stderr=stderr, |
1404 cwd=cwd) | 1406 cwd=cwd) |
1405 logging.debug('Started child pid: %d' % child.pid) | 1407 logging.debug('Started child pid: %d' % child.pid) |
1406 | 1408 |
1407 # Part 2: start dtrace process. | 1409 # Part 2: start dtrace process. |
(...skipping 74 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1482 print >> sys.stderr, '\n'.join(lines) | 1484 print >> sys.stderr, '\n'.join(lines) |
1483 raise | 1485 raise |
1484 with open(logname, 'wb') as logfile: | 1486 with open(logname, 'wb') as logfile: |
1485 logfile.write(''.join(lines)) | 1487 logfile.write(''.join(lines)) |
1486 | 1488 |
1487 | 1489 |
1488 class LogmanTrace(ApiBase): | 1490 class LogmanTrace(ApiBase): |
1489 """Uses the native Windows ETW based tracing functionality to trace a child | 1491 """Uses the native Windows ETW based tracing functionality to trace a child |
1490 process. | 1492 process. |
1491 | 1493 |
1492 Caveat: this implementation doesn't track cwd or initial_cwd. | 1494 Caveat: this implementation doesn't track cwd or initial_cwd. It is because |
| 1495 the Windows Kernel doesn't have a concept of 'current working directory' at |
| 1496 all. A Win32 process has a map of current directories, one per drive letter |
| 1497 and it is managed by the user mode kernel32.dll. In kernel, a file is always |
| 1498 opened relative to another file_object or as an absolute path. All the current |
| 1499 working directory logic is done in user mode. |
1493 """ | 1500 """ |
1494 class Context(ApiBase.Context): | 1501 class Context(ApiBase.Context): |
1495 """Processes an ETW log line and keeps the list of existent and non | 1502 """Processes an ETW log line and keeps the list of existent and non |
1496 existent files accessed. | 1503 existent files accessed. |
1497 | 1504 |
1498 Ignores directories. | 1505 Ignores directories. |
1499 """ | 1506 """ |
1500 # Only the useful headers common to all entries are listed here. Any column | 1507 # Only the useful headers common to all entries are listed here. Any column |
1501 # at 19 or higher is dependent on the specific event. | 1508 # at 19 or higher is dependent on the specific event. |
1502 EVENT_NAME = 0 | 1509 EVENT_NAME = 0 |
1503 TYPE = 1 | 1510 TYPE = 1 |
1504 PID = 9 | 1511 PID = 9 |
1505 TID = 10 | 1512 TID = 10 |
1506 PROCESSOR_ID = 11 | 1513 PROCESSOR_ID = 11 |
1507 TIMESTAMP = 16 | 1514 TIMESTAMP = 16 |
1508 | 1515 |
1509 class Process(ApiBase.Context.Process): | 1516 class Process(ApiBase.Context.Process): |
1510 def __init__(self, *args): | 1517 def __init__(self, *args): |
1511 super(LogmanTrace.Context.Process, self).__init__(*args) | 1518 super(LogmanTrace.Context.Process, self).__init__(*args) |
1512 # Handle file objects that succeeded. | 1519 # Handle file objects that succeeded. |
1513 self.file_objects = {} | 1520 self.file_objects = {} |
1514 | 1521 |
1515 def __init__(self, blacklist): | 1522 def __init__(self, blacklist, tracer_pid): |
1516 super(LogmanTrace.Context, self).__init__(blacklist) | 1523 super(LogmanTrace.Context, self).__init__(blacklist) |
1517 self._drive_map = DosDriveMap() | 1524 self._drive_map = DosDriveMap() |
1518 # Threads mapping to the corresponding process id. | 1525 # Threads mapping to the corresponding process id. |
1519 self._threads_active = {} | 1526 self._threads_active = {} |
1520 # Process ID of the tracer, e.g. trace_inputs.py | 1527 # Process ID of the tracer, e.g. trace_inputs.py |
1521 self._tracer_pid = None | 1528 self._tracer_pid = tracer_pid |
1522 # First process to be started by self._tracer_pid. | 1529 # First process to be started by self._tracer_pid is the executable |
| 1530 # traced. |
1523 self._initial_pid = None | 1531 self._initial_pid = None |
1524 self._line_number = 0 | 1532 self._line_number = 0 |
1525 | 1533 |
1526 def on_csv_line(self, line): | 1534 def on_csv_line(self, line): |
1527 """Processes a CSV Event line.""" | 1535 """Processes a CSV Event line.""" |
1528 # So much white space! | 1536 # So much white space! |
1529 line = [i.strip() for i in line] | 1537 line = [i.strip() for i in line] |
1530 self._line_number += 1 | 1538 self._line_number += 1 |
1531 if self._line_number == 1: | 1539 if self._line_number == 1: |
1532 assert line == [ | 1540 assert line == [ |
(...skipping 166 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1699 def handle_FileIo_Rename(self, line): | 1707 def handle_FileIo_Rename(self, line): |
1700 # TODO(maruel): Handle? | 1708 # TODO(maruel): Handle? |
1701 pass | 1709 pass |
1702 | 1710 |
1703 def handle_FileIo_Any(self, line): | 1711 def handle_FileIo_Any(self, line): |
1704 pass | 1712 pass |
1705 | 1713 |
1706 def handle_Process_Any(self, line): | 1714 def handle_Process_Any(self, line): |
1707 pass | 1715 pass |
1708 | 1716 |
1709 def handle_Process_DCStart(self, line): | |
1710 """Gives historic information about the process tree. | |
1711 | |
1712 Use it to extract the pid of the trace_inputs.py parent process that | |
1713 started logman.exe. | |
1714 """ | |
1715 #UNIQUE_PROCESS_KEY = 19 | |
1716 #PROCESS_ID = 20 | |
1717 PARENT_PID = 21 | |
1718 #SESSION_ID = 22 | |
1719 #EXIT_STATUS = 23 | |
1720 #DIRECTORY_TABLE_BASE = 24 | |
1721 #USER_SID = 25 | |
1722 IMAGE_FILE_NAME = 26 | |
1723 #COMMAND_LINE = 27 | |
1724 | |
1725 ppid = int(line[PARENT_PID], 16) | |
1726 if line[IMAGE_FILE_NAME] == '"logman.exe"': | |
1727 # logman's parent is trace_input.py or whatever tool using it as a | |
1728 # library. Trace any other children started by it. | |
1729 assert not self._tracer_pid | |
1730 self._tracer_pid = ppid | |
1731 logging.info('Found logman\'s parent at %d' % ppid) | |
1732 | |
1733 def handle_Process_End(self, line): | 1717 def handle_Process_End(self, line): |
1734 # Look if it is logman terminating, if so, grab the parent's process pid | |
1735 # and inject cwd. | |
1736 pid = line[self.PID] | 1718 pid = line[self.PID] |
1737 if pid in self.processes: | 1719 if pid in self.processes: |
1738 logging.info('Terminated: %d' % pid) | 1720 logging.info('Terminated: %d' % pid) |
1739 self.processes[pid].cwd = None | 1721 self.processes[pid].cwd = None |
1740 | 1722 |
1741 def handle_Process_Start(self, line): | 1723 def handle_Process_Start(self, line): |
1742 """Handles a new child process started by PID.""" | 1724 """Handles a new child process started by PID.""" |
1743 #UNIQUE_PROCESS_KEY = 19 | 1725 #UNIQUE_PROCESS_KEY = 19 |
1744 PROCESS_ID = 20 | 1726 PROCESS_ID = 20 |
1745 #PARENT_PID = 21 | 1727 #PARENT_PID = 21 |
1746 #SESSION_ID = 22 | 1728 #SESSION_ID = 22 |
1747 #EXIT_STATUS = 23 | 1729 #EXIT_STATUS = 23 |
1748 #DIRECTORY_TABLE_BASE = 24 | 1730 #DIRECTORY_TABLE_BASE = 24 |
1749 #USER_SID = 25 | 1731 #USER_SID = 25 |
1750 IMAGE_FILE_NAME = 26 | 1732 IMAGE_FILE_NAME = 26 |
1751 COMMAND_LINE = 27 | 1733 COMMAND_LINE = 27 |
1752 | 1734 |
1753 ppid = line[self.PID] | 1735 ppid = line[self.PID] |
1754 pid = int(line[PROCESS_ID], 16) | 1736 pid = int(line[PROCESS_ID], 16) |
1755 if ppid == self._tracer_pid: | 1737 if ppid == self._tracer_pid: |
1756 # Need to ignore processes we don't know about because the log is | 1738 # Need to ignore processes we don't know about because the log is |
1757 # system-wide. | 1739 # system-wide. self._tracer_pid shall start only one process. |
1758 if line[IMAGE_FILE_NAME] == '"logman.exe"': | 1740 assert not self._initial_pid |
1759 # Skip the shutdown call when "logman.exe stop" is executed. | 1741 self._initial_pid = pid |
1760 return | |
1761 self._initial_pid = self._initial_pid or pid | |
1762 ppid = None | 1742 ppid = None |
1763 elif ppid not in self.processes: | 1743 elif ppid not in self.processes: |
1764 # Ignore | 1744 # Ignore |
1765 return | 1745 return |
1766 assert pid not in self.processes | 1746 assert pid not in self.processes |
1767 proc = self.processes[pid] = self.Process(self, pid, None, ppid) | 1747 proc = self.processes[pid] = self.Process(self, pid, None, ppid) |
1768 # TODO(maruel): Process escapes. | 1748 # TODO(maruel): Process escapes. |
1769 assert ( | 1749 assert ( |
1770 line[COMMAND_LINE].startswith('"') and | 1750 line[COMMAND_LINE].startswith('"') and |
1771 line[COMMAND_LINE].endswith('"')) | 1751 line[COMMAND_LINE].endswith('"')) |
(...skipping 75 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1847 # Also add their short path name equivalents. | 1827 # Also add their short path name equivalents. |
1848 for i in list(self.IGNORED): | 1828 for i in list(self.IGNORED): |
1849 self.IGNORED.add(GetShortPathName(i.replace('/', os.path.sep))) | 1829 self.IGNORED.add(GetShortPathName(i.replace('/', os.path.sep))) |
1850 | 1830 |
1851 # Add these last since they have no short path name equivalent. | 1831 # Add these last since they have no short path name equivalent. |
1852 self.IGNORED.add('\\SystemRoot') | 1832 self.IGNORED.add('\\SystemRoot') |
1853 self.IGNORED = tuple(sorted(self.IGNORED)) | 1833 self.IGNORED = tuple(sorted(self.IGNORED)) |
1854 | 1834 |
1855 @staticmethod | 1835 @staticmethod |
1856 def clean_trace(logname): | 1836 def clean_trace(logname): |
1857 if os.path.isfile(logname): | 1837 for ext in ('', '.csv', '.etl', '.xml'): |
1858 os.remove(logname) | 1838 if os.path.isfile(logname + ext): |
1859 if os.path.isfile(logname + '.etl'): | 1839 os.remove(logname + ext) |
1860 os.remove(logname + '.etl') | |
1861 | 1840 |
1862 @classmethod | 1841 @classmethod |
1863 def _start_log(cls, etl): | 1842 def _start_log(cls, etl): |
1864 """Starts the log collection. | 1843 """Starts the log collection. |
1865 | 1844 |
1866 Requires administrative access. logman.exe is synchronous so no need for a | 1845 Requires administrative access. logman.exe is synchronous so no need for a |
1867 "warmup" call. 'Windows Kernel Trace' is *localized* so use its GUID | 1846 "warmup" call. 'Windows Kernel Trace' is *localized* so use its GUID |
1868 instead. The GUID constant name is SystemTraceControlGuid. Lovely. | 1847 instead. The GUID constant name is SystemTraceControlGuid. Lovely. |
1869 | 1848 |
1870 One can get the list of potentially interesting providers with: | 1849 One can get the list of potentially interesting providers with: |
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1907 'NT Kernel Logger', | 1886 'NT Kernel Logger', |
1908 '-ets', # Sends the command directly to the kernel. | 1887 '-ets', # Sends the command directly to the kernel. |
1909 ] | 1888 ] |
1910 logging.debug('Running: %s' % cmd_stop) | 1889 logging.debug('Running: %s' % cmd_stop) |
1911 subprocess.check_call( | 1890 subprocess.check_call( |
1912 cmd_stop, | 1891 cmd_stop, |
1913 stdin=subprocess.PIPE, | 1892 stdin=subprocess.PIPE, |
1914 stdout=subprocess.PIPE, | 1893 stdout=subprocess.PIPE, |
1915 stderr=subprocess.STDOUT) | 1894 stderr=subprocess.STDOUT) |
1916 | 1895 |
1917 @classmethod | 1896 @staticmethod |
1918 def gen_trace(cls, cmd, cwd, logname, output): | 1897 def _convert_log(logname, logformat, stdout, stderr): |
1919 """Uses logman.exe to start and stop the NT Kernel Logger while the | 1898 """Converts the ETL trace to text representation. |
1920 executable to be traced is run. | 1899 |
| 1900 Normally, 'csv' is sufficient. If complex scripts are used (like eastern |
| 1901 languages), use 'csv_utf16'. If localization gets in the way, use 'xml'. |
| 1902 |
| 1903 Arguments: |
| 1904 - logname: Base filename to convert. |
| 1905 - logformat: Text format to be generated, csv, csv_utf16 or xml. |
| 1906 |
| 1907 Use "tracerpt -?" for help. |
1921 """ | 1908 """ |
1922 logging.info('gen_trace(%s, %s, %s, %s)' % (cmd, cwd, logname, output)) | |
1923 # Use "logman -?" for help. | |
1924 | |
1925 etl = logname + '.etl' | |
1926 | |
1927 stdout = stderr = None | |
1928 if output: | |
1929 stdout = subprocess.PIPE | |
1930 stderr = subprocess.STDOUT | |
1931 | |
1932 # 1. Start the log collection. | |
1933 cls._start_log(etl) | |
1934 | |
1935 # 2. Run the child process. | |
1936 logging.debug('Running: %s' % cmd) | |
1937 try: | |
1938 child = subprocess.Popen( | |
1939 cmd, cwd=cwd, stdin=subprocess.PIPE, stdout=stdout, stderr=stderr) | |
1940 out = child.communicate()[0] | |
1941 finally: | |
1942 # 3. Stop the log collection. | |
1943 cls._stop_log() | |
1944 | |
1945 # 4. Convert the traces to text representation. | |
1946 # Use "tracerpt -?" for help. | |
1947 LOCALE_INVARIANT = 0x7F | 1909 LOCALE_INVARIANT = 0x7F |
1948 windll.kernel32.SetThreadLocale(LOCALE_INVARIANT) | 1910 windll.kernel32.SetThreadLocale(LOCALE_INVARIANT) |
1949 cmd_convert = [ | 1911 cmd_convert = [ |
1950 'tracerpt.exe', | 1912 'tracerpt.exe', |
1951 '-l', etl, | 1913 '-l', logname + '.etl', |
1952 '-o', logname, | 1914 '-o', logname + '.' + logformat, |
1953 '-gmt', # Use UTC | 1915 '-gmt', # Use UTC |
1954 '-y', # No prompt | 1916 '-y', # No prompt |
1955 # Use -of XML to get the header of each item after column 19, e.g. all | 1917 # Use -of XML to get the header of each item after column 19, e.g. all |
1956 # the actual headers of 'User Data'. | 1918 # the actual headers of 'User Data'. |
1957 ] | 1919 ] |
1958 | 1920 |
1959 # Normally, 'csv' is sufficient. If complex scripts are used (like eastern | |
1960 # languages), use 'csv_unicode'. If localization gets in the way, use 'xml'. | |
1961 logformat = 'csv' | |
1962 | |
1963 if logformat == 'csv': | 1921 if logformat == 'csv': |
1964 # tracerpt localizes the 'Type' column, for major brainfuck | 1922 # tracerpt localizes the 'Type' column, for major brainfuck |
1965 # entertainment. I can't imagine any sane reason to do that. | 1923 # entertainment. I can't imagine any sane reason to do that. |
1966 cmd_convert.extend(['-of', 'CSV']) | 1924 cmd_convert.extend(['-of', 'CSV']) |
1967 elif logformat == 'csv_utf16': | 1925 elif logformat == 'csv_utf16': |
1968 # This causes it to use UTF-16, which doubles the log size but ensures the | 1926 # This causes it to use UTF-16, which doubles the log size but ensures the |
1969 # log is readable for non-ASCII characters. | 1927 # log is readable for non-ASCII characters. |
1970 cmd_convert.extend(['-of', 'CSV', '-en', 'Unicode']) | 1928 cmd_convert.extend(['-of', 'CSV', '-en', 'Unicode']) |
1971 elif logformat == 'xml': | 1929 elif logformat == 'xml': |
1972 cmd_convert.extend(['-of', 'XML']) | 1930 cmd_convert.extend(['-of', 'XML']) |
1973 else: | 1931 else: |
1974 assert False, logformat | 1932 assert False, logformat |
1975 logging.debug('Running: %s' % cmd_convert) | 1933 logging.debug('Running: %s' % cmd_convert) |
| 1934 # This can take tens of minutes for large logs. |
1976 subprocess.check_call( | 1935 subprocess.check_call( |
1977 cmd_convert, stdin=subprocess.PIPE, stdout=stdout, stderr=stderr) | 1936 cmd_convert, stdin=subprocess.PIPE, stdout=stdout, stderr=stderr) |
1978 | 1937 |
| 1938 @classmethod |
| 1939 def gen_trace(cls, cmd, cwd, logname, output): |
| 1940 """Uses logman.exe to start and stop the NT Kernel Logger while the |
| 1941 executable to be traced is run. |
| 1942 """ |
| 1943 logging.info('gen_trace(%s, %s, %s, %s)' % (cmd, cwd, logname, output)) |
| 1944 # Use "logman -?" for help. |
| 1945 |
| 1946 stdout = stderr = None |
| 1947 if output: |
| 1948 stdout = subprocess.PIPE |
| 1949 stderr = subprocess.STDOUT |
| 1950 |
| 1951 # 1. Start the log collection. |
| 1952 cls._start_log(logname + '.etl') |
| 1953 |
| 1954 # 2. Run the child process. |
| 1955 logging.debug('Running: %s' % cmd) |
| 1956 try: |
| 1957 # Use trace_child_process.py so we have a clear pid owner. Since |
| 1958 # trace_inputs.py can be used as a library and could trace multiple |
| 1959 # processes simultaneously, it makes it more complex if the executable to |
| 1960 # be traced is executed directly here. It also solves issues related to |
| 1961 # logman.exe that needs to be executed to control the kernel trace. |
| 1962 child_cmd = [ |
| 1963 sys.executable, |
| 1964 os.path.join(BASE_DIR, 'trace_child_process.py'), |
| 1965 ] |
| 1966 child = subprocess.Popen( |
| 1967 child_cmd + cmd, |
| 1968 cwd=cwd, |
| 1969 stdin=subprocess.PIPE, |
| 1970 stdout=stdout, |
| 1971 stderr=stderr) |
| 1972 logging.debug('Started child pid: %d' % child.pid) |
| 1973 out = child.communicate()[0] |
| 1974 finally: |
| 1975 # 3. Stop the log collection. |
| 1976 cls._stop_log() |
| 1977 |
| 1978 # 4. Convert the traces to text representation. |
| 1979 cls._convert_log(logname, 'csv', stdout, stderr) |
| 1980 |
| 1981 # 5. Save metadata. |
| 1982 json.dump({ |
| 1983 'pid': child.pid, |
| 1984 'format': 'csv', |
| 1985 }, open(logname, 'w')) |
1979 return child.returncode, out | 1986 return child.returncode, out |
1980 | 1987 |
1981 @classmethod | 1988 @classmethod |
1982 def parse_log(cls, filename, blacklist): | 1989 def parse_log(cls, filename, blacklist): |
1983 logging.info('parse_log(%s, %s)' % (filename, blacklist)) | 1990 logging.info('parse_log(%s, %s)' % (filename, blacklist)) |
1984 | 1991 |
1985 def blacklist_more(filepath): | 1992 def blacklist_more(filepath): |
1986 # All the NTFS metadata is in the form x:\$EXTEND or stuff like that. | 1993 # All the NTFS metadata is in the form x:\$EXTEND or stuff like that. |
1987 return blacklist(filepath) or re.match(r'[A-Z]\:\\\$EXTEND', filepath) | 1994 return blacklist(filepath) or re.match(r'[A-Z]\:\\\$EXTEND', filepath) |
1988 | 1995 |
1989 # Auto-detect the log format. | 1996 data = json.load(open(filename)) |
1990 with open(filename, 'rb') as f: | 1997 logformat = data['format'] |
1991 hdr = f.read(2) | |
1992 assert len(hdr) == 2 | |
1993 if hdr == '<E': | |
1994 # It starts with <Events>. | |
1995 logformat = 'xml' | |
1996 elif hdr == '\xFF\xEF': | |
1997 # utf-16 BOM. | |
1998 logformat = 'csv_utf16' | |
1999 else: | |
2000 logformat = 'csv' | |
2001 | 1998 |
2002 context = cls.Context(blacklist_more) | 1999 context = cls.Context(blacklist_more, data['pid']) |
2003 | 2000 |
2004 if logformat == 'csv_utf16': | 2001 if logformat == 'csv_utf16': |
2005 def utf_8_encoder(unicode_csv_data): | 2002 def utf_8_encoder(unicode_csv_data): |
2006 """Encodes the unicode object as utf-8 encoded str instance""" | 2003 """Encodes the unicode object as utf-8 encoded str instance""" |
2007 for line in unicode_csv_data: | 2004 for line in unicode_csv_data: |
2008 yield line.encode('utf-8') | 2005 yield line.encode('utf-8') |
2009 | 2006 |
2010 def unicode_csv_reader(unicode_csv_data, **kwargs): | 2007 def unicode_csv_reader(unicode_csv_data, **kwargs): |
2011 """Encodes temporarily as UTF-8 since csv module doesn't do unicode.""" | 2008 """Encodes temporarily as UTF-8 since csv module doesn't do unicode.""" |
2012 csv_reader = csv.reader(utf_8_encoder(unicode_csv_data), **kwargs) | 2009 csv_reader = csv.reader(utf_8_encoder(unicode_csv_data), **kwargs) |
2013 for row in csv_reader: | 2010 for row in csv_reader: |
2014 # Decode str utf-8 instances back to unicode instances, cell by cell: | 2011 # Decode str utf-8 instances back to unicode instances, cell by cell: |
2015 yield [cell.decode('utf-8') for cell in row] | 2012 yield [cell.decode('utf-8') for cell in row] |
2016 | 2013 |
2017 # The CSV file is UTF-16 so use codecs.open() to load the file into the | 2014 # The CSV file is UTF-16 so use codecs.open() to load the file into the |
2018 # python internal unicode format (utf-8). Then explicitly re-encode as | 2015 # python internal unicode format (utf-8). Then explicitly re-encode as |
2019 # utf8 as str instances so csv can parse it fine. Then decode the utf-8 | 2016 # utf8 as str instances so csv can parse it fine. Then decode the utf-8 |
2020 # str back into python unicode instances. This sounds about right. | 2017 # str back into python unicode instances. This sounds about right. |
2021 for line in unicode_csv_reader(codecs.open(filename, 'r', 'utf-16')): | 2018 for line in unicode_csv_reader( |
| 2019 codecs.open(filename + '.' + logformat, 'r', 'utf-16')): |
2022 # line is a list of unicode objects | 2020 # line is a list of unicode objects |
2023 context.on_csv_line(line) | 2021 context.on_csv_line(line) |
2024 | 2022 |
2025 elif logformat == 'csv': | 2023 elif logformat == 'csv': |
2026 def ansi_csv_reader(ansi_csv_data, **kwargs): | 2024 def ansi_csv_reader(ansi_csv_data, **kwargs): |
2027 """Loads an 'ANSI' code page and returns unicode() objects.""" | 2025 """Loads an 'ANSI' code page and returns unicode() objects.""" |
2028 assert sys.getfilesystemencoding() == 'mbcs' | 2026 assert sys.getfilesystemencoding() == 'mbcs' |
2029 encoding = get_current_encoding() | 2027 encoding = get_current_encoding() |
2030 for row in csv.reader(ansi_csv_data, **kwargs): | 2028 for row in csv.reader(ansi_csv_data, **kwargs): |
2031 # Decode str 'ansi' instances to unicode instances, cell by cell: | 2029 # Decode str 'ansi' instances to unicode instances, cell by cell: |
2032 yield [cell.decode(encoding) for cell in row] | 2030 yield [cell.decode(encoding) for cell in row] |
2033 | 2031 |
2034 # The fastest and smallest format but only supports 'ANSI' file paths. | 2032 # The fastest and smallest format but only supports 'ANSI' file paths. |
2035 # I.e. the filenames are encoded in the 'current' encoding. | 2033 # I.e. the filenames are encoded in the 'current' encoding. |
2036 for line in ansi_csv_reader(open(filename)): | 2034 for line in ansi_csv_reader(open(filename + '.' + logformat)): |
2037 # line is a list of unicode objects. | 2035 # line is a list of unicode objects. |
2038 context.on_csv_line(line) | 2036 context.on_csv_line(line) |
2039 | 2037 |
2040 else: | 2038 else: |
2041 raise NotImplementedError('Implement %s' % logformat) | 2039 raise NotImplementedError('Implement %s' % logformat) |
2042 | 2040 |
2043 return context.to_results() | 2041 return context.to_results() |
2044 | 2042 |
2045 | 2043 |
2046 def pretty_print(variables, stdout): | 2044 def pretty_print(variables, stdout): |
(...skipping 270 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2317 os.path.abspath(options.log), | 2315 os.path.abspath(options.log), |
2318 args, | 2316 args, |
2319 options.root_dir, | 2317 options.root_dir, |
2320 options.cwd, | 2318 options.cwd, |
2321 options.product_dir, | 2319 options.product_dir, |
2322 options.force) | 2320 options.force) |
2323 | 2321 |
2324 | 2322 |
2325 if __name__ == '__main__': | 2323 if __name__ == '__main__': |
2326 sys.exit(main()) | 2324 sys.exit(main()) |
OLD | NEW |