OLD | NEW |
---|---|
1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
2 # coding=utf-8 | 2 # coding=utf-8 |
3 # Copyright (c) 2012 The Chromium Authors. All rights reserved. | 3 # Copyright (c) 2012 The Chromium Authors. All rights reserved. |
4 # Use of this source code is governed by a BSD-style license that can be | 4 # Use of this source code is governed by a BSD-style license that can be |
5 # found in the LICENSE file. | 5 # found in the LICENSE file. |
6 | 6 |
7 """Traces an executable and its child processes and extract the files accessed | 7 """Traces an executable and its child processes and extract the files accessed |
8 by them. | 8 by them. |
9 | 9 |
10 The implementation uses OS-specific API. The native Kernel logger and the ETL | 10 The implementation uses OS-specific API. The native Kernel logger and the ETL |
(...skipping 1374 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
1385 logging.info('Running: %s' % cmd) | 1385 logging.info('Running: %s' % cmd) |
1386 signal = 'Go!' | 1386 signal = 'Go!' |
1387 logging.debug('Our pid: %d' % os.getpid()) | 1387 logging.debug('Our pid: %d' % os.getpid()) |
1388 | 1388 |
1389 # Part 1: start the child process. | 1389 # Part 1: start the child process. |
1390 stdout = stderr = None | 1390 stdout = stderr = None |
1391 if output: | 1391 if output: |
1392 stdout = subprocess.PIPE | 1392 stdout = subprocess.PIPE |
1393 stderr = subprocess.STDOUT | 1393 stderr = subprocess.STDOUT |
1394 child_cmd = [ | 1394 child_cmd = [ |
1395 sys.executable, os.path.join(BASE_DIR, 'trace_child_process.py'), | 1395 sys.executable, |
1396 os.path.join(BASE_DIR, 'trace_child_process.py'), | |
1397 '--wait', | |
1396 ] | 1398 ] |
1397 child = subprocess.Popen( | 1399 child = subprocess.Popen( |
1398 child_cmd + cmd, | 1400 child_cmd + cmd, |
1399 stdin=subprocess.PIPE, | 1401 stdin=subprocess.PIPE, |
1400 stdout=stdout, | 1402 stdout=stdout, |
1401 stderr=stderr, | 1403 stderr=stderr, |
1402 cwd=cwd) | 1404 cwd=cwd) |
1403 logging.debug('Started child pid: %d' % child.pid) | 1405 logging.debug('Started child pid: %d' % child.pid) |
1404 | 1406 |
1405 # Part 2: start dtrace process. | 1407 # Part 2: start dtrace process. |
(...skipping 74 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
1480 print >> sys.stderr, '\n'.join(lines) | 1482 print >> sys.stderr, '\n'.join(lines) |
1481 raise | 1483 raise |
1482 with open(logname, 'wb') as logfile: | 1484 with open(logname, 'wb') as logfile: |
1483 logfile.write(''.join(lines)) | 1485 logfile.write(''.join(lines)) |
1484 | 1486 |
1485 | 1487 |
1486 class LogmanTrace(ApiBase): | 1488 class LogmanTrace(ApiBase): |
1487 """Uses the native Windows ETW based tracing functionality to trace a child | 1489 """Uses the native Windows ETW based tracing functionality to trace a child |
1488 process. | 1490 process. |
1489 | 1491 |
1490 Caveat: this implementation doesn't track cwd or initial_cwd. | 1492 Caveat: this implementation doesn't track cwd or initial_cwd. It is because |
1493 the Windows Kernel doesn't have a concept of current working directory. | |
cmp
2012/06/04 22:15:34
of a single current working directory per process
M-A Ruel
2012/06/04 23:18:06
There's no current directory concept at all. I cha
| |
1494 Windows has a map of current directories, one per drive letter and it is | |
1495 managed by the user mode kernel32.dll. In kernel, a file is always opened | |
1496 relative to another file_object or as an absolute path. | |
1491 """ | 1497 """ |
1492 class Context(ApiBase.Context): | 1498 class Context(ApiBase.Context): |
1493 """Processes a ETW log line and keeps the list of existent and non | 1499 """Processes a ETW log line and keeps the list of existent and non |
1494 existent files accessed. | 1500 existent files accessed. |
1495 | 1501 |
1496 Ignores directories. | 1502 Ignores directories. |
1497 """ | 1503 """ |
1498 # Only the useful headers common to all entries are listed there. Any column | 1504 # Only the useful headers common to all entries are listed there. Any column |
1499 # at 19 or higher is dependent on the specific event. | 1505 # at 19 or higher is dependent on the specific event. |
1500 EVENT_NAME = 0 | 1506 EVENT_NAME = 0 |
1501 TYPE = 1 | 1507 TYPE = 1 |
1502 PID = 9 | 1508 PID = 9 |
1503 TID = 10 | 1509 TID = 10 |
1504 PROCESSOR_ID = 11 | 1510 PROCESSOR_ID = 11 |
1505 TIMESTAMP = 16 | 1511 TIMESTAMP = 16 |
1506 | 1512 |
1507 class Process(ApiBase.Context.Process): | 1513 class Process(ApiBase.Context.Process): |
1508 def __init__(self, *args): | 1514 def __init__(self, *args): |
1509 super(LogmanTrace.Context.Process, self).__init__(*args) | 1515 super(LogmanTrace.Context.Process, self).__init__(*args) |
1510 # Handle file objects that succeeded. | 1516 # Handle file objects that succeeded. |
1511 self.file_objects = {} | 1517 self.file_objects = {} |
1512 | 1518 |
1513 def __init__(self, blacklist): | 1519 def __init__(self, blacklist, tracer_pid): |
1514 super(LogmanTrace.Context, self).__init__(blacklist) | 1520 super(LogmanTrace.Context, self).__init__(blacklist) |
1515 self._drive_map = DosDriveMap() | 1521 self._drive_map = DosDriveMap() |
1516 # Threads mapping to the corresponding process id. | 1522 # Threads mapping to the corresponding process id. |
1517 self._threads_active = {} | 1523 self._threads_active = {} |
1518 # Process ID of the tracer, e.g. trace_inputs.py | 1524 # Process ID of the tracer, e.g. trace_inputs.py |
1519 self._tracer_pid = None | 1525 self._tracer_pid = tracer_pid |
1520 # First process to be started by self._tracer_pid. | 1526 # First process to be started by self._tracer_pid is the executable |
1527 # traced. | |
1521 self._initial_pid = None | 1528 self._initial_pid = None |
1522 self._line_number = 0 | 1529 self._line_number = 0 |
1523 | 1530 |
1524 def on_csv_line(self, line): | 1531 def on_csv_line(self, line): |
1525 """Processes a CSV Event line.""" | 1532 """Processes a CSV Event line.""" |
1526 # So much white space! | 1533 # So much white space! |
1527 line = [i.strip() for i in line] | 1534 line = [i.strip() for i in line] |
1528 self._line_number += 1 | 1535 self._line_number += 1 |
1529 if self._line_number == 1: | 1536 if self._line_number == 1: |
1530 assert line == [ | 1537 assert line == [ |
(...skipping 165 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
1696 def handle_FileIo_Rename(self, line): | 1703 def handle_FileIo_Rename(self, line): |
1697 # TODO(maruel): Handle? | 1704 # TODO(maruel): Handle? |
1698 pass | 1705 pass |
1699 | 1706 |
1700 def handle_FileIo_Any(self, line): | 1707 def handle_FileIo_Any(self, line): |
1701 pass | 1708 pass |
1702 | 1709 |
1703 def handle_Process_Any(self, line): | 1710 def handle_Process_Any(self, line): |
1704 pass | 1711 pass |
1705 | 1712 |
1706 def handle_Process_DCStart(self, line): | |
1707 """Gives historic information about the process tree. | |
1708 | |
1709 Use it to extract the pid of the trace_inputs.py parent process that | |
1710 started logman.exe. | |
1711 """ | |
1712 #UNIQUE_PROCESS_KEY = 19 | |
1713 #PROCESS_ID = 20 | |
1714 PARENT_PID = 21 | |
1715 #SESSION_ID = 22 | |
1716 #EXIT_STATUS = 23 | |
1717 #DIRECTORY_TABLE_BASE = 24 | |
1718 #USER_SID = 25 | |
1719 IMAGE_FILE_NAME = 26 | |
1720 #COMMAND_LINE = 27 | |
1721 | |
1722 ppid = int(line[PARENT_PID], 16) | |
1723 if line[IMAGE_FILE_NAME] == '"logman.exe"': | |
1724 # logman's parent is trace_input.py or whatever tool using it as a | |
1725 # library. Trace any other children started by it. | |
1726 assert not self._tracer_pid | |
1727 self._tracer_pid = ppid | |
1728 logging.info('Found logman\'s parent at %d' % ppid) | |
1729 | |
1730 def handle_Process_End(self, line): | 1713 def handle_Process_End(self, line): |
1731 # Look if it is logman terminating, if so, grab the parent's process pid | |
1732 # and inject cwd. | |
1733 pid = line[self.PID] | 1714 pid = line[self.PID] |
1734 if pid in self.processes: | 1715 if pid in self.processes: |
1735 logging.info('Terminated: %d' % pid) | 1716 logging.info('Terminated: %d' % pid) |
1736 self.processes[pid].cwd = None | 1717 self.processes[pid].cwd = None |
1737 | 1718 |
1738 def handle_Process_Start(self, line): | 1719 def handle_Process_Start(self, line): |
1739 """Handles a new child process started by PID.""" | 1720 """Handles a new child process started by PID.""" |
1740 #UNIQUE_PROCESS_KEY = 19 | 1721 #UNIQUE_PROCESS_KEY = 19 |
1741 PROCESS_ID = 20 | 1722 PROCESS_ID = 20 |
1742 #PARENT_PID = 21 | 1723 #PARENT_PID = 21 |
1743 #SESSION_ID = 22 | 1724 #SESSION_ID = 22 |
1744 #EXIT_STATUS = 23 | 1725 #EXIT_STATUS = 23 |
1745 #DIRECTORY_TABLE_BASE = 24 | 1726 #DIRECTORY_TABLE_BASE = 24 |
1746 #USER_SID = 25 | 1727 #USER_SID = 25 |
1747 IMAGE_FILE_NAME = 26 | 1728 IMAGE_FILE_NAME = 26 |
1748 COMMAND_LINE = 27 | 1729 COMMAND_LINE = 27 |
1749 | 1730 |
1750 ppid = line[self.PID] | 1731 ppid = line[self.PID] |
1751 pid = int(line[PROCESS_ID], 16) | 1732 pid = int(line[PROCESS_ID], 16) |
1752 if ppid == self._tracer_pid: | 1733 if ppid == self._tracer_pid: |
1753 # Need to ignore processes we don't know about because the log is | 1734 # Need to ignore processes we don't know about because the log is |
1754 # system-wide. | 1735 # system-wide. self._tracer_pid shall start only one process. |
1755 if line[IMAGE_FILE_NAME] == '"logman.exe"': | 1736 assert not self._initial_pid |
1756 # Skip the shutdown call when "logman.exe stop" is executed. | 1737 self._initial_pid = pid |
1757 return | |
1758 self._initial_pid = self._initial_pid or pid | |
1759 ppid = None | 1738 ppid = None |
1760 elif ppid not in self.processes: | 1739 elif ppid not in self.processes: |
1761 # Ignore | 1740 # Ignore |
1762 return | 1741 return |
1763 assert pid not in self.processes | 1742 assert pid not in self.processes |
1764 proc = self.processes[pid] = self.Process(self, pid, None, ppid) | 1743 proc = self.processes[pid] = self.Process(self, pid, None, ppid) |
1765 # TODO(maruel): Process escapes. | 1744 # TODO(maruel): Process escapes. |
1766 assert ( | 1745 assert ( |
1767 line[COMMAND_LINE].startswith('"') and | 1746 line[COMMAND_LINE].startswith('"') and |
1768 line[COMMAND_LINE].endswith('"')) | 1747 line[COMMAND_LINE].endswith('"')) |
(...skipping 75 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
1844 # Also add their short path name equivalents. | 1823 # Also add their short path name equivalents. |
1845 for i in list(self.IGNORED): | 1824 for i in list(self.IGNORED): |
1846 self.IGNORED.add(GetShortPathName(i.replace('/', os.path.sep))) | 1825 self.IGNORED.add(GetShortPathName(i.replace('/', os.path.sep))) |
1847 | 1826 |
1848 # Add these last since they have no short path name equivalent. | 1827 # Add these last since they have no short path name equivalent. |
1849 self.IGNORED.add('\\SystemRoot') | 1828 self.IGNORED.add('\\SystemRoot') |
1850 self.IGNORED = tuple(sorted(self.IGNORED)) | 1829 self.IGNORED = tuple(sorted(self.IGNORED)) |
1851 | 1830 |
1852 @staticmethod | 1831 @staticmethod |
1853 def clean_trace(logname): | 1832 def clean_trace(logname): |
1854 if os.path.isfile(logname): | 1833 for ext in ('', '.csv', '.etl', '.xml'): |
1855 os.remove(logname) | 1834 if os.path.isfile(logname + ext): |
1856 if os.path.isfile(logname + '.etl'): | 1835 os.remove(logname + ext) |
1857 os.remove(logname + '.etl') | |
1858 | 1836 |
1859 @classmethod | 1837 @classmethod |
1860 def _start_log(cls, etl): | 1838 def _start_log(cls, etl): |
1861 """Starts the log collection. | 1839 """Starts the log collection. |
1862 | 1840 |
1863 Requires administrative access. logman.exe is synchronous so no need for a | 1841 Requires administrative access. logman.exe is synchronous so no need for a |
1864 "warmup" call. 'Windows Kernel Trace' is *localized* so use its GUID | 1842 "warmup" call. 'Windows Kernel Trace' is *localized* so use its GUID |
1865 instead. The GUID constant name is SystemTraceControlGuid. Lovely. | 1843 instead. The GUID constant name is SystemTraceControlGuid. Lovely. |
1866 | 1844 |
1867 One can get the list of potentially interesting providers with: | 1845 One can get the list of potentially interesting providers with: |
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
1904 'NT Kernel Logger', | 1882 'NT Kernel Logger', |
1905 '-ets', # Sends the command directly to the kernel. | 1883 '-ets', # Sends the command directly to the kernel. |
1906 ] | 1884 ] |
1907 logging.debug('Running: %s' % cmd_stop) | 1885 logging.debug('Running: %s' % cmd_stop) |
1908 subprocess.check_call( | 1886 subprocess.check_call( |
1909 cmd_stop, | 1887 cmd_stop, |
1910 stdin=subprocess.PIPE, | 1888 stdin=subprocess.PIPE, |
1911 stdout=subprocess.PIPE, | 1889 stdout=subprocess.PIPE, |
1912 stderr=subprocess.STDOUT) | 1890 stderr=subprocess.STDOUT) |
1913 | 1891 |
1914 @classmethod | 1892 @staticmethod |
1915 def gen_trace(cls, cmd, cwd, logname, output): | 1893 def _convert_log(logname, logformat, stdout, stderr): |
1916 """Uses logman.exe to start and stop the NT Kernel Logger while the | 1894 """Converts the ETL trace to text representation. |
1917 executable to be traced is run. | 1895 |
1896 Normally, 'csv' is sufficient. If complex scripts are used (like eastern | |
1897 languages), use 'csv_utf16'. If localization gets in the way, use 'xml'. | |
1898 | |
1899 Arguments: | |
1900 - logname: Base filename to convert. | |
1901 - logformat: Text format to be generated, csv, csv_utf16 or xml. | |
1902 | |
1903 Use "tracerpt -?" for help. | |
1918 """ | 1904 """ |
1919 logging.info('gen_trace(%s, %s, %s, %s)' % (cmd, cwd, logname, output)) | |
1920 # Use "logman -?" for help. | |
1921 | |
1922 etl = logname + '.etl' | |
1923 | |
1924 stdout = stderr = None | |
1925 if output: | |
1926 stdout = subprocess.PIPE | |
1927 stderr = subprocess.STDOUT | |
1928 | |
1929 # 1. Start the log collection. | |
1930 cls._start_log(etl) | |
1931 | |
1932 # 2. Run the child process. | |
1933 logging.debug('Running: %s' % cmd) | |
1934 try: | |
1935 child = subprocess.Popen( | |
1936 cmd, cwd=cwd, stdin=subprocess.PIPE, stdout=stdout, stderr=stderr) | |
1937 out = child.communicate()[0] | |
1938 finally: | |
1939 # 3. Stop the log collection. | |
1940 cls._stop_log() | |
1941 | |
1942 # 4. Convert the traces to text representation. | |
1943 # Use "tracerpt -?" for help. | |
1944 LOCALE_INVARIANT = 0x7F | 1905 LOCALE_INVARIANT = 0x7F |
1945 windll.kernel32.SetThreadLocale(LOCALE_INVARIANT) | 1906 windll.kernel32.SetThreadLocale(LOCALE_INVARIANT) |
1946 cmd_convert = [ | 1907 cmd_convert = [ |
1947 'tracerpt.exe', | 1908 'tracerpt.exe', |
1948 '-l', etl, | 1909 '-l', logname + '.etl', |
1949 '-o', logname, | 1910 '-o', logname + '.' + logformat, |
1950 '-gmt', # Use UTC | 1911 '-gmt', # Use UTC |
1951 '-y', # No prompt | 1912 '-y', # No prompt |
1952 # Use -of XML to get the header of each items after column 19, e.g. all | 1913 # Use -of XML to get the header of each items after column 19, e.g. all |
1953 # the actual headers of 'User Data'. | 1914 # the actual headers of 'User Data'. |
1954 ] | 1915 ] |
1955 | 1916 |
1956 # Normally, 'csv' is sufficient. If complex scripts are used (like eastern | |
1957 # languages), use 'csv_unicode'. If localization gets in the way, use 'xml'. | |
1958 logformat = 'csv' | |
1959 | |
1960 if logformat == 'csv': | 1917 if logformat == 'csv': |
1961 # tracerpt localizes the 'Type' column, for major brainfuck | 1918 # tracerpt localizes the 'Type' column, for major brainfuck |
1962 # entertainment. I can't imagine any sane reason to do that. | 1919 # entertainment. I can't imagine any sane reason to do that. |
1963 cmd_convert.extend(['-of', 'CSV']) | 1920 cmd_convert.extend(['-of', 'CSV']) |
1964 elif logformat == 'csv_utf16': | 1921 elif logformat == 'csv_utf16': |
1965 # This causes it to use UTF-16, which doubles the log size but ensures the | 1922 # This causes it to use UTF-16, which doubles the log size but ensures the |
1966 # log is readable for non-ASCII characters. | 1923 # log is readable for non-ASCII characters. |
1967 cmd_convert.extend(['-of', 'CSV', '-en', 'Unicode']) | 1924 cmd_convert.extend(['-of', 'CSV', '-en', 'Unicode']) |
1968 elif logformat == 'xml': | 1925 elif logformat == 'xml': |
1969 cmd_convert.extend(['-of', 'XML']) | 1926 cmd_convert.extend(['-of', 'XML']) |
1970 else: | 1927 else: |
1971 assert False, logformat | 1928 assert False, logformat |
1972 logging.debug('Running: %s' % cmd_convert) | 1929 logging.debug('Running: %s' % cmd_convert) |
1930 # This can take tens of minutes for large logs. | |
1973 subprocess.check_call( | 1931 subprocess.check_call( |
1974 cmd_convert, stdin=subprocess.PIPE, stdout=stdout, stderr=stderr) | 1932 cmd_convert, stdin=subprocess.PIPE, stdout=stdout, stderr=stderr) |
1975 | 1933 |
1934 @classmethod | |
1935 def gen_trace(cls, cmd, cwd, logname, output): | |
1936 """Uses logman.exe to start and stop the NT Kernel Logger while the | |
1937 executable to be traced is run. | |
1938 """ | |
1939 logging.info('gen_trace(%s, %s, %s, %s)' % (cmd, cwd, logname, output)) | |
1940 # Use "logman -?" for help. | |
1941 | |
1942 stdout = stderr = None | |
1943 if output: | |
1944 stdout = subprocess.PIPE | |
1945 stderr = subprocess.STDOUT | |
1946 | |
1947 # 1. Start the log collection. | |
1948 cls._start_log(logname + '.etl') | |
1949 | |
1950 # 2. Run the child process. | |
1951 logging.debug('Running: %s' % cmd) | |
1952 try: | |
1953 # Use trace_child_process.py so we have a clear pid owner. Since | |
1954 # trace_inputs.py can be used as a library and could trace multiple | |
1955 # processes simultaneously, it makes it more complex if the executable to | |
1956 # be traced is executed directly here. It also solves issues related to | |
1957 # logman.exe that needs to be executed to control the kernel trace. | |
1958 child_cmd = [ | |
1959 sys.executable, | |
1960 os.path.join(BASE_DIR, 'trace_child_process.py'), | |
1961 ] | |
1962 child = subprocess.Popen( | |
1963 child_cmd + cmd, | |
1964 cwd=cwd, | |
1965 stdin=subprocess.PIPE, | |
1966 stdout=stdout, | |
1967 stderr=stderr) | |
1968 logging.debug('Started child pid: %d' % child.pid) | |
1969 out = child.communicate()[0] | |
1970 finally: | |
1971 # 3. Stop the log collection. | |
1972 cls._stop_log() | |
1973 | |
1974 # 4. Convert the traces to text representation. | |
1975 cls._convert_log(logname, 'csv', stdout, stderr) | |
1976 | |
1977 # 5. Save metadata. | |
1978 json.dump({ | |
1979 'pid': child.pid, | |
1980 'format': 'csv', | |
1981 }, open(logname, 'w')) | |
1976 return child.returncode, out | 1982 return child.returncode, out |
1977 | 1983 |
1978 @classmethod | 1984 @classmethod |
1979 def parse_log(cls, filename, blacklist): | 1985 def parse_log(cls, filename, blacklist): |
1980 logging.info('parse_log(%s, %s)' % (filename, blacklist)) | 1986 logging.info('parse_log(%s, %s)' % (filename, blacklist)) |
1981 | 1987 |
1982 def blacklist_more(filepath): | 1988 def blacklist_more(filepath): |
1983 # All the NTFS metadata is in the form x:\$EXTEND or stuff like that. | 1989 # All the NTFS metadata is in the form x:\$EXTEND or stuff like that. |
1984 return blacklist(filepath) or re.match(r'[A-Z]\:\\\$EXTEND', filepath) | 1990 return blacklist(filepath) or re.match(r'[A-Z]\:\\\$EXTEND', filepath) |
1985 | 1991 |
1986 # Auto-detect the log format. | 1992 data = json.load(open(filename)) |
1987 with open(filename, 'rb') as f: | 1993 logformat = data['format'] |
1988 hdr = f.read(2) | |
1989 assert len(hdr) == 2 | |
1990 if hdr == '<E': | |
1991 # It starts with <Events>. | |
1992 logformat = 'xml' | |
1993 elif hdr == '\xFF\xEF': | |
1994 # utf-16 BOM. | |
1995 logformat = 'csv_utf16' | |
1996 else: | |
1997 logformat = 'csv' | |
1998 | 1994 |
1999 context = cls.Context(blacklist_more) | 1995 context = cls.Context(blacklist_more, data['pid']) |
2000 | 1996 |
2001 if logformat == 'csv_utf16': | 1997 if logformat == 'csv_utf16': |
2002 def utf_8_encoder(unicode_csv_data): | 1998 def utf_8_encoder(unicode_csv_data): |
2003 """Encodes the unicode object as utf-8 encoded str instance""" | 1999 """Encodes the unicode object as utf-8 encoded str instance""" |
2004 for line in unicode_csv_data: | 2000 for line in unicode_csv_data: |
2005 yield line.encode('utf-8') | 2001 yield line.encode('utf-8') |
2006 | 2002 |
2007 def unicode_csv_reader(unicode_csv_data, **kwargs): | 2003 def unicode_csv_reader(unicode_csv_data, **kwargs): |
2008 """Encodes temporarily as UTF-8 since csv module doesn't do unicode.""" | 2004 """Encodes temporarily as UTF-8 since csv module doesn't do unicode.""" |
2009 csv_reader = csv.reader(utf_8_encoder(unicode_csv_data), **kwargs) | 2005 csv_reader = csv.reader(utf_8_encoder(unicode_csv_data), **kwargs) |
2010 for row in csv_reader: | 2006 for row in csv_reader: |
2011 # Decode str utf-8 instances back to unicode instances, cell by cell: | 2007 # Decode str utf-8 instances back to unicode instances, cell by cell: |
2012 yield [cell.decode('utf-8') for cell in row] | 2008 yield [cell.decode('utf-8') for cell in row] |
2013 | 2009 |
2014 # The CSV file is UTF-16 so use codecs.open() to load the file into the | 2010 # The CSV file is UTF-16 so use codecs.open() to load the file into the |
2015 # python internal unicode format (utf-8). Then explicitly re-encode as | 2011 # python internal unicode format (utf-8). Then explicitly re-encode as |
2016 # utf8 as str instances so csv can parse it fine. Then decode the utf-8 | 2012 # utf8 as str instances so csv can parse it fine. Then decode the utf-8 |
2017 # str back into python unicode instances. This sounds about right. | 2013 # str back into python unicode instances. This sounds about right. |
2018 for line in unicode_csv_reader(codecs.open(filename, 'r', 'utf-16')): | 2014 for line in unicode_csv_reader( |
2015 codecs.open(filename + '.' + logformat, 'r', 'utf-16')): | |
2019 # line is a list of unicode objects | 2016 # line is a list of unicode objects |
2020 context.on_csv_line(line) | 2017 context.on_csv_line(line) |
2021 | 2018 |
2022 elif logformat == 'csv': | 2019 elif logformat == 'csv': |
2023 def ansi_csv_reader(ansi_csv_data, **kwargs): | 2020 def ansi_csv_reader(ansi_csv_data, **kwargs): |
2024 """Loads an 'ANSI' code page and returns unicode() objects.""" | 2021 """Loads an 'ANSI' code page and returns unicode() objects.""" |
2025 assert sys.getfilesystemencoding() == 'mbcs' | 2022 assert sys.getfilesystemencoding() == 'mbcs' |
2026 encoding = get_current_encoding() | 2023 encoding = get_current_encoding() |
2027 for row in csv.reader(ansi_csv_data, **kwargs): | 2024 for row in csv.reader(ansi_csv_data, **kwargs): |
2028 # Decode str 'ansi' instances to unicode instances, cell by cell: | 2025 # Decode str 'ansi' instances to unicode instances, cell by cell: |
2029 yield [cell.decode(encoding) for cell in row] | 2026 yield [cell.decode(encoding) for cell in row] |
2030 | 2027 |
2031 # The fastest and smallest format but only supports 'ANSI' file paths. | 2028 # The fastest and smallest format but only supports 'ANSI' file paths. |
2032 # E.g. the filenames are encoded in the 'current' encoding. | 2029 # E.g. the filenames are encoded in the 'current' encoding. |
2033 for line in ansi_csv_reader(open(filename)): | 2030 for line in ansi_csv_reader(open(filename + '.' + logformat)): |
2034 # line is a list of unicode objects. | 2031 # line is a list of unicode objects. |
2035 context.on_csv_line(line) | 2032 context.on_csv_line(line) |
2036 | 2033 |
2037 else: | 2034 else: |
2038 raise NotImplementedError('Implement %s' % logformat) | 2035 raise NotImplementedError('Implement %s' % logformat) |
2039 | 2036 |
2040 return context.to_results() | 2037 return context.to_results() |
2041 | 2038 |
2042 | 2039 |
2043 def pretty_print(variables, stdout): | 2040 def pretty_print(variables, stdout): |
(...skipping 270 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
2314 os.path.abspath(options.log), | 2311 os.path.abspath(options.log), |
2315 args, | 2312 args, |
2316 options.root_dir, | 2313 options.root_dir, |
2317 options.cwd, | 2314 options.cwd, |
2318 options.product_dir, | 2315 options.product_dir, |
2319 options.force) | 2316 options.force) |
2320 | 2317 |
2321 | 2318 |
2322 if __name__ == '__main__': | 2319 if __name__ == '__main__': |
2323 sys.exit(main()) | 2320 sys.exit(main()) |
OLD | NEW |