| OLD | NEW |
| 1 # coding=utf8 | 1 # coding=utf8 |
| 2 # Copyright (c) 2012 The Chromium Authors. All rights reserved. | 2 # Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 3 # Use of this source code is governed by a BSD-style license that can be | 3 # Use of this source code is governed by a BSD-style license that can be |
| 4 # found in the LICENSE file. | 4 # found in the LICENSE file. |
| 5 """Utility functions to handle patches.""" | 5 """Utility functions to handle patches.""" |
| 6 | 6 |
| 7 import posixpath | 7 import posixpath |
| 8 import os | 8 import os |
| 9 import re | 9 import re |
| 10 | 10 |
| (...skipping 19 matching lines...) Expand all Loading... |
| 30 is_delete = False | 30 is_delete = False |
| 31 is_binary = False | 31 is_binary = False |
| 32 is_new = False | 32 is_new = False |
| 33 | 33 |
| 34 def __init__(self, filename): | 34 def __init__(self, filename): |
| 35 assert self.__class__ is not FilePatchBase | 35 assert self.__class__ is not FilePatchBase |
| 36 self.filename = self._process_filename(filename) | 36 self.filename = self._process_filename(filename) |
| 37 # Set when the file is copied or moved. | 37 # Set when the file is copied or moved. |
| 38 self.source_filename = None | 38 self.source_filename = None |
| 39 | 39 |
| 40 @property |
| 41 def filename_utf8(self): |
| 42 return self.filename.encode('utf-8') |
| 43 |
| 44 @property |
| 45 def source_filename_utf8(self): |
| 46 if self.source_filename is not None: |
| 47 return self.source_filename.encode('utf-8') |
| 48 |
| 40 @staticmethod | 49 @staticmethod |
| 41 def _process_filename(filename): | 50 def _process_filename(filename): |
| 42 filename = filename.replace('\\', '/') | 51 filename = filename.replace('\\', '/') |
| 43 # Blacklist a few characters for simplicity. | 52 # Blacklist a few characters for simplicity. |
| 44 for i in ('%', '$', '..', '\'', '"'): | 53 for i in ('%', '$', '..', '\'', '"'): |
| 45 if i in filename: | 54 if i in filename: |
| 46 raise UnsupportedPatchFormat( | 55 raise UnsupportedPatchFormat( |
| 47 filename, 'Can\'t use \'%s\' in filename.' % i) | 56 filename, 'Can\'t use \'%s\' in filename.' % i) |
| 48 for i in ('/', 'CON', 'COM'): | 57 for i in ('/', 'CON', 'COM'): |
| 49 if filename.startswith(i): | 58 if filename.startswith(i): |
| (...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 81 if self.is_new: | 90 if self.is_new: |
| 82 out += 'N' | 91 out += 'N' |
| 83 else: | 92 else: |
| 84 out += ' ' | 93 out += ' ' |
| 85 if self.source_filename: | 94 if self.source_filename: |
| 86 out += 'R' | 95 out += 'R' |
| 87 else: | 96 else: |
| 88 out += ' ' | 97 out += ' ' |
| 89 out += ' ' | 98 out += ' ' |
| 90 if self.source_filename: | 99 if self.source_filename: |
| 91 out += '%s->' % self.source_filename | 100 out += '%s->' % self.source_filename_utf8 |
| 92 return out + str(self.filename) | 101 return out + self.filename_utf8 |
| 93 | 102 |
| 94 | 103 |
| 95 class FilePatchDelete(FilePatchBase): | 104 class FilePatchDelete(FilePatchBase): |
| 96 """Deletes a file.""" | 105 """Deletes a file.""" |
| 97 is_delete = True | 106 is_delete = True |
| 98 | 107 |
| 99 def __init__(self, filename, is_binary): | 108 def __init__(self, filename, is_binary): |
| 100 super(FilePatchDelete, self).__init__(filename) | 109 super(FilePatchDelete, self).__init__(filename) |
| 101 self.is_binary = is_binary | 110 self.is_binary = is_binary |
| 102 | 111 |
| (...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 146 if self.source_filename and not self.is_new: | 155 if self.source_filename and not self.is_new: |
| 147 self._fail('If source_filename is set, is_new must be also be set') | 156 self._fail('If source_filename is set, is_new must be also be set') |
| 148 | 157 |
| 149 def get(self, for_git): | 158 def get(self, for_git): |
| 150 if for_git or not self.source_filename: | 159 if for_git or not self.source_filename: |
| 151 return self.diff_header + self.diff_hunks | 160 return self.diff_header + self.diff_hunks |
| 152 else: | 161 else: |
| 153 # patch is stupid. It patches the source_filename instead so get rid of | 162 # patch is stupid. It patches the source_filename instead so get rid of |
| 154 # any source_filename reference if needed. | 163 # any source_filename reference if needed. |
| 155 return ( | 164 return ( |
| 156 self.diff_header.replace(self.source_filename, self.filename) + | 165 self.diff_header.replace( |
| 166 self.source_filename_utf8, self.filename_utf8) + |
| 157 self.diff_hunks) | 167 self.diff_hunks) |
| 158 | 168 |
| 159 def set_relpath(self, relpath): | 169 def set_relpath(self, relpath): |
| 160 old_filename = self.filename | 170 old_filename = self.filename_utf8 |
| 161 old_source_filename = self.source_filename or self.filename | 171 old_source_filename = self.source_filename_utf8 or self.filename_utf8 |
| 162 super(FilePatchDiff, self).set_relpath(relpath) | 172 super(FilePatchDiff, self).set_relpath(relpath) |
| 163 # Update the header too. | 173 # Update the header too. |
| 164 source_filename = self.source_filename or self.filename | 174 filename = self.filename_utf8 |
| 175 source_filename = self.source_filename_utf8 or self.filename_utf8 |
| 165 lines = self.diff_header.splitlines(True) | 176 lines = self.diff_header.splitlines(True) |
| 166 for i, line in enumerate(lines): | 177 for i, line in enumerate(lines): |
| 167 if line.startswith('diff --git'): | 178 if line.startswith('diff --git'): |
| 168 lines[i] = line.replace( | 179 lines[i] = line.replace( |
| 169 'a/' + old_source_filename, source_filename).replace( | 180 'a/' + old_source_filename, source_filename).replace( |
| 170 'b/' + old_filename, self.filename) | 181 'b/' + old_filename, filename) |
| 171 elif re.match(r'^\w+ from .+$', line) or line.startswith('---'): | 182 elif re.match(r'^\w+ from .+$', line) or line.startswith('---'): |
| 172 lines[i] = line.replace(old_source_filename, source_filename) | 183 lines[i] = line.replace(old_source_filename, source_filename) |
| 173 elif re.match(r'^\w+ to .+$', line) or line.startswith('+++'): | 184 elif re.match(r'^\w+ to .+$', line) or line.startswith('+++'): |
| 174 lines[i] = line.replace(old_filename, self.filename) | 185 lines[i] = line.replace(old_filename, filename) |
| 175 self.diff_header = ''.join(lines) | 186 self.diff_header = ''.join(lines) |
| 176 | 187 |
| 177 def _split_header(self, diff): | 188 def _split_header(self, diff): |
| 178 """Splits a diff in two: the header and the hunks.""" | 189 """Splits a diff in two: the header and the hunks.""" |
| 179 header = [] | 190 header = [] |
| 180 hunks = diff.splitlines(True) | 191 hunks = diff.splitlines(True) |
| 181 while hunks: | 192 while hunks: |
| 182 header.append(hunks.pop(0)) | 193 header.append(hunks.pop(0)) |
| 183 if header[-1].startswith('--- '): | 194 if header[-1].startswith('--- '): |
| 184 break | 195 break |
| 185 else: | 196 else: |
| 186 # Some diff may not have a ---/+++ set like a git rename with no change or | 197 # Some diff may not have a ---/+++ set like a git rename with no change or |
| 187 # a svn diff with only property change. | 198 # a svn diff with only property change. |
| 188 pass | 199 pass |
| 189 | 200 |
| 190 if hunks: | 201 if hunks: |
| 191 if not hunks[0].startswith('+++ '): | 202 if not hunks[0].startswith('+++ '): |
| 192 self._fail('Inconsistent header') | 203 self._fail('Inconsistent header') |
| 193 header.append(hunks.pop(0)) | 204 header.append(hunks.pop(0)) |
| 194 if hunks: | 205 if hunks: |
| 195 if not hunks[0].startswith('@@ '): | 206 if not hunks[0].startswith('@@ '): |
| 196 self._fail('Inconsistent hunk header') | 207 self._fail('Inconsistent hunk header') |
| 197 | 208 |
| 198 # Mangle any \\ in the header to /. | 209 # Mangle any \\ in the header to /. |
| 199 header_lines = ('Index:', 'diff', 'copy', 'rename', '+++', '---') | 210 header_lines = ('Index:', 'diff', 'copy', 'rename', '+++', '---') |
| 200 basename = os.path.basename(self.filename) | 211 basename = os.path.basename(self.filename_utf8) |
| 201 for i in xrange(len(header)): | 212 for i in xrange(len(header)): |
| 202 if (header[i].split(' ', 1)[0] in header_lines or | 213 if (header[i].split(' ', 1)[0] in header_lines or |
| 203 header[i].endswith(basename)): | 214 header[i].endswith(basename)): |
| 204 header[i] = header[i].replace('\\', '/') | 215 header[i] = header[i].replace('\\', '/') |
| 205 return ''.join(header), ''.join(hunks) | 216 return ''.join(header), ''.join(hunks) |
| 206 | 217 |
| 207 @staticmethod | 218 @staticmethod |
| 208 def _is_git_diff_header(diff_header): | 219 def _is_git_diff_header(diff_header): |
| 209 """Returns True if the diff for a single files was generated with git.""" | 220 """Returns True if the diff for a single files was generated with git.""" |
| 210 # Delete: http://codereview.chromium.org/download/issue6368055_22_29.diff | 221 # Delete: http://codereview.chromium.org/download/issue6368055_22_29.diff |
| (...skipping 96 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 307 while lines: | 318 while lines: |
| 308 match = re.match(r'^diff \-\-git (.*?) (.*)$', lines.pop(0)) | 319 match = re.match(r'^diff \-\-git (.*?) (.*)$', lines.pop(0)) |
| 309 if not match: | 320 if not match: |
| 310 continue | 321 continue |
| 311 if match.group(1).startswith('a/') and match.group(2).startswith('b/'): | 322 if match.group(1).startswith('a/') and match.group(2).startswith('b/'): |
| 312 self.patchlevel = 1 | 323 self.patchlevel = 1 |
| 313 old = self.mangle(match.group(1)) | 324 old = self.mangle(match.group(1)) |
| 314 new = self.mangle(match.group(2)) | 325 new = self.mangle(match.group(2)) |
| 315 | 326 |
| 316 # The rename is about the new file so the old file can be anything. | 327 # The rename is about the new file so the old file can be anything. |
| 317 if new not in (self.filename, 'dev/null'): | 328 if new not in (self.filename_utf8, 'dev/null'): |
| 318 self._fail('Unexpected git diff output name %s.' % new) | 329 self._fail('Unexpected git diff output name %s.' % new) |
| 319 if old == 'dev/null' and new == 'dev/null': | 330 if old == 'dev/null' and new == 'dev/null': |
| 320 self._fail('Unexpected /dev/null git diff.') | 331 self._fail('Unexpected /dev/null git diff.') |
| 321 break | 332 break |
| 322 | 333 |
| 323 if not old or not new: | 334 if not old or not new: |
| 324 self._fail('Unexpected git diff; couldn\'t find git header.') | 335 self._fail('Unexpected git diff; couldn\'t find git header.') |
| 325 | 336 |
| 326 if old not in (self.filename, 'dev/null'): | 337 if old not in (self.filename_utf8, 'dev/null'): |
| 327 # Copy or rename. | 338 # Copy or rename. |
| 328 self.source_filename = old | 339 self.source_filename = old.decode('utf-8') |
| 329 self.is_new = True | 340 self.is_new = True |
| 330 | 341 |
| 331 last_line = '' | 342 last_line = '' |
| 332 | 343 |
| 333 while lines: | 344 while lines: |
| 334 line = lines.pop(0) | 345 line = lines.pop(0) |
| 335 self._verify_git_header_process_line(lines, line, last_line) | 346 self._verify_git_header_process_line(lines, line, last_line) |
| 336 last_line = line | 347 last_line = line |
| 337 | 348 |
| 338 # Cheap check to make sure the file name is at least mentioned in the | 349 # Cheap check to make sure the file name is at least mentioned in the |
| 339 # 'diff' header. That the only remaining invariant. | 350 # 'diff' header. That the only remaining invariant. |
| 340 if not self.filename in self.diff_header: | 351 if not self.filename_utf8 in self.diff_header: |
| 341 self._fail('Diff seems corrupted.') | 352 self._fail('Diff seems corrupted.') |
| 342 | 353 |
| 343 def _verify_git_header_process_line(self, lines, line, last_line): | 354 def _verify_git_header_process_line(self, lines, line, last_line): |
| 344 """Processes a single line of the header. | 355 """Processes a single line of the header. |
| 345 | 356 |
| 346 Returns True if it should continue looping. | 357 Returns True if it should continue looping. |
| 347 | 358 |
| 348 Format is described to | 359 Format is described to |
| 349 http://www.kernel.org/pub/software/scm/git/docs/git-diff.html | 360 http://www.kernel.org/pub/software/scm/git/docs/git-diff.html |
| 350 """ | 361 """ |
| 351 match = re.match(r'^(rename|copy) from (.+)$', line) | 362 match = re.match(r'^(rename|copy) from (.+)$', line) |
| 352 old = self.source_filename or self.filename | 363 old = self.source_filename_utf8 or self.filename_utf8 |
| 353 if match: | 364 if match: |
| 354 if old != match.group(2): | 365 if old != match.group(2): |
| 355 self._fail('Unexpected git diff input name for line %s.' % line) | 366 self._fail('Unexpected git diff input name for line %s.' % line) |
| 356 if not lines or not lines[0].startswith('%s to ' % match.group(1)): | 367 if not lines or not lines[0].startswith('%s to ' % match.group(1)): |
| 357 self._fail( | 368 self._fail( |
| 358 'Confused %s from/to git diff for line %s.' % | 369 'Confused %s from/to git diff for line %s.' % |
| 359 (match.group(1), line)) | 370 (match.group(1), line)) |
| 360 return | 371 return |
| 361 | 372 |
| 362 match = re.match(r'^(rename|copy) to (.+)$', line) | 373 match = re.match(r'^(rename|copy) to (.+)$', line) |
| 363 if match: | 374 if match: |
| 364 if self.filename != match.group(2): | 375 if self.filename_utf8 != match.group(2): |
| 365 self._fail('Unexpected git diff output name for line %s.' % line) | 376 self._fail('Unexpected git diff output name for line %s.' % line) |
| 366 if not last_line.startswith('%s from ' % match.group(1)): | 377 if not last_line.startswith('%s from ' % match.group(1)): |
| 367 self._fail( | 378 self._fail( |
| 368 'Confused %s from/to git diff for line %s.' % | 379 'Confused %s from/to git diff for line %s.' % |
| 369 (match.group(1), line)) | 380 (match.group(1), line)) |
| 370 return | 381 return |
| 371 | 382 |
| 372 match = re.match(r'^deleted file mode (\d{6})$', line) | 383 match = re.match(r'^deleted file mode (\d{6})$', line) |
| 373 if match: | 384 if match: |
| 374 # It is necessary to parse it because there may be no hunk, like when the | 385 # It is necessary to parse it because there may be no hunk, like when the |
| (...skipping 22 matching lines...) Expand all Loading... |
| 397 if not lines or not lines[0].startswith('+++'): | 408 if not lines or not lines[0].startswith('+++'): |
| 398 self._fail('Missing git diff output name.') | 409 self._fail('Missing git diff output name.') |
| 399 return | 410 return |
| 400 | 411 |
| 401 match = re.match(r'^\+\+\+ (.*)$', line) | 412 match = re.match(r'^\+\+\+ (.*)$', line) |
| 402 if match: | 413 if match: |
| 403 if not last_line.startswith('---'): | 414 if not last_line.startswith('---'): |
| 404 self._fail('Unexpected git diff: --- not following +++.') | 415 self._fail('Unexpected git diff: --- not following +++.') |
| 405 if '/dev/null' == match.group(1): | 416 if '/dev/null' == match.group(1): |
| 406 self.is_delete = True | 417 self.is_delete = True |
| 407 elif self.filename != self.mangle(match.group(1)): | 418 elif self.filename_utf8 != self.mangle(match.group(1)): |
| 408 self._fail( | 419 self._fail( |
| 409 'Unexpected git diff: %s != %s.' % (self.filename, match.group(1))) | 420 'Unexpected git diff: %s != %s.' % (self.filename, match.group(1))) |
| 410 if lines: | 421 if lines: |
| 411 self._fail('Crap after +++') | 422 self._fail('Crap after +++') |
| 412 # We're done. | 423 # We're done. |
| 413 return | 424 return |
| 414 | 425 |
| 415 def _verify_svn_header(self): | 426 def _verify_svn_header(self): |
| 416 """Sanity checks the header. | 427 """Sanity checks the header. |
| 417 | 428 |
| 418 A svn diff can contain only property changes, in that case there will be no | 429 A svn diff can contain only property changes, in that case there will be no |
| 419 proper header. To make things worse, this property change header is | 430 proper header. To make things worse, this property change header is |
| 420 localized. | 431 localized. |
| 421 """ | 432 """ |
| 422 lines = self.diff_header.splitlines() | 433 lines = self.diff_header.splitlines() |
| 423 last_line = '' | 434 last_line = '' |
| 424 | 435 |
| 425 while lines: | 436 while lines: |
| 426 line = lines.pop(0) | 437 line = lines.pop(0) |
| 427 self._verify_svn_header_process_line(lines, line, last_line) | 438 self._verify_svn_header_process_line(lines, line, last_line) |
| 428 last_line = line | 439 last_line = line |
| 429 | 440 |
| 430 # Cheap check to make sure the file name is at least mentioned in the | 441 # Cheap check to make sure the file name is at least mentioned in the |
| 431 # 'diff' header. That the only remaining invariant. | 442 # 'diff' header. That the only remaining invariant. |
| 432 if not self.filename in self.diff_header: | 443 if not self.filename_utf8 in self.diff_header: |
| 433 self._fail('Diff seems corrupted.') | 444 self._fail('Diff seems corrupted.') |
| 434 | 445 |
| 435 def _verify_svn_header_process_line(self, lines, line, last_line): | 446 def _verify_svn_header_process_line(self, lines, line, last_line): |
| 436 """Processes a single line of the header. | 447 """Processes a single line of the header. |
| 437 | 448 |
| 438 Returns True if it should continue looping. | 449 Returns True if it should continue looping. |
| 439 """ | 450 """ |
| 440 match = re.match(r'^--- ([^\t]+).*$', line) | 451 match = re.match(r'^--- ([^\t]+).*$', line) |
| 441 if match: | 452 if match: |
| 442 if last_line[:3] in ('---', '+++'): | 453 if last_line[:3] in ('---', '+++'): |
| 443 self._fail('--- and +++ are reversed') | 454 self._fail('--- and +++ are reversed') |
| 444 if match.group(1) == '/dev/null': | 455 if match.group(1) == '/dev/null': |
| 445 self.is_new = True | 456 self.is_new = True |
| 446 elif self.mangle(match.group(1)) != self.filename: | 457 elif self.mangle(match.group(1)) != self.filename_utf8: |
| 447 # guess the source filename. | 458 # guess the source filename. |
| 448 self.source_filename = match.group(1) | 459 self.source_filename = match.group(1).decode('utf-8') |
| 449 self.is_new = True | 460 self.is_new = True |
| 450 if not lines or not lines[0].startswith('+++'): | 461 if not lines or not lines[0].startswith('+++'): |
| 451 self._fail('Nothing after header.') | 462 self._fail('Nothing after header.') |
| 452 return | 463 return |
| 453 | 464 |
| 454 match = re.match(r'^\+\+\+ ([^\t]+).*$', line) | 465 match = re.match(r'^\+\+\+ ([^\t]+).*$', line) |
| 455 if match: | 466 if match: |
| 456 if not last_line.startswith('---'): | 467 if not last_line.startswith('---'): |
| 457 self._fail('Unexpected diff: --- not following +++.') | 468 self._fail('Unexpected diff: --- not following +++.') |
| 458 if match.group(1) == '/dev/null': | 469 if match.group(1) == '/dev/null': |
| 459 self.is_delete = True | 470 self.is_delete = True |
| 460 elif self.mangle(match.group(1)) != self.filename: | 471 elif self.mangle(match.group(1)) != self.filename_utf8: |
| 461 self._fail('Unexpected diff: %s.' % match.group(1)) | 472 self._fail('Unexpected diff: %s.' % match.group(1)) |
| 462 if lines: | 473 if lines: |
| 463 self._fail('Crap after +++') | 474 self._fail('Crap after +++') |
| 464 # We're done. | 475 # We're done. |
| 465 return | 476 return |
| 466 | 477 |
| 467 | 478 |
| 468 class PatchSet(object): | 479 class PatchSet(object): |
| 469 """A list of FilePatch* objects.""" | 480 """A list of FilePatch* objects.""" |
| 470 | 481 |
| 471 def __init__(self, patches): | 482 def __init__(self, patches): |
| 472 for p in patches: | 483 for p in patches: |
| 473 assert isinstance(p, FilePatchBase) | 484 assert isinstance(p, FilePatchBase) |
| 474 | 485 |
| 475 def key(p): | 486 def key(p): |
| 476 """Sort by ordering of application. | 487 """Sort by ordering of application. |
| 477 | 488 |
| 478 File move are first. | 489 File move are first. |
| 479 Deletes are last. | 490 Deletes are last. |
| 480 """ | 491 """ |
| 481 if p.source_filename: | 492 if p.source_filename: |
| 482 return (p.is_delete, p.source_filename, p.filename) | 493 return (p.is_delete, p.source_filename_utf8, p.filename_utf8) |
| 483 else: | 494 else: |
| 484 # tuple are always greater than string, abuse that fact. | 495 # tuple are always greater than string, abuse that fact. |
| 485 return (p.is_delete, (p.filename,), p.filename) | 496 return (p.is_delete, (p.filename_utf8,), p.filename_utf8) |
| 486 | 497 |
| 487 self.patches = sorted(patches, key=key) | 498 self.patches = sorted(patches, key=key) |
| 488 | 499 |
| 489 def set_relpath(self, relpath): | 500 def set_relpath(self, relpath): |
| 490 """Used to offset the patch into a subdirectory.""" | 501 """Used to offset the patch into a subdirectory.""" |
| 491 for patch in self.patches: | 502 for patch in self.patches: |
| 492 patch.set_relpath(relpath) | 503 patch.set_relpath(relpath) |
| 493 | 504 |
| 494 def __iter__(self): | 505 def __iter__(self): |
| 495 for patch in self.patches: | 506 for patch in self.patches: |
| 496 yield patch | 507 yield patch |
| 497 | 508 |
| 498 def __getitem__(self, key): | 509 def __getitem__(self, key): |
| 499 return self.patches[key] | 510 return self.patches[key] |
| 500 | 511 |
| 501 @property | 512 @property |
| 502 def filenames(self): | 513 def filenames(self): |
| 503 return [p.filename for p in self.patches] | 514 return [p.filename for p in self.patches] |
| OLD | NEW |