OLD | NEW |
1 # coding=utf8 | 1 # coding=utf8 |
2 # Copyright (c) 2012 The Chromium Authors. All rights reserved. | 2 # Copyright (c) 2012 The Chromium Authors. All rights reserved. |
3 # Use of this source code is governed by a BSD-style license that can be | 3 # Use of this source code is governed by a BSD-style license that can be |
4 # found in the LICENSE file. | 4 # found in the LICENSE file. |
5 """Utility functions to handle patches.""" | 5 """Utility functions to handle patches.""" |
6 | 6 |
7 import posixpath | 7 import posixpath |
8 import os | 8 import os |
9 import re | 9 import re |
10 | 10 |
(...skipping 19 matching lines...) Expand all Loading... |
30 is_delete = False | 30 is_delete = False |
31 is_binary = False | 31 is_binary = False |
32 is_new = False | 32 is_new = False |
33 | 33 |
34 def __init__(self, filename): | 34 def __init__(self, filename): |
35 assert self.__class__ is not FilePatchBase | 35 assert self.__class__ is not FilePatchBase |
36 self.filename = self._process_filename(filename) | 36 self.filename = self._process_filename(filename) |
37 # Set when the file is copied or moved. | 37 # Set when the file is copied or moved. |
38 self.source_filename = None | 38 self.source_filename = None |
39 | 39 |
| 40 @property |
| 41 def filename_utf8(self): |
| 42 return self.filename.encode('utf-8') |
| 43 |
| 44 @property |
| 45 def source_filename_utf8(self): |
| 46 if self.source_filename is not None: |
| 47 return self.source_filename.encode('utf-8') |
| 48 |
40 @staticmethod | 49 @staticmethod |
41 def _process_filename(filename): | 50 def _process_filename(filename): |
42 filename = filename.replace('\\', '/') | 51 filename = filename.replace('\\', '/') |
43 # Blacklist a few characters for simplicity. | 52 # Blacklist a few characters for simplicity. |
44 for i in ('%', '$', '..', '\'', '"'): | 53 for i in ('%', '$', '..', '\'', '"'): |
45 if i in filename: | 54 if i in filename: |
46 raise UnsupportedPatchFormat( | 55 raise UnsupportedPatchFormat( |
47 filename, 'Can\'t use \'%s\' in filename.' % i) | 56 filename, 'Can\'t use \'%s\' in filename.' % i) |
48 for i in ('/', 'CON', 'COM'): | 57 for i in ('/', 'CON', 'COM'): |
49 if filename.startswith(i): | 58 if filename.startswith(i): |
(...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
81 if self.is_new: | 90 if self.is_new: |
82 out += 'N' | 91 out += 'N' |
83 else: | 92 else: |
84 out += ' ' | 93 out += ' ' |
85 if self.source_filename: | 94 if self.source_filename: |
86 out += 'R' | 95 out += 'R' |
87 else: | 96 else: |
88 out += ' ' | 97 out += ' ' |
89 out += ' ' | 98 out += ' ' |
90 if self.source_filename: | 99 if self.source_filename: |
91 out += '%s->' % self.source_filename | 100 out += '%s->' % self.source_filename_utf8 |
92 return out + str(self.filename) | 101 return out + self.filename_utf8 |
93 | 102 |
94 | 103 |
95 class FilePatchDelete(FilePatchBase): | 104 class FilePatchDelete(FilePatchBase): |
96 """Deletes a file.""" | 105 """Deletes a file.""" |
97 is_delete = True | 106 is_delete = True |
98 | 107 |
99 def __init__(self, filename, is_binary): | 108 def __init__(self, filename, is_binary): |
100 super(FilePatchDelete, self).__init__(filename) | 109 super(FilePatchDelete, self).__init__(filename) |
101 self.is_binary = is_binary | 110 self.is_binary = is_binary |
102 | 111 |
(...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
146 if self.source_filename and not self.is_new: | 155 if self.source_filename and not self.is_new: |
147 self._fail('If source_filename is set, is_new must be also be set') | 156 self._fail('If source_filename is set, is_new must be also be set') |
148 | 157 |
149 def get(self, for_git): | 158 def get(self, for_git): |
150 if for_git or not self.source_filename: | 159 if for_git or not self.source_filename: |
151 return self.diff_header + self.diff_hunks | 160 return self.diff_header + self.diff_hunks |
152 else: | 161 else: |
153 # patch is stupid. It patches the source_filename instead so get rid of | 162 # patch is stupid. It patches the source_filename instead so get rid of |
154 # any source_filename reference if needed. | 163 # any source_filename reference if needed. |
155 return ( | 164 return ( |
156 self.diff_header.replace(self.source_filename, self.filename) + | 165 self.diff_header.replace( |
| 166 self.source_filename_utf8, self.filename_utf8) + |
157 self.diff_hunks) | 167 self.diff_hunks) |
158 | 168 |
159 def set_relpath(self, relpath): | 169 def set_relpath(self, relpath): |
160 old_filename = self.filename | 170 old_filename = self.filename_utf8 |
161 old_source_filename = self.source_filename or self.filename | 171 old_source_filename = self.source_filename_utf8 or self.filename_utf8 |
162 super(FilePatchDiff, self).set_relpath(relpath) | 172 super(FilePatchDiff, self).set_relpath(relpath) |
163 # Update the header too. | 173 # Update the header too. |
164 source_filename = self.source_filename or self.filename | 174 filename = self.filename_utf8 |
| 175 source_filename = self.source_filename_utf8 or self.filename_utf8 |
165 lines = self.diff_header.splitlines(True) | 176 lines = self.diff_header.splitlines(True) |
166 for i, line in enumerate(lines): | 177 for i, line in enumerate(lines): |
167 if line.startswith('diff --git'): | 178 if line.startswith('diff --git'): |
168 lines[i] = line.replace( | 179 lines[i] = line.replace( |
169 'a/' + old_source_filename, source_filename).replace( | 180 'a/' + old_source_filename, source_filename).replace( |
170 'b/' + old_filename, self.filename) | 181 'b/' + old_filename, filename) |
171 elif re.match(r'^\w+ from .+$', line) or line.startswith('---'): | 182 elif re.match(r'^\w+ from .+$', line) or line.startswith('---'): |
172 lines[i] = line.replace(old_source_filename, source_filename) | 183 lines[i] = line.replace(old_source_filename, source_filename) |
173 elif re.match(r'^\w+ to .+$', line) or line.startswith('+++'): | 184 elif re.match(r'^\w+ to .+$', line) or line.startswith('+++'): |
174 lines[i] = line.replace(old_filename, self.filename) | 185 lines[i] = line.replace(old_filename, filename) |
175 self.diff_header = ''.join(lines) | 186 self.diff_header = ''.join(lines) |
176 | 187 |
177 def _split_header(self, diff): | 188 def _split_header(self, diff): |
178 """Splits a diff in two: the header and the hunks.""" | 189 """Splits a diff in two: the header and the hunks.""" |
179 header = [] | 190 header = [] |
180 hunks = diff.splitlines(True) | 191 hunks = diff.splitlines(True) |
181 while hunks: | 192 while hunks: |
182 header.append(hunks.pop(0)) | 193 header.append(hunks.pop(0)) |
183 if header[-1].startswith('--- '): | 194 if header[-1].startswith('--- '): |
184 break | 195 break |
185 else: | 196 else: |
186 # Some diff may not have a ---/+++ set like a git rename with no change or | 197 # Some diff may not have a ---/+++ set like a git rename with no change or |
187 # a svn diff with only property change. | 198 # a svn diff with only property change. |
188 pass | 199 pass |
189 | 200 |
190 if hunks: | 201 if hunks: |
191 if not hunks[0].startswith('+++ '): | 202 if not hunks[0].startswith('+++ '): |
192 self._fail('Inconsistent header') | 203 self._fail('Inconsistent header') |
193 header.append(hunks.pop(0)) | 204 header.append(hunks.pop(0)) |
194 if hunks: | 205 if hunks: |
195 if not hunks[0].startswith('@@ '): | 206 if not hunks[0].startswith('@@ '): |
196 self._fail('Inconsistent hunk header') | 207 self._fail('Inconsistent hunk header') |
197 | 208 |
198 # Mangle any \\ in the header to /. | 209 # Mangle any \\ in the header to /. |
199 header_lines = ('Index:', 'diff', 'copy', 'rename', '+++', '---') | 210 header_lines = ('Index:', 'diff', 'copy', 'rename', '+++', '---') |
200 basename = os.path.basename(self.filename) | 211 basename = os.path.basename(self.filename_utf8) |
201 for i in xrange(len(header)): | 212 for i in xrange(len(header)): |
202 if (header[i].split(' ', 1)[0] in header_lines or | 213 if (header[i].split(' ', 1)[0] in header_lines or |
203 header[i].endswith(basename)): | 214 header[i].endswith(basename)): |
204 header[i] = header[i].replace('\\', '/') | 215 header[i] = header[i].replace('\\', '/') |
205 return ''.join(header), ''.join(hunks) | 216 return ''.join(header), ''.join(hunks) |
206 | 217 |
207 @staticmethod | 218 @staticmethod |
208 def _is_git_diff_header(diff_header): | 219 def _is_git_diff_header(diff_header): |
209 """Returns True if the diff for a single files was generated with git.""" | 220 """Returns True if the diff for a single files was generated with git.""" |
210 # Delete: http://codereview.chromium.org/download/issue6368055_22_29.diff | 221 # Delete: http://codereview.chromium.org/download/issue6368055_22_29.diff |
(...skipping 96 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
307 while lines: | 318 while lines: |
308 match = re.match(r'^diff \-\-git (.*?) (.*)$', lines.pop(0)) | 319 match = re.match(r'^diff \-\-git (.*?) (.*)$', lines.pop(0)) |
309 if not match: | 320 if not match: |
310 continue | 321 continue |
311 if match.group(1).startswith('a/') and match.group(2).startswith('b/'): | 322 if match.group(1).startswith('a/') and match.group(2).startswith('b/'): |
312 self.patchlevel = 1 | 323 self.patchlevel = 1 |
313 old = self.mangle(match.group(1)) | 324 old = self.mangle(match.group(1)) |
314 new = self.mangle(match.group(2)) | 325 new = self.mangle(match.group(2)) |
315 | 326 |
316 # The rename is about the new file so the old file can be anything. | 327 # The rename is about the new file so the old file can be anything. |
317 if new not in (self.filename, 'dev/null'): | 328 if new not in (self.filename_utf8, 'dev/null'): |
318 self._fail('Unexpected git diff output name %s.' % new) | 329 self._fail('Unexpected git diff output name %s.' % new) |
319 if old == 'dev/null' and new == 'dev/null': | 330 if old == 'dev/null' and new == 'dev/null': |
320 self._fail('Unexpected /dev/null git diff.') | 331 self._fail('Unexpected /dev/null git diff.') |
321 break | 332 break |
322 | 333 |
323 if not old or not new: | 334 if not old or not new: |
324 self._fail('Unexpected git diff; couldn\'t find git header.') | 335 self._fail('Unexpected git diff; couldn\'t find git header.') |
325 | 336 |
326 if old not in (self.filename, 'dev/null'): | 337 if old not in (self.filename_utf8, 'dev/null'): |
327 # Copy or rename. | 338 # Copy or rename. |
328 self.source_filename = old | 339 self.source_filename = old.decode('utf-8') |
329 self.is_new = True | 340 self.is_new = True |
330 | 341 |
331 last_line = '' | 342 last_line = '' |
332 | 343 |
333 while lines: | 344 while lines: |
334 line = lines.pop(0) | 345 line = lines.pop(0) |
335 self._verify_git_header_process_line(lines, line, last_line) | 346 self._verify_git_header_process_line(lines, line, last_line) |
336 last_line = line | 347 last_line = line |
337 | 348 |
338 # Cheap check to make sure the file name is at least mentioned in the | 349 # Cheap check to make sure the file name is at least mentioned in the |
339 # 'diff' header. That the only remaining invariant. | 350 # 'diff' header. That the only remaining invariant. |
340 if not self.filename in self.diff_header: | 351 if not self.filename_utf8 in self.diff_header: |
341 self._fail('Diff seems corrupted.') | 352 self._fail('Diff seems corrupted.') |
342 | 353 |
343 def _verify_git_header_process_line(self, lines, line, last_line): | 354 def _verify_git_header_process_line(self, lines, line, last_line): |
344 """Processes a single line of the header. | 355 """Processes a single line of the header. |
345 | 356 |
346 Returns True if it should continue looping. | 357 Returns True if it should continue looping. |
347 | 358 |
348 Format is described to | 359 Format is described to |
349 http://www.kernel.org/pub/software/scm/git/docs/git-diff.html | 360 http://www.kernel.org/pub/software/scm/git/docs/git-diff.html |
350 """ | 361 """ |
351 match = re.match(r'^(rename|copy) from (.+)$', line) | 362 match = re.match(r'^(rename|copy) from (.+)$', line) |
352 old = self.source_filename or self.filename | 363 old = self.source_filename_utf8 or self.filename_utf8 |
353 if match: | 364 if match: |
354 if old != match.group(2): | 365 if old != match.group(2): |
355 self._fail('Unexpected git diff input name for line %s.' % line) | 366 self._fail('Unexpected git diff input name for line %s.' % line) |
356 if not lines or not lines[0].startswith('%s to ' % match.group(1)): | 367 if not lines or not lines[0].startswith('%s to ' % match.group(1)): |
357 self._fail( | 368 self._fail( |
358 'Confused %s from/to git diff for line %s.' % | 369 'Confused %s from/to git diff for line %s.' % |
359 (match.group(1), line)) | 370 (match.group(1), line)) |
360 return | 371 return |
361 | 372 |
362 match = re.match(r'^(rename|copy) to (.+)$', line) | 373 match = re.match(r'^(rename|copy) to (.+)$', line) |
363 if match: | 374 if match: |
364 if self.filename != match.group(2): | 375 if self.filename_utf8 != match.group(2): |
365 self._fail('Unexpected git diff output name for line %s.' % line) | 376 self._fail('Unexpected git diff output name for line %s.' % line) |
366 if not last_line.startswith('%s from ' % match.group(1)): | 377 if not last_line.startswith('%s from ' % match.group(1)): |
367 self._fail( | 378 self._fail( |
368 'Confused %s from/to git diff for line %s.' % | 379 'Confused %s from/to git diff for line %s.' % |
369 (match.group(1), line)) | 380 (match.group(1), line)) |
370 return | 381 return |
371 | 382 |
372 match = re.match(r'^deleted file mode (\d{6})$', line) | 383 match = re.match(r'^deleted file mode (\d{6})$', line) |
373 if match: | 384 if match: |
374 # It is necessary to parse it because there may be no hunk, like when the | 385 # It is necessary to parse it because there may be no hunk, like when the |
(...skipping 22 matching lines...) Expand all Loading... |
397 if not lines or not lines[0].startswith('+++'): | 408 if not lines or not lines[0].startswith('+++'): |
398 self._fail('Missing git diff output name.') | 409 self._fail('Missing git diff output name.') |
399 return | 410 return |
400 | 411 |
401 match = re.match(r'^\+\+\+ (.*)$', line) | 412 match = re.match(r'^\+\+\+ (.*)$', line) |
402 if match: | 413 if match: |
403 if not last_line.startswith('---'): | 414 if not last_line.startswith('---'): |
404 self._fail('Unexpected git diff: --- not following +++.') | 415 self._fail('Unexpected git diff: --- not following +++.') |
405 if '/dev/null' == match.group(1): | 416 if '/dev/null' == match.group(1): |
406 self.is_delete = True | 417 self.is_delete = True |
407 elif self.filename != self.mangle(match.group(1)): | 418 elif self.filename_utf8 != self.mangle(match.group(1)): |
408 self._fail( | 419 self._fail( |
409 'Unexpected git diff: %s != %s.' % (self.filename, match.group(1))) | 420 'Unexpected git diff: %s != %s.' % (self.filename, match.group(1))) |
410 if lines: | 421 if lines: |
411 self._fail('Crap after +++') | 422 self._fail('Crap after +++') |
412 # We're done. | 423 # We're done. |
413 return | 424 return |
414 | 425 |
415 def _verify_svn_header(self): | 426 def _verify_svn_header(self): |
416 """Sanity checks the header. | 427 """Sanity checks the header. |
417 | 428 |
418 A svn diff can contain only property changes, in that case there will be no | 429 A svn diff can contain only property changes, in that case there will be no |
419 proper header. To make things worse, this property change header is | 430 proper header. To make things worse, this property change header is |
420 localized. | 431 localized. |
421 """ | 432 """ |
422 lines = self.diff_header.splitlines() | 433 lines = self.diff_header.splitlines() |
423 last_line = '' | 434 last_line = '' |
424 | 435 |
425 while lines: | 436 while lines: |
426 line = lines.pop(0) | 437 line = lines.pop(0) |
427 self._verify_svn_header_process_line(lines, line, last_line) | 438 self._verify_svn_header_process_line(lines, line, last_line) |
428 last_line = line | 439 last_line = line |
429 | 440 |
430 # Cheap check to make sure the file name is at least mentioned in the | 441 # Cheap check to make sure the file name is at least mentioned in the |
431 # 'diff' header. That the only remaining invariant. | 442 # 'diff' header. That the only remaining invariant. |
432 if not self.filename in self.diff_header: | 443 if not self.filename_utf8 in self.diff_header: |
433 self._fail('Diff seems corrupted.') | 444 self._fail('Diff seems corrupted.') |
434 | 445 |
435 def _verify_svn_header_process_line(self, lines, line, last_line): | 446 def _verify_svn_header_process_line(self, lines, line, last_line): |
436 """Processes a single line of the header. | 447 """Processes a single line of the header. |
437 | 448 |
438 Returns True if it should continue looping. | 449 Returns True if it should continue looping. |
439 """ | 450 """ |
440 match = re.match(r'^--- ([^\t]+).*$', line) | 451 match = re.match(r'^--- ([^\t]+).*$', line) |
441 if match: | 452 if match: |
442 if last_line[:3] in ('---', '+++'): | 453 if last_line[:3] in ('---', '+++'): |
443 self._fail('--- and +++ are reversed') | 454 self._fail('--- and +++ are reversed') |
444 if match.group(1) == '/dev/null': | 455 if match.group(1) == '/dev/null': |
445 self.is_new = True | 456 self.is_new = True |
446 elif self.mangle(match.group(1)) != self.filename: | 457 elif self.mangle(match.group(1)) != self.filename_utf8: |
447 # guess the source filename. | 458 # guess the source filename. |
448 self.source_filename = match.group(1) | 459 self.source_filename = match.group(1).decode('utf-8') |
449 self.is_new = True | 460 self.is_new = True |
450 if not lines or not lines[0].startswith('+++'): | 461 if not lines or not lines[0].startswith('+++'): |
451 self._fail('Nothing after header.') | 462 self._fail('Nothing after header.') |
452 return | 463 return |
453 | 464 |
454 match = re.match(r'^\+\+\+ ([^\t]+).*$', line) | 465 match = re.match(r'^\+\+\+ ([^\t]+).*$', line) |
455 if match: | 466 if match: |
456 if not last_line.startswith('---'): | 467 if not last_line.startswith('---'): |
457 self._fail('Unexpected diff: --- not following +++.') | 468 self._fail('Unexpected diff: --- not following +++.') |
458 if match.group(1) == '/dev/null': | 469 if match.group(1) == '/dev/null': |
459 self.is_delete = True | 470 self.is_delete = True |
460 elif self.mangle(match.group(1)) != self.filename: | 471 elif self.mangle(match.group(1)) != self.filename_utf8: |
461 self._fail('Unexpected diff: %s.' % match.group(1)) | 472 self._fail('Unexpected diff: %s.' % match.group(1)) |
462 if lines: | 473 if lines: |
463 self._fail('Crap after +++') | 474 self._fail('Crap after +++') |
464 # We're done. | 475 # We're done. |
465 return | 476 return |
466 | 477 |
467 | 478 |
468 class PatchSet(object): | 479 class PatchSet(object): |
469 """A list of FilePatch* objects.""" | 480 """A list of FilePatch* objects.""" |
470 | 481 |
471 def __init__(self, patches): | 482 def __init__(self, patches): |
472 for p in patches: | 483 for p in patches: |
473 assert isinstance(p, FilePatchBase) | 484 assert isinstance(p, FilePatchBase) |
474 | 485 |
475 def key(p): | 486 def key(p): |
476 """Sort by ordering of application. | 487 """Sort by ordering of application. |
477 | 488 |
478 File move are first. | 489 File move are first. |
479 Deletes are last. | 490 Deletes are last. |
480 """ | 491 """ |
481 if p.source_filename: | 492 if p.source_filename: |
482 return (p.is_delete, p.source_filename, p.filename) | 493 return (p.is_delete, p.source_filename_utf8, p.filename_utf8) |
483 else: | 494 else: |
484 # tuple are always greater than string, abuse that fact. | 495 # tuple are always greater than string, abuse that fact. |
485 return (p.is_delete, (p.filename,), p.filename) | 496 return (p.is_delete, (p.filename_utf8,), p.filename_utf8) |
486 | 497 |
487 self.patches = sorted(patches, key=key) | 498 self.patches = sorted(patches, key=key) |
488 | 499 |
489 def set_relpath(self, relpath): | 500 def set_relpath(self, relpath): |
490 """Used to offset the patch into a subdirectory.""" | 501 """Used to offset the patch into a subdirectory.""" |
491 for patch in self.patches: | 502 for patch in self.patches: |
492 patch.set_relpath(relpath) | 503 patch.set_relpath(relpath) |
493 | 504 |
494 def __iter__(self): | 505 def __iter__(self): |
495 for patch in self.patches: | 506 for patch in self.patches: |
496 yield patch | 507 yield patch |
497 | 508 |
498 def __getitem__(self, key): | 509 def __getitem__(self, key): |
499 return self.patches[key] | 510 return self.patches[key] |
500 | 511 |
501 @property | 512 @property |
502 def filenames(self): | 513 def filenames(self): |
503 return [p.filename for p in self.patches] | 514 return [p.filename for p in self.patches] |
OLD | NEW |