Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(19)

Side by Side Diff: patch.py

Issue 9387024: Fix unicode upgrade of patch. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/tools/depot_tools
Patch Set: Created 8 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « no previous file | tests/patch_test.py » ('j') | tests/patch_test.py » ('J')
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments / Hide Comments ('s')
OLD | NEW
1 # coding=utf8 1 # coding=utf8
2 # Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 # Copyright (c) 2012 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be 3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file. 4 # found in the LICENSE file.
5 """Utility functions to handle patches.""" 5 """Utility functions to handle patches."""
6 6
7 import posixpath 7 import posixpath
8 import os 8 import os
9 import re 9 import re
10 10
(...skipping 19 matching lines...) Expand all
30 is_delete = False 30 is_delete = False
31 is_binary = False 31 is_binary = False
32 is_new = False 32 is_new = False
33 33
34 def __init__(self, filename): 34 def __init__(self, filename):
35 assert self.__class__ is not FilePatchBase 35 assert self.__class__ is not FilePatchBase
36 self.filename = self._process_filename(filename) 36 self.filename = self._process_filename(filename)
37 # Set when the file is copied or moved. 37 # Set when the file is copied or moved.
38 self.source_filename = None 38 self.source_filename = None
39 39
40 @property
41 def filename_utf8(self):
42 return self.filename.encode('utf-8')
43
44 @property
45 def source_filename_utf8(self):
46 if self.source_filename is not None:
47 return self.source_filename.encode('utf-8')
48
40 @staticmethod 49 @staticmethod
41 def _process_filename(filename): 50 def _process_filename(filename):
42 filename = filename.replace('\\', '/') 51 filename = filename.replace('\\', '/')
43 # Blacklist a few characters for simplicity. 52 # Blacklist a few characters for simplicity.
44 for i in ('%', '$', '..', '\'', '"'): 53 for i in ('%', '$', '..', '\'', '"'):
45 if i in filename: 54 if i in filename:
46 raise UnsupportedPatchFormat( 55 raise UnsupportedPatchFormat(
47 filename, 'Can\'t use \'%s\' in filename.' % i) 56 filename, 'Can\'t use \'%s\' in filename.' % i)
48 for i in ('/', 'CON', 'COM'): 57 for i in ('/', 'CON', 'COM'):
49 if filename.startswith(i): 58 if filename.startswith(i):
(...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after
81 if self.is_new: 90 if self.is_new:
82 out += 'N' 91 out += 'N'
83 else: 92 else:
84 out += ' ' 93 out += ' '
85 if self.source_filename: 94 if self.source_filename:
86 out += 'R' 95 out += 'R'
87 else: 96 else:
88 out += ' ' 97 out += ' '
89 out += ' ' 98 out += ' '
90 if self.source_filename: 99 if self.source_filename:
91 out += '%s->' % self.source_filename 100 out += '%s->' % self.source_filename_utf8
92 return out + str(self.filename) 101 return out + self.filename_utf8
93 102
94 103
95 class FilePatchDelete(FilePatchBase): 104 class FilePatchDelete(FilePatchBase):
96 """Deletes a file.""" 105 """Deletes a file."""
97 is_delete = True 106 is_delete = True
98 107
99 def __init__(self, filename, is_binary): 108 def __init__(self, filename, is_binary):
100 super(FilePatchDelete, self).__init__(filename) 109 super(FilePatchDelete, self).__init__(filename)
101 self.is_binary = is_binary 110 self.is_binary = is_binary
102 111
(...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after
146 if self.source_filename and not self.is_new: 155 if self.source_filename and not self.is_new:
147 self._fail('If source_filename is set, is_new must be also be set') 156 self._fail('If source_filename is set, is_new must be also be set')
148 157
149 def get(self, for_git): 158 def get(self, for_git):
150 if for_git or not self.source_filename: 159 if for_git or not self.source_filename:
151 return self.diff_header + self.diff_hunks 160 return self.diff_header + self.diff_hunks
152 else: 161 else:
153 # patch is stupid. It patches the source_filename instead so get rid of 162 # patch is stupid. It patches the source_filename instead so get rid of
154 # any source_filename reference if needed. 163 # any source_filename reference if needed.
155 return ( 164 return (
156 self.diff_header.replace(self.source_filename, self.filename) + 165 self.diff_header.replace(
166 self.source_filename_utf8, self.filename_utf8) +
157 self.diff_hunks) 167 self.diff_hunks)
158 168
159 def set_relpath(self, relpath): 169 def set_relpath(self, relpath):
160 old_filename = self.filename 170 old_filename = self.filename_utf8
161 old_source_filename = self.source_filename or self.filename 171 old_source_filename = self.source_filename_utf8 or self.filename_utf8
162 super(FilePatchDiff, self).set_relpath(relpath) 172 super(FilePatchDiff, self).set_relpath(relpath)
163 # Update the header too. 173 # Update the header too.
164 source_filename = self.source_filename or self.filename 174 filename = self.filename_utf8
175 source_filename = self.source_filename_utf8 or self.filename_utf8
165 lines = self.diff_header.splitlines(True) 176 lines = self.diff_header.splitlines(True)
166 for i, line in enumerate(lines): 177 for i, line in enumerate(lines):
167 if line.startswith('diff --git'): 178 if line.startswith('diff --git'):
168 lines[i] = line.replace( 179 lines[i] = line.replace(
169 'a/' + old_source_filename, source_filename).replace( 180 'a/' + old_source_filename, source_filename).replace(
170 'b/' + old_filename, self.filename) 181 'b/' + old_filename, filename)
171 elif re.match(r'^\w+ from .+$', line) or line.startswith('---'): 182 elif re.match(r'^\w+ from .+$', line) or line.startswith('---'):
172 lines[i] = line.replace(old_source_filename, source_filename) 183 lines[i] = line.replace(old_source_filename, source_filename)
173 elif re.match(r'^\w+ to .+$', line) or line.startswith('+++'): 184 elif re.match(r'^\w+ to .+$', line) or line.startswith('+++'):
174 lines[i] = line.replace(old_filename, self.filename) 185 lines[i] = line.replace(old_filename, filename)
175 self.diff_header = ''.join(lines) 186 self.diff_header = ''.join(lines)
176 187
177 def _split_header(self, diff): 188 def _split_header(self, diff):
178 """Splits a diff in two: the header and the hunks.""" 189 """Splits a diff in two: the header and the hunks."""
179 header = [] 190 header = []
180 hunks = diff.splitlines(True) 191 hunks = diff.splitlines(True)
181 while hunks: 192 while hunks:
182 header.append(hunks.pop(0)) 193 header.append(hunks.pop(0))
183 if header[-1].startswith('--- '): 194 if header[-1].startswith('--- '):
184 break 195 break
185 else: 196 else:
186 # Some diff may not have a ---/+++ set like a git rename with no change or 197 # Some diff may not have a ---/+++ set like a git rename with no change or
187 # a svn diff with only property change. 198 # a svn diff with only property change.
188 pass 199 pass
189 200
190 if hunks: 201 if hunks:
191 if not hunks[0].startswith('+++ '): 202 if not hunks[0].startswith('+++ '):
192 self._fail('Inconsistent header') 203 self._fail('Inconsistent header')
193 header.append(hunks.pop(0)) 204 header.append(hunks.pop(0))
194 if hunks: 205 if hunks:
195 if not hunks[0].startswith('@@ '): 206 if not hunks[0].startswith('@@ '):
196 self._fail('Inconsistent hunk header') 207 self._fail('Inconsistent hunk header')
197 208
198 # Mangle any \\ in the header to /. 209 # Mangle any \\ in the header to /.
199 header_lines = ('Index:', 'diff', 'copy', 'rename', '+++', '---') 210 header_lines = ('Index:', 'diff', 'copy', 'rename', '+++', '---')
200 basename = os.path.basename(self.filename) 211 basename = os.path.basename(self.filename_utf8)
201 for i in xrange(len(header)): 212 for i in xrange(len(header)):
202 if (header[i].split(' ', 1)[0] in header_lines or 213 if (header[i].split(' ', 1)[0] in header_lines or
203 header[i].endswith(basename)): 214 header[i].endswith(basename)):
204 header[i] = header[i].replace('\\', '/') 215 header[i] = header[i].replace('\\', '/')
205 return ''.join(header), ''.join(hunks) 216 return ''.join(header), ''.join(hunks)
206 217
207 @staticmethod 218 @staticmethod
208 def _is_git_diff_header(diff_header): 219 def _is_git_diff_header(diff_header):
209 """Returns True if the diff for a single files was generated with git.""" 220 """Returns True if the diff for a single files was generated with git."""
210 # Delete: http://codereview.chromium.org/download/issue6368055_22_29.diff 221 # Delete: http://codereview.chromium.org/download/issue6368055_22_29.diff
(...skipping 96 matching lines...) Expand 10 before | Expand all | Expand 10 after
307 while lines: 318 while lines:
308 match = re.match(r'^diff \-\-git (.*?) (.*)$', lines.pop(0)) 319 match = re.match(r'^diff \-\-git (.*?) (.*)$', lines.pop(0))
309 if not match: 320 if not match:
310 continue 321 continue
311 if match.group(1).startswith('a/') and match.group(2).startswith('b/'): 322 if match.group(1).startswith('a/') and match.group(2).startswith('b/'):
312 self.patchlevel = 1 323 self.patchlevel = 1
313 old = self.mangle(match.group(1)) 324 old = self.mangle(match.group(1))
314 new = self.mangle(match.group(2)) 325 new = self.mangle(match.group(2))
315 326
316 # The rename is about the new file so the old file can be anything. 327 # The rename is about the new file so the old file can be anything.
317 if new not in (self.filename, 'dev/null'): 328 if new not in (self.filename_utf8, 'dev/null'):
318 self._fail('Unexpected git diff output name %s.' % new) 329 self._fail('Unexpected git diff output name %s.' % new)
319 if old == 'dev/null' and new == 'dev/null': 330 if old == 'dev/null' and new == 'dev/null':
320 self._fail('Unexpected /dev/null git diff.') 331 self._fail('Unexpected /dev/null git diff.')
321 break 332 break
322 333
323 if not old or not new: 334 if not old or not new:
324 self._fail('Unexpected git diff; couldn\'t find git header.') 335 self._fail('Unexpected git diff; couldn\'t find git header.')
325 336
326 if old not in (self.filename, 'dev/null'): 337 if old not in (self.filename_utf8, 'dev/null'):
327 # Copy or rename. 338 # Copy or rename.
328 self.source_filename = old 339 self.source_filename = old.decode('utf-8')
329 self.is_new = True 340 self.is_new = True
330 341
331 last_line = '' 342 last_line = ''
332 343
333 while lines: 344 while lines:
334 line = lines.pop(0) 345 line = lines.pop(0)
335 self._verify_git_header_process_line(lines, line, last_line) 346 self._verify_git_header_process_line(lines, line, last_line)
336 last_line = line 347 last_line = line
337 348
338 # Cheap check to make sure the file name is at least mentioned in the 349 # Cheap check to make sure the file name is at least mentioned in the
339 # 'diff' header. That the only remaining invariant. 350 # 'diff' header. That the only remaining invariant.
340 if not self.filename in self.diff_header: 351 if not self.filename_utf8 in self.diff_header:
341 self._fail('Diff seems corrupted.') 352 self._fail('Diff seems corrupted.')
342 353
343 def _verify_git_header_process_line(self, lines, line, last_line): 354 def _verify_git_header_process_line(self, lines, line, last_line):
344 """Processes a single line of the header. 355 """Processes a single line of the header.
345 356
346 Returns True if it should continue looping. 357 Returns True if it should continue looping.
347 358
348 Format is described to 359 Format is described to
349 http://www.kernel.org/pub/software/scm/git/docs/git-diff.html 360 http://www.kernel.org/pub/software/scm/git/docs/git-diff.html
350 """ 361 """
351 match = re.match(r'^(rename|copy) from (.+)$', line) 362 match = re.match(r'^(rename|copy) from (.+)$', line)
352 old = self.source_filename or self.filename 363 old = self.source_filename_utf8 or self.filename_utf8
353 if match: 364 if match:
354 if old != match.group(2): 365 if old != match.group(2):
355 self._fail('Unexpected git diff input name for line %s.' % line) 366 self._fail('Unexpected git diff input name for line %s.' % line)
356 if not lines or not lines[0].startswith('%s to ' % match.group(1)): 367 if not lines or not lines[0].startswith('%s to ' % match.group(1)):
357 self._fail( 368 self._fail(
358 'Confused %s from/to git diff for line %s.' % 369 'Confused %s from/to git diff for line %s.' %
359 (match.group(1), line)) 370 (match.group(1), line))
360 return 371 return
361 372
362 match = re.match(r'^(rename|copy) to (.+)$', line) 373 match = re.match(r'^(rename|copy) to (.+)$', line)
363 if match: 374 if match:
364 if self.filename != match.group(2): 375 if self.filename_utf8 != match.group(2):
365 self._fail('Unexpected git diff output name for line %s.' % line) 376 self._fail('Unexpected git diff output name for line %s.' % line)
366 if not last_line.startswith('%s from ' % match.group(1)): 377 if not last_line.startswith('%s from ' % match.group(1)):
367 self._fail( 378 self._fail(
368 'Confused %s from/to git diff for line %s.' % 379 'Confused %s from/to git diff for line %s.' %
369 (match.group(1), line)) 380 (match.group(1), line))
370 return 381 return
371 382
372 match = re.match(r'^deleted file mode (\d{6})$', line) 383 match = re.match(r'^deleted file mode (\d{6})$', line)
373 if match: 384 if match:
374 # It is necessary to parse it because there may be no hunk, like when the 385 # It is necessary to parse it because there may be no hunk, like when the
(...skipping 22 matching lines...) Expand all
397 if not lines or not lines[0].startswith('+++'): 408 if not lines or not lines[0].startswith('+++'):
398 self._fail('Missing git diff output name.') 409 self._fail('Missing git diff output name.')
399 return 410 return
400 411
401 match = re.match(r'^\+\+\+ (.*)$', line) 412 match = re.match(r'^\+\+\+ (.*)$', line)
402 if match: 413 if match:
403 if not last_line.startswith('---'): 414 if not last_line.startswith('---'):
404 self._fail('Unexpected git diff: --- not following +++.') 415 self._fail('Unexpected git diff: --- not following +++.')
405 if '/dev/null' == match.group(1): 416 if '/dev/null' == match.group(1):
406 self.is_delete = True 417 self.is_delete = True
407 elif self.filename != self.mangle(match.group(1)): 418 elif self.filename_utf8 != self.mangle(match.group(1)):
408 self._fail( 419 self._fail(
409 'Unexpected git diff: %s != %s.' % (self.filename, match.group(1))) 420 'Unexpected git diff: %s != %s.' % (self.filename, match.group(1)))
410 if lines: 421 if lines:
411 self._fail('Crap after +++') 422 self._fail('Crap after +++')
412 # We're done. 423 # We're done.
413 return 424 return
414 425
415 def _verify_svn_header(self): 426 def _verify_svn_header(self):
416 """Sanity checks the header. 427 """Sanity checks the header.
417 428
418 A svn diff can contain only property changes, in that case there will be no 429 A svn diff can contain only property changes, in that case there will be no
419 proper header. To make things worse, this property change header is 430 proper header. To make things worse, this property change header is
420 localized. 431 localized.
421 """ 432 """
422 lines = self.diff_header.splitlines() 433 lines = self.diff_header.splitlines()
423 last_line = '' 434 last_line = ''
424 435
425 while lines: 436 while lines:
426 line = lines.pop(0) 437 line = lines.pop(0)
427 self._verify_svn_header_process_line(lines, line, last_line) 438 self._verify_svn_header_process_line(lines, line, last_line)
428 last_line = line 439 last_line = line
429 440
430 # Cheap check to make sure the file name is at least mentioned in the 441 # Cheap check to make sure the file name is at least mentioned in the
431 # 'diff' header. That the only remaining invariant. 442 # 'diff' header. That the only remaining invariant.
432 if not self.filename in self.diff_header: 443 if not self.filename_utf8 in self.diff_header:
433 self._fail('Diff seems corrupted.') 444 self._fail('Diff seems corrupted.')
434 445
435 def _verify_svn_header_process_line(self, lines, line, last_line): 446 def _verify_svn_header_process_line(self, lines, line, last_line):
436 """Processes a single line of the header. 447 """Processes a single line of the header.
437 448
438 Returns True if it should continue looping. 449 Returns True if it should continue looping.
439 """ 450 """
440 match = re.match(r'^--- ([^\t]+).*$', line) 451 match = re.match(r'^--- ([^\t]+).*$', line)
441 if match: 452 if match:
442 if last_line[:3] in ('---', '+++'): 453 if last_line[:3] in ('---', '+++'):
443 self._fail('--- and +++ are reversed') 454 self._fail('--- and +++ are reversed')
444 if match.group(1) == '/dev/null': 455 if match.group(1) == '/dev/null':
445 self.is_new = True 456 self.is_new = True
446 elif self.mangle(match.group(1)) != self.filename: 457 elif self.mangle(match.group(1)) != self.filename_utf8:
447 # guess the source filename. 458 # guess the source filename.
448 self.source_filename = match.group(1) 459 self.source_filename = match.group(1).decode('utf-8')
449 self.is_new = True 460 self.is_new = True
450 if not lines or not lines[0].startswith('+++'): 461 if not lines or not lines[0].startswith('+++'):
451 self._fail('Nothing after header.') 462 self._fail('Nothing after header.')
452 return 463 return
453 464
454 match = re.match(r'^\+\+\+ ([^\t]+).*$', line) 465 match = re.match(r'^\+\+\+ ([^\t]+).*$', line)
455 if match: 466 if match:
456 if not last_line.startswith('---'): 467 if not last_line.startswith('---'):
457 self._fail('Unexpected diff: --- not following +++.') 468 self._fail('Unexpected diff: --- not following +++.')
458 if match.group(1) == '/dev/null': 469 if match.group(1) == '/dev/null':
459 self.is_delete = True 470 self.is_delete = True
460 elif self.mangle(match.group(1)) != self.filename: 471 elif self.mangle(match.group(1)) != self.filename_utf8:
461 self._fail('Unexpected diff: %s.' % match.group(1)) 472 self._fail('Unexpected diff: %s.' % match.group(1))
462 if lines: 473 if lines:
463 self._fail('Crap after +++') 474 self._fail('Crap after +++')
464 # We're done. 475 # We're done.
465 return 476 return
466 477
467 478
468 class PatchSet(object): 479 class PatchSet(object):
469 """A list of FilePatch* objects.""" 480 """A list of FilePatch* objects."""
470 481
471 def __init__(self, patches): 482 def __init__(self, patches):
472 for p in patches: 483 for p in patches:
473 assert isinstance(p, FilePatchBase) 484 assert isinstance(p, FilePatchBase)
474 485
475 def key(p): 486 def key(p):
476 """Sort by ordering of application. 487 """Sort by ordering of application.
477 488
478 File move are first. 489 File move are first.
479 Deletes are last. 490 Deletes are last.
480 """ 491 """
481 if p.source_filename: 492 if p.source_filename:
482 return (p.is_delete, p.source_filename, p.filename) 493 return (p.is_delete, p.source_filename_utf8, p.filename_utf8)
483 else: 494 else:
484 # tuple are always greater than string, abuse that fact. 495 # tuple are always greater than string, abuse that fact.
485 return (p.is_delete, (p.filename,), p.filename) 496 return (p.is_delete, (p.filename_utf8,), p.filename_utf8)
486 497
487 self.patches = sorted(patches, key=key) 498 self.patches = sorted(patches, key=key)
488 499
489 def set_relpath(self, relpath): 500 def set_relpath(self, relpath):
490 """Used to offset the patch into a subdirectory.""" 501 """Used to offset the patch into a subdirectory."""
491 for patch in self.patches: 502 for patch in self.patches:
492 patch.set_relpath(relpath) 503 patch.set_relpath(relpath)
493 504
494 def __iter__(self): 505 def __iter__(self):
495 for patch in self.patches: 506 for patch in self.patches:
496 yield patch 507 yield patch
497 508
498 def __getitem__(self, key): 509 def __getitem__(self, key):
499 return self.patches[key] 510 return self.patches[key]
500 511
501 @property 512 @property
502 def filenames(self): 513 def filenames(self):
503 return [p.filename for p in self.patches] 514 return [p.filename for p in self.patches]
OLD | NEW
« no previous file with comments | « no previous file | tests/patch_test.py » ('j') | tests/patch_test.py » ('J')

Powered by Google App Engine
This is Rietveld 408576698