Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(213)

Side by Side Diff: third_party/logilab/common/textutils.py

Issue 10447014: Add pylint to depot_tools. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/tools/depot_tools
Patch Set: Fix unittests. Created 8 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « third_party/logilab/common/testlib.py ('k') | third_party/logilab/common/tree.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 # copyright 2003-2011 LOGILAB S.A. (Paris, FRANCE), all rights reserved.
2 # contact http://www.logilab.fr/ -- mailto:contact@logilab.fr
3 #
4 # This file is part of logilab-common.
5 #
6 # logilab-common is free software: you can redistribute it and/or modify it under
7 # the terms of the GNU Lesser General Public License as published by the Free
8 # Software Foundation, either version 2.1 of the License, or (at your option) any
9 # later version.
10 #
11 # logilab-common is distributed in the hope that it will be useful, but WITHOUT
12 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
13 # FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
14 # details.
15 #
16 # You should have received a copy of the GNU Lesser General Public License along
17 # with logilab-common. If not, see <http://www.gnu.org/licenses/>.
18 """Some text manipulation utility functions.
19
20
21 :group text formatting: normalize_text, normalize_paragraph, pretty_match,\
22 unquote, colorize_ansi
23 :group text manipulation: searchall, splitstrip
24 :sort: text formatting, text manipulation
25
26 :type ANSI_STYLES: dict(str)
27 :var ANSI_STYLES: dictionary mapping style identifier to ANSI terminal code
28
29 :type ANSI_COLORS: dict(str)
30 :var ANSI_COLORS: dictionary mapping color identifier to ANSI terminal code
31
32 :type ANSI_PREFIX: str
33 :var ANSI_PREFIX:
34 ANSI terminal code notifying the start of an ANSI escape sequence
35
36 :type ANSI_END: str
37 :var ANSI_END:
38 ANSI terminal code notifying the end of an ANSI escape sequence
39
40 :type ANSI_RESET: str
41 :var ANSI_RESET:
42 ANSI terminal code resetting format defined by a previous ANSI escape sequence
43 """
44 __docformat__ = "restructuredtext en"
45
46 import sys
47 import re
48 import os.path as osp
49 from warnings import warn
50 from unicodedata import normalize as _uninormalize
51 try:
52 from os import linesep
53 except ImportError:
54 linesep = '\n' # gae
55
56 from logilab.common.deprecation import deprecated
57
# Characters handled by hand in `unormalize`: each one is replaced by the ASCII
# string given here instead of going through unicode decomposition.
MANUAL_UNICODE_MAP = {
    u'\xa1': u'!',       # INVERTED EXCLAMATION MARK
    u'\u0142': u'l',     # LATIN SMALL LETTER L WITH STROKE
    u'\u2044': u'/',     # FRACTION SLASH
    u'\xc6': u'AE',      # LATIN CAPITAL LETTER AE
    u'\xa9': u'(c)',     # COPYRIGHT SIGN
    u'\xab': u'"',       # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
    u'\xe6': u'ae',      # LATIN SMALL LETTER AE
    u'\xae': u'(r)',     # REGISTERED SIGN
    u'\u0153': u'oe',    # LATIN SMALL LIGATURE OE
    u'\u0152': u'OE',    # LATIN CAPITAL LIGATURE OE
    u'\xd8': u'O',       # LATIN CAPITAL LETTER O WITH STROKE
    u'\xf8': u'o',       # LATIN SMALL LETTER O WITH STROKE
    u'\xbb': u'"',       # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
    u'\xdf': u'ss',      # LATIN SMALL LETTER SHARP S
}


def unormalize(ustring, ignorenonascii=None, substitute=None):
    """replace diacritical characters with their corresponding ascii characters

    Characters listed in `MANUAL_UNICODE_MAP` are replaced by the mapped
    string. Every other character is converted to its long normalized form
    (a unicode character may be transformed into several characters) and
    only the first resulting character is kept.
    The normal form KD (NFKD) will apply the compatibility decomposition, i.e.
    replace all compatibility characters with their equivalents.

    :type ustring: unicode
    :param ustring: the string to convert

    :type ignorenonascii: bool or None
    :param ignorenonascii:
      deprecated; when true it behaves like ``substitute=''``. Passing any
      value other than None emits a DeprecationWarning

    :type substitute: str
    :param substitute: replacement character to use if decomposition fails

    :raise ValueError:
      if a character cannot be reduced to ascii and no substitute is given

    :rtype: unicode
    :return: the converted string

    :see: Another project about ASCII transliterations of Unicode text
          http://pypi.python.org/pypi/Unidecode
    """
    # backward compatibility, ignorenonascii was a boolean
    if ignorenonascii is not None:
        warn("ignorenonascii is deprecated, use substitute named parameter instead",
             DeprecationWarning, stacklevel=2)
        if ignorenonascii:
            substitute = ''
    res = []
    for letter in ustring:
        replacement = MANUAL_UNICODE_MAP.get(letter)
        if replacement is None:
            # not hand-mapped: keep the base character of the decomposition
            replacement = _uninormalize('NFKD', letter)[0]
            if ord(replacement) >= 2 ** 7:
                if substitute is None:
                    raise ValueError("can't deal with non-ascii based characters")
                replacement = substitute
        res.append(replacement)
    return u''.join(res)
107
def unquote(string):
    """remove optional quotes (simple or double) from the string

    A leading quote and a trailing quote are each removed independently;
    they do not have to match.

    :type string: str or unicode
    :param string: an optionally quoted string

    :rtype: str or unicode
    :return: the unquoted string (or the input string if it wasn't quoted)
    """
    if not string:
        return string
    if string[0] in '"\'':
        string = string[1:]
    # the string may be empty now (input was a single quote character)
    if string and string[-1] in '"\'':
        string = string[:-1]
    return string
124
125
# raw strings: '\s' is not a valid escape sequence in a plain string literal
_BLANKLINES_RGX = re.compile(r'\r?\n\r?\n')
_NORM_SPACES_RGX = re.compile(r'\s+')


def normalize_text(text, line_len=80, indent='', rest=False):
    """normalize a text to display it with a maximum line size and
    optionally arbitrary indentation. Line jumps are normalized but blank
    lines are kept. The indentation string may be used to insert a
    comment (#) or a quoting (>) mark for instance.

    :type text: str or unicode
    :param text: the input text to normalize

    :type line_len: int
    :param line_len: expected maximum line's length, default to 80

    :type indent: str or unicode
    :param indent: optional string to use as indentation

    :type rest: bool
    :param rest:
      when true each paragraph is formatted with `normalize_rest_paragraph`
      instead of `normalize_paragraph`

    :rtype: str or unicode
    :return:
      the input text normalized to fit on lines with a maximized size
      inferior to `line_len`, and optionally prefixed by an
      indentation string
    """
    if rest:
        normp = normalize_rest_paragraph
    else:
        normp = normalize_paragraph
    # paragraphs are the chunks separated by an empty line
    paragraphs = [normp(paragraph, line_len, indent)
                  for paragraph in _BLANKLINES_RGX.split(text)]
    # paragraphs are joined by a line holding only the indentation string
    return ('%s%s%s' % (linesep, indent, linesep)).join(paragraphs)
158
159
def normalize_paragraph(text, line_len=80, indent=''):
    """normalize a text to display it with a maximum line size and
    optionally arbitrary indentation. Line jumps are normalized. The
    indentation string may be used to insert a comment mark for
    instance.

    :type text: str or unicode
    :param text: the input text to normalize

    :type line_len: int
    :param line_len: expected maximum line's length, default to 80

    :type indent: str or unicode
    :param indent: optional string to use as indentation

    :rtype: str or unicode
    :return:
      the input text normalized to fit on lines with a maximized size
      inferior to `line_len`, and optionally prefixed by an
      indentation string
    """
    text = _NORM_SPACES_RGX.sub(' ', text)
    # width left once the indentation is accounted for; clamped because a
    # negative width makes splittext() hand back the whole text as the
    # remainder, which would keep this loop running forever
    line_len = max(line_len - len(indent), 0)
    lines = []
    while text:
        aline, text = splittext(text.strip(), line_len)
        lines.append(indent + aline)
    return linesep.join(lines)
188
def normalize_rest_paragraph(text, line_len=80, indent=''):
    """normalize a ReST text to display it with a maximum line size and
    optionally arbitrary indentation. Line jumps are normalized. The
    indentation string may be used to insert a comment mark for
    instance.

    Unlike `normalize_paragraph`, the existing line breaks are kept: each
    input line is handled on its own and only split when it is too long.

    :type text: str or unicode
    :param text: the input text to normalize

    :type line_len: int
    :param line_len: expected maximum line's length, default to 80

    :type indent: str or unicode
    :param indent: optional string to use as indentation

    :rtype: str or unicode
    :return:
      the input text normalized to fit on lines with a maximized size
      inferior to `line_len`, and optionally prefixed by an
      indentation string
    """
    lines = []
    # clamped: with a negative width the `while` below would never terminate
    line_len = max(line_len - len(indent), 0)
    for line in text.splitlines():
        line = _NORM_SPACES_RGX.sub(' ', line.strip())
        while len(line) > line_len:
            # too long line, need split
            line, remainder = splittext(line, line_len)
            lines.append(indent + line)
            # the remainder is re-checked with one trailing space appended,
            # as the original implementation did
            if remainder:
                line = remainder + ' '
            else:
                line = ''
        if line:
            lines.append(indent + line.strip())
    return linesep.join(lines)
228
229
def splittext(text, line_len):
    """split the given text on space according to the given max line size

    The split is done on the last space found at or before `line_len`. When
    there is none (the first word is longer than `line_len`), the split is
    done on the first space found after it, or not at all.

    :type text: str or unicode
    :param text: the text to split

    :type line_len: int
    :param line_len: maximum size of the first line; negative values are
      handled as 0

    :rtype: tuple
    :return: a 2-uple:

      * a line <= line_len if possible
      * the rest of the text which has to be reported on another line
    """
    # a negative width would turn into a negative slice index below and
    # return the whole text as the remainder
    line_len = max(line_len, 0)
    if len(text) <= line_len:
        return text, ''
    pos = min(len(text) - 1, line_len)
    # search backward for a space to break on
    while pos > 0 and text[pos] != ' ':
        pos -= 1
    if pos == 0:
        # no space before the limit: search forward instead
        pos = min(len(text), line_len)
        while len(text) > pos and text[pos] != ' ':
            pos += 1
    return text[:pos], text[pos + 1:].strip()
247
248
def splitstrip(string, sep=','):
    """return a list of stripped string by splitting the string given as
    argument on `sep` (',' by default). Empty string are discarded.

    >>> splitstrip('a, b, c   ,  4,,')
    ['a', 'b', 'c', '4']
    >>> splitstrip('a')
    ['a']
    >>>

    :type string: str or unicode
    :param string: a csv line

    :type sep: str or unicode
    :param sep: field separator, default to the comma (',')

    :rtype: list of str or unicode
    :return: the stripped, non empty fields, in their original order
    """
    # strip each field once, then drop the ones left empty
    stripped = (word.strip() for word in string.split(sep))
    return [word for word in stripped if word]
269
# former name of splitstrip, kept as an alias wrapped by `deprecated`
get_csv = deprecated('get_csv is deprecated, use splitstrip')(splitstrip)
271
272
def split_url_or_path(url_or_path):
    """split a string containing either an url of the form <scheme>://<path>
    or a local file system path into what comes before its latest component
    and the latest component itself

    Trailing separators ('/' for an url, `os.sep` for a path) are ignored.

    :type url_or_path: str or unicode
    :param url_or_path: the url or file system path to split

    :rtype: list or tuple
    :return:
      a 2 items sequence (head, latest component): a list for an url (result
      of `rsplit`), a tuple for a file system path (result of
      `os.path.split`)
    """
    if '://' in url_or_path:
        return url_or_path.rstrip('/').rsplit('/', 1)
    return osp.split(url_or_path.rstrip(osp.sep))
280
281
def text_to_dict(text):
    """parse multilines text containing simple 'key=value' lines and return a
    dict of {'key': 'value'}. When the same key is encountered multiple time,
    value is turned into a list containing all values.

    Empty lines and lines starting with '#' are skipped. Keys and values are
    stripped, and a line is only split on its first '='.

    >>> text_to_dict('''multiple=1
    ... multiple= 2
    ... single =3
    ... ''')
    {'single': '3', 'multiple': ['1', '2']}

    :type text: str or unicode or None
    :param text: the text to parse; a false value gives an empty dict

    :raise ValueError: if a non empty, non comment line contains no '='

    :rtype: dict
    :return: the parsed key / value(s) mapping
    """
    res = {}
    if not text:
        return res
    for line in text.splitlines():
        line = line.strip()
        if not line or line.startswith('#'):
            continue
        key, equal, value = line.partition('=')
        if not equal:
            # same exception type as the former failed tuple unpacking, but
            # with a message telling which line is wrong
            raise ValueError("missing '=' in line %r" % line)
        key, value = key.strip(), value.strip()
        if key not in res:
            res[key] = value
        elif isinstance(res[key], list):
            res[key].append(value)
        else:
            # second occurrence of the key: switch to a list of values
            res[key] = [res[key], value]
    return res
309
310
_BLANK_URE = r'(\s|,)+'
_BLANK_RE = re.compile(_BLANK_URE)
__VALUE_URE = r'-?(([0-9]+\.[0-9]*)|((0x?)?[0-9]+))'
__UNITS_URE = r'[a-zA-Z]+'
_VALUE_RE = re.compile(r'(?P<value>%s)(?P<unit>%s)?' % (__VALUE_URE, __UNITS_URE))

# multiplier of each unit, expressed in bytes
BYTE_UNITS = {
    "b": 1,
    "kb": 1024,
    "mb": 1024 ** 2,
    "gb": 1024 ** 3,
    "tb": 1024 ** 4,
}

# multiplier of each unit, expressed in seconds
TIME_UNITS = {
    "ms": 0.001,
    "s": 1,
    "min": 60,
    "h": 60 * 60,
    "d": 60 * 60 * 24,
}


def apply_units(string, units, inter=None, final=float, blank_reg=_BLANK_RE,
                value_reg=_VALUE_RE):
    """Parse the string applying the units defined in units
    (e.g.: "1.5m", {'m': 60} -> 90).

    Every "<value><unit>" found in the string is converted and the results
    are summed, so "1h 30min" is a valid input. Units are looked up in lower
    case. A value without unit is used as is.

    :type string: str or unicode
    :param string: the string to parse

    :type units: dict (or any object with __getitem__ using basestring key)
    :param units: a dict mapping a unit string repr to its value

    :type inter: type
    :param inter:
      used to parse every intermediate value (the result must support
      multiplication and addition), default to `final`

    :type final: type
    :param final: used to convert the sum of all values, default to float

    :type blank_reg: regexp
    :param blank_reg: should match every blank char to ignore.

    :type value_reg: regexp with "value" and optional "unit" group
    :param value_reg: match a value and its unit in the string

    :raise KeyError: if a unit is not found in `units`

    :return: the sum of the parsed values, converted using `final`
    """
    if inter is None:
        inter = final
    # honour the regexp given by the caller, not the module default
    string = blank_reg.sub('', string)
    values = []
    for match in value_reg.finditer(string):
        dic = match.groupdict()
        lit, unit = dic["value"], dic.get("unit")
        value = inter(lit)
        if unit is not None:
            try:
                value *= units[unit.lower()]
            except KeyError:
                raise KeyError('invalid unit %s. valid units are %s' %
                               (unit, list(units.keys())))
        values.append(value)
    return final(sum(values))
371
372
_LINE_RGX = re.compile(r'\r\n|\r+|\n')


def pretty_match(match, string, underline_char='^'):
    """return a string with the match location underlined:

    >>> import re
    >>> print(pretty_match(re.search('mange', 'il mange du bacon'), 'il mange du bacon'))
    il mange du bacon
       ^^^^^
    >>>

    Line breaks of the result are normalized to `os.linesep` and trailing
    whitespace is removed.

    :type match: _sre.SRE_match
    :param match: object returned by re.match, re.search or re.finditer

    :type string: str or unicode
    :param string:
      the string on which the regular expression has been applied to
      obtain the `match` object

    :type underline_char: str or unicode
    :param underline_char:
      character to use to underline the matched section, default to the
      caret '^'

    :rtype: str or unicode
    :return:
      the original string with an inserted line to underline the match
      location
    """
    start = match.start()
    end = match.end()
    # The line breaks surrounding the match are searched in the string as it
    # was given: the offsets of `match` refer to it, and would be shifted if
    # line breaks were first replaced by a `linesep` of another length.
    prev_eol = next_eol = None
    for eol in _LINE_RGX.finditer(string):
        if eol.end() <= start:
            prev_eol = eol
        elif eol.start() >= end:
            next_eol = eol
            break
    result = []
    if prev_eol is None:
        start_line_pos = 0
    else:
        # everything before the line holding the start of the match
        result.append(_LINE_RGX.sub(linesep, string[:prev_eol.start()]))
        start_line_pos = prev_eol.end()
    offset = start - start_line_pos
    underline = ' ' * offset + underline_char * (end - start)
    if next_eol is None:
        result.append(_LINE_RGX.sub(linesep, string[start_line_pos:]))
        result.append(underline)
    else:
        matched_lines = string[start_line_pos:next_eol.start()]
        result.append(_LINE_RGX.sub(linesep, matched_lines))
        result.append(underline)
        # everything after the line holding the end of the match
        result.append(_LINE_RGX.sub(linesep, string[next_eol.end():]))
    return linesep.join(result).rstrip()
426
427
428 # Ansi colorization ###########################################################
429
ANSI_PREFIX = '\033['
ANSI_END = 'm'
ANSI_RESET = '\033[0m'
ANSI_STYLES = {
    'reset': "0",
    'bold': "1",
    'italic': "3",
    'underline': "4",
    'blink': "5",
    'inverse': "7",
    'strike': "9",
}
ANSI_COLORS = {
    'reset': "0",
    'black': "30",
    'red': "31",
    'green': "32",
    'yellow': "33",
    'blue': "34",
    'magenta': "35",
    'cyan': "36",
    'white': "37",
}


def _get_ansi_code(color=None, style=None):
    """return ansi escape code corresponding to color and style

    :type color: str or None
    :param color:
      the color name (see `ANSI_COLORS` for available values)
      or the color number when 256 colors are available

    :type style: str or None
    :param style:
      style string (see `ANSI_STYLES` for available values). To get
      several style effects at the same time, use a comma as separator.

    :raise KeyError: if a nonexistent color or style identifier is given

    :rtype: str
    :return:
      the built escape code, or an empty string when neither a color nor a
      style is given
    """
    ansi_code = []
    if style:
        ansi_code.extend(ANSI_STYLES[effect] for effect in splitstrip(style))
    if color:
        if color.isdigit():
            # a number selects a color using the '38;5;<number>' sequence
            ansi_code.extend(['38', '5', color])
        else:
            ansi_code.append(ANSI_COLORS[color])
    if ansi_code:
        return ANSI_PREFIX + ';'.join(ansi_code) + ANSI_END
    return ''


def colorize_ansi(msg, color=None, style=None):
    """colorize message by wrapping it with ansi escape codes

    :type msg: str or unicode
    :param msg: the message string to colorize

    :type color: str or None
    :param color:
      the color identifier (see `ANSI_COLORS` for available values)
      or the color number when 256 colors are available

    :type style: str or None
    :param style:
      style string (see `ANSI_STYLES` for available values). To get
      several style effects at the same time, use a comma as separator.

    :raise KeyError: if a nonexistent color or style identifier is given

    :rtype: str or unicode
    :return:
      the ansi escaped string, or `msg` itself when there is nothing to apply
    """
    # If both color and style are not defined, then leave the text as is
    if color is None and style is None:
        return msg
    escape_code = _get_ansi_code(color, style)
    # No code is built when color and style are both empty: don't wrap msg
    # (an unknown identifier does not end up here, it raises KeyError)
    if escape_code:
        return '%s%s%s' % (escape_code, msg, ANSI_RESET)
    return msg
515
# color used for each kind of diff line
DIFF_STYLE = {'separator': 'cyan', 'remove': 'red', 'add': 'green'}


def diff_colorize_ansi(lines, out=sys.stdout, style=DIFF_STYLE):
    """write the lines of a diff to `out`, colorized with ansi escape codes

    Lines starting with '--- ' or '+++ ' use the 'separator' color, other
    lines starting with '-' the 'remove' color and other lines starting with
    '+' the 'add' color. Any other line (an empty one included) is written
    unchanged. Nothing is added to the lines: they are expected to hold their
    own line break.

    :type lines: iterable of str or unicode
    :param lines: the lines of the diff

    :type out: file like object
    :param out: object whose `write` method receives every line, default to
      the `sys.stdout` in place when this module was imported

    :type style: dict
    :param style:
      mapping with 'separator', 'remove' and 'add' keys giving the color to
      use (see `ANSI_COLORS` for available values)
    """
    for line in lines:
        # separators are tested first since they also start with '-' or '+'
        if line.startswith(('--- ', '+++ ')):
            out.write(colorize_ansi(line, style['separator']))
        elif line.startswith('-'):
            out.write(colorize_ansi(line, style['remove']))
        elif line.startswith('+'):
            out.write(colorize_ansi(line, style['add']))
        else:
            out.write(line)
532
OLDNEW
« no previous file with comments | « third_party/logilab/common/testlib.py ('k') | third_party/logilab/common/tree.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698