OLD | NEW |
(Empty) | |
| 1 # pylint: disable=W0622 |
| 2 # Copyright (c) 2004-2006 LOGILAB S.A. (Paris, FRANCE). |
| 3 # http://www.logilab.fr/ -- mailto:contact@logilab.fr |
| 4 # |
| 5 # This program is free software; you can redistribute it and/or modify it under |
| 6 # the terms of the GNU General Public License as published by the Free Software |
| 7 # Foundation; either version 2 of the License, or (at your option) any later |
| 8 # version. |
| 9 # |
| 10 # This program is distributed in the hope that it will be useful, but WITHOUT |
| 11 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
| 12 # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details |
| 13 # |
| 14 # You should have received a copy of the GNU General Public License along with |
| 15 # this program; if not, write to the Free Software Foundation, Inc., |
| 16 # 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. |
| 17 """a similarities / code duplication command line tool and pylint checker |
| 18 """ |
| 19 from __future__ import generators |
| 20 |
| 21 import sys |
| 22 from itertools import izip |
| 23 |
| 24 from logilab.common.ureports import Table |
| 25 |
| 26 from pylint.interfaces import IRawChecker |
| 27 from pylint.checkers import BaseChecker, table_lines_from_stats |
| 28 |
| 29 |
| 30 class Similar: |
| 31 """finds copy-pasted lines of code in a project""" |
| 32 |
| 33 def __init__(self, min_lines=4, ignore_comments=False, |
| 34 ignore_docstrings=False): |
| 35 self.min_lines = min_lines |
| 36 self.ignore_comments = ignore_comments |
| 37 self.ignore_docstrings = ignore_docstrings |
| 38 self.linesets = [] |
| 39 |
| 40 def append_stream(self, streamid, stream): |
| 41 """append a file to search for similarities""" |
| 42 stream.seek(0) # XXX may be removed with astng > 0.23 |
| 43 self.linesets.append(LineSet(streamid, |
| 44 stream.readlines(), |
| 45 self.ignore_comments, |
| 46 self.ignore_docstrings)) |
| 47 |
| 48 def run(self): |
| 49 """start looking for similarities and display results on stdout""" |
| 50 self._display_sims(self._compute_sims()) |
| 51 |
| 52 def _compute_sims(self): |
| 53 """compute similarities in appended files""" |
| 54 no_duplicates = {} |
| 55 for num, lineset1, idx1, lineset2, idx2 in self._iter_sims(): |
| 56 duplicate = no_duplicates.setdefault(num, []) |
| 57 for couples in duplicate: |
| 58 if (lineset1, idx1) in couples or (lineset2, idx2) in couples: |
| 59 couples.add( (lineset1, idx1) ) |
| 60 couples.add( (lineset2, idx2) ) |
| 61 break |
| 62 else: |
| 63 duplicate.append( set([(lineset1, idx1), (lineset2, idx2)]) ) |
| 64 sims = [] |
| 65 for num, ensembles in no_duplicates.iteritems(): |
| 66 for couples in ensembles: |
| 67 sims.append( (num, couples) ) |
| 68 sims.sort() |
| 69 sims.reverse() |
| 70 return sims |
| 71 |
| 72 def _display_sims(self, sims): |
| 73 """display computed similarities on stdout""" |
| 74 nb_lignes_dupliquees = 0 |
| 75 for num, couples in sims: |
| 76 print |
| 77 print num, "similar lines in", len(couples), "files" |
| 78 couples = sorted(couples) |
| 79 for lineset, idx in couples: |
| 80 print "==%s:%s" % (lineset.name, idx) |
| 81 # pylint: disable=W0631 |
| 82 for line in lineset._real_lines[idx:idx+num]: |
| 83 print " ", line, |
| 84 nb_lignes_dupliquees += num * (len(couples)-1) |
| 85 nb_total_lignes = sum([len(lineset) for lineset in self.linesets]) |
| 86 print "TOTAL lines=%s duplicates=%s percent=%.2f" \ |
| 87 % (nb_total_lignes, nb_lignes_dupliquees, |
| 88 nb_lignes_dupliquees*100. / nb_total_lignes) |
| 89 |
| 90 def _find_common(self, lineset1, lineset2): |
| 91 """find similarities in the two given linesets""" |
| 92 lines1 = lineset1.enumerate_stripped |
| 93 lines2 = lineset2.enumerate_stripped |
| 94 find = lineset2.find |
| 95 index1 = 0 |
| 96 min_lines = self.min_lines |
| 97 while index1 < len(lineset1): |
| 98 skip = 1 |
| 99 num = 0 |
| 100 for index2 in find( lineset1[index1] ): |
| 101 non_blank = 0 |
| 102 for num, ((_, line1), (_, line2)) in enumerate( |
| 103 izip(lines1(index1), lines2(index2))): |
| 104 if line1 != line2: |
| 105 if non_blank > min_lines: |
| 106 yield num, lineset1, index1, lineset2, index2 |
| 107 skip = max(skip, num) |
| 108 break |
| 109 if line1: |
| 110 non_blank += 1 |
| 111 else: |
| 112                     # no mismatch: we may have reached the end of a lineset |
| 113 num += 1 |
| 114 if non_blank > min_lines: |
| 115 yield num, lineset1, index1, lineset2, index2 |
| 116 skip = max(skip, num) |
| 117 index1 += skip |
| 118 |
| 119 def _iter_sims(self): |
| 120 """iterate on similarities among all files, by making a cartesian |
| 121 product |
| 122 """ |
| 123 for idx, lineset in enumerate(self.linesets[:-1]): |
| 124 for lineset2 in self.linesets[idx+1:]: |
| 125 for sim in self._find_common(lineset, lineset2): |
| 126 yield sim |
| 127 |
| 128 def stripped_lines(lines, ignore_comments, ignore_docstrings): |
| 129 strippedlines = [] |
| 130 docstring = None |
| 131 for line in lines: |
| 132 line = line.strip() |
| 133 if ignore_docstrings: |
| 134 if not docstring and \ |
| 135 (line.startswith('"""') or line.startswith("'''")): |
| 136 docstring = line[:3] |
| 137 line = line[3:] |
| 138 if docstring: |
| 139 if line.endswith(docstring): |
| 140 docstring = None |
| 141 line = '' |
| 142 if ignore_comments: |
| 143 # XXX should use regex in checkers/format to avoid cutting |
| 144 # at a "#" in a string |
| 145 line = line.split('#', 1)[0].strip() |
| 146 strippedlines.append(line) |
| 147 return strippedlines |
| 148 |
| 149 class LineSet: |
| 150 """Holds and indexes all the lines of a single source file""" |
| 151 def __init__(self, name, lines, ignore_comments=False, |
| 152 ignore_docstrings=False): |
| 153 self.name = name |
| 154 self._real_lines = lines |
| 155 self._stripped_lines = stripped_lines(lines, ignore_comments, |
| 156 ignore_docstrings) |
| 157 self._index = self._mk_index() |
| 158 |
| 159 def __str__(self): |
| 160 return '<Lineset for %s>' % self.name |
| 161 |
| 162 def __len__(self): |
| 163 return len(self._real_lines) |
| 164 |
| 165 def __getitem__(self, index): |
| 166 return self._stripped_lines[index] |
| 167 |
| 168 def __lt__(self, other): |
| 169 return self.name < other.name |
| 170 |
| 171 def __hash__(self): |
| 172 return id(self) |
| 173 |
| 174 def enumerate_stripped(self, start_at=0): |
| 175 """return an iterator on stripped lines, starting from a given index |
| 176 if specified, else 0 |
| 177 """ |
| 178 idx = start_at |
| 179 if start_at: |
| 180 lines = self._stripped_lines[start_at:] |
| 181 else: |
| 182 lines = self._stripped_lines |
| 183 for line in lines: |
| 184 #if line: |
| 185 yield idx, line |
| 186 idx += 1 |
| 187 |
| 188 def find(self, stripped_line): |
| 189 """return positions of the given stripped line in this set""" |
| 190 return self._index.get(stripped_line, ()) |
| 191 |
| 192 def _mk_index(self): |
| 193 """create the index for this set""" |
| 194 index = {} |
| 195 for line_no, line in enumerate(self._stripped_lines): |
| 196 if line: |
| 197 index.setdefault(line, []).append( line_no ) |
| 198 return index |
| 199 |
| 200 |
| 201 MSGS = {'R0801': ('Similar lines in %s files\n%s', |
| 202 'Indicates that a set of similar lines has been detected \ |
| 203 among multiple files. This usually means that the code should \ |
| 204 be refactored to avoid this duplication.')} |
| 205 |
| 206 def report_similarities(sect, stats, old_stats): |
| 207 """make a layout with some stats about duplication""" |
| 208 lines = ['', 'now', 'previous', 'difference'] |
| 209 lines += table_lines_from_stats(stats, old_stats, |
| 210 ('nb_duplicated_lines', |
| 211 'percent_duplicated_lines')) |
| 212 sect.append(Table(children=lines, cols=4, rheaders=1, cheaders=1)) |
| 213 |
| 214 |
| 215 # wrapper to get a pylint checker from the similar class |
| 216 class SimilarChecker(BaseChecker, Similar): |
| 217 """checks for similarities and duplicated code. This computation may be |
| 218     memory / CPU intensive, so you should disable it if you experience |
| 219     problems. |
| 220 """ |
| 221 |
| 222 __implements__ = (IRawChecker,) |
| 223 # configuration section name |
| 224 name = 'similarities' |
| 225 # messages |
| 226 msgs = MSGS |
| 227 # configuration options |
| 228 # for available dict keys/values see the optik parser 'add_option' method |
| 229 options = (('min-similarity-lines', |
| 230 {'default' : 4, 'type' : "int", 'metavar' : '<int>', |
| 231 'help' : 'Minimum lines number of a similarity.'}), |
| 232 ('ignore-comments', |
| 233 {'default' : True, 'type' : 'yn', 'metavar' : '<y or n>', |
| 234 'help': 'Ignore comments when computing similarities.'} |
| 235 ), |
| 236 ('ignore-docstrings', |
| 237 {'default' : True, 'type' : 'yn', 'metavar' : '<y or n>', |
| 238 'help': 'Ignore docstrings when computing similarities.'} |
| 239 ), |
| 240 ) |
| 241 # reports |
| 242     reports = ( ('R0801', 'Duplication', report_similarities), ) # XXX actually a Refactoring message |
| 243 |
| 244 def __init__(self, linter=None): |
| 245 BaseChecker.__init__(self, linter) |
| 246 Similar.__init__(self, min_lines=4, |
| 247 ignore_comments=True, ignore_docstrings=True) |
| 248 self.stats = None |
| 249 |
| 250 def set_option(self, optname, value, action=None, optdict=None): |
| 251 """method called to set an option (registered in the options list) |
| 252 |
| 253 overridden to report options setting to Similar |
| 254 """ |
| 255 BaseChecker.set_option(self, optname, value, action, optdict) |
| 256 if optname == 'min-similarity-lines': |
| 257 self.min_lines = self.config.min_similarity_lines |
| 258 elif optname == 'ignore-comments': |
| 259 self.ignore_comments = self.config.ignore_comments |
| 260 elif optname == 'ignore-docstrings': |
| 261 self.ignore_docstrings = self.config.ignore_docstrings |
| 262 |
| 263 def open(self): |
| 264         """init the checker: reset linesets and statistics information""" |
| 265 self.linesets = [] |
| 266 self.stats = self.linter.add_stats(nb_duplicated_lines=0, |
| 267 percent_duplicated_lines=0) |
| 268 |
| 269 def process_module(self, node): |
| 270 """process a module |
| 271 |
| 272 the module's content is accessible via the stream object |
| 273 |
| 274 stream must implement the readlines method |
| 275 """ |
| 276 self.append_stream(self.linter.current_name, node.file_stream) |
| 277 |
| 278 def close(self): |
| 279 """compute and display similarities on closing (i.e. end of parsing)""" |
| 280 total = sum([len(lineset) for lineset in self.linesets]) |
| 281 duplicated = 0 |
| 282 stats = self.stats |
| 283 for num, couples in self._compute_sims(): |
| 284 msg = [] |
| 285 for lineset, idx in couples: |
| 286 msg.append("==%s:%s" % (lineset.name, idx)) |
| 287 msg.sort() |
| 288 # pylint: disable=W0631 |
| 289 for line in lineset._real_lines[idx:idx+num]: |
| 290 msg.append(line.rstrip()) |
| 291 self.add_message('R0801', args=(len(couples), '\n'.join(msg))) |
| 292 duplicated += num * (len(couples) - 1) |
| 293 stats['nb_duplicated_lines'] = duplicated |
| 294 stats['percent_duplicated_lines'] = total and duplicated * 100. / total |
| 295 |
| 296 |
| 297 def register(linter): |
| 298 """required method to auto register this checker """ |
| 299 linter.register_checker(SimilarChecker(linter)) |
| 300 |
| 301 def usage(status=0): |
| 302 """display command line usage information""" |
| 303     print "finds copy-pasted blocks in a set of files" |
| 304 print |
| 305 print 'Usage: symilar [-d|--duplicates min_duplicated_lines] \ |
| 306 [-i|--ignore-comments] file1...' |
| 307 sys.exit(status) |
| 308 |
| 309 def run(argv=None): |
| 310 """standalone command line access point""" |
| 311 if argv is None: |
| 312 argv = sys.argv[1:] |
| 313 from getopt import getopt |
| 314     s_opts = 'hd:i'  # ':' lets -d take its min_duplicated_lines value |
| 315 l_opts = ('help', 'duplicates=', 'ignore-comments') |
| 316 min_lines = 4 |
| 317 ignore_comments = False |
| 318 opts, args = getopt(argv, s_opts, l_opts) |
| 319 for opt, val in opts: |
| 320 if opt in ('-d', '--duplicates'): |
| 321 min_lines = int(val) |
| 322 elif opt in ('-h', '--help'): |
| 323 usage() |
| 324 elif opt in ('-i', '--ignore-comments'): |
| 325 ignore_comments = True |
| 326 if not args: |
| 327 usage(1) |
| 328 sim = Similar(min_lines, ignore_comments) |
| 329 for filename in args: |
| 330 sim.append_stream(filename, open(filename)) |
| 331 sim.run() |
| 332 |
| 333 if __name__ == '__main__': |
| 334 run() |
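
For reference, the detector above can be used without going through pylint. The sketch below (Python 2, like the listing itself) drives the Similar class directly; the import path assumes the file lands at pylint/checkers/similar.py, as its own imports suggest, and the file names are purely illustrative.

    from pylint.checkers.similar import Similar

    # Report blocks of at least 4 similar lines, skipping comments while
    # comparing -- the same knobs the standalone tool exposes.
    sim = Similar(min_lines=4, ignore_comments=True)
    for filename in ('module_a.py', 'module_b.py'):   # hypothetical inputs
        sim.append_stream(filename, open(filename))
    sim.run()   # prints each duplicated block plus a TOTAL summary on stdout

This is roughly what the command line form, symilar --duplicates=4 --ignore-comments module_a.py module_b.py, does.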
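
The effect of the stripping options can be seen in isolation through the stripped_lines helper. A small sketch under the same import-path assumption, with made-up input lines:

    from pylint.checkers.similar import stripped_lines

    lines = ['x = 1  # set up the counter\n',
             '"""one-line docstring"""\n']
    # Comments and docstrings are blanked before LineSet indexes the lines,
    # so only real code can make two files look similar.
    print stripped_lines(lines, True, True)
    # -> ['x = 1', '']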