#!/usr/bin/python # # Perforce Defect Tracking Integration Project # <http://www.ravenbrook.com/project/p4dti/> # # COVERAGE.PY -- COVERAGE TESTING # # Gareth Rees, Ravenbrook Limited, 2001-12-04 # # # 1. INTRODUCTION # # This module provides coverage testing for Python code. # # The intended readership is all Python developers. # # This document is not confidential. # # See [GDR 2001-12-04a] for the command-line interface, programmatic # interface and limitations. See [GDR 2001-12-04b] for requirements and # design. """Usage: coverage.py -x MODULE.py [ARG1 ARG2 ...] Execute module, passing the given command-line arguments, collecting coverage data. coverage.py -e Erase collected coverage data. coverage.py -r [-m] FILE1 FILE2 ... Report on the statement coverage for the given files. With the -m option, show line numbers of the statements that weren't executed. coverage.py -a [-d dir] FILE1 FILE2 ... Make annotated copies of the given files, marking statements that are executed with > and statements that are missed with !. With the -d option, make the copies in that directory. Without the -d option, make each copy in the same directory as the original. Coverage data is saved in the file .coverage by default. Set the COVERAGE_FILE environment variable to save it somewhere else.""" import os import re import string import sys import types # 2. IMPLEMENTATION # # This uses the "singleton" pattern. # # The word "morf" means a module object (from which the source file can # be deduced by suitable manipulation of the __file__ attribute) or a # filename. # # When we generate a coverage report we have to canonicalize every # filename in the coverage dictionary just in case it refers to the # module we are reporting on. It seems a shame to throw away this # information so the data in the coverage dictionary is transferred to # the 'cexecuted' dictionary under the canonical filenames. # # The coverage dictionary is called "c" and the trace function "t". The # reason for these short names is that Python looks up variables by name # at runtime and so execution time depends on the length of variables! # In the bottleneck of this application it's appropriate to abbreviate # names to increase speed. # A dictionary with an entry for (Python source file name, line number # in that file) if that line has been executed. c = {} # t(f, x, y). This method is passed to sys.settrace as a trace # function. See [van Rossum 2001-07-20b, 9.2] for an explanation of # sys.settrace and the arguments and return value of the trace function. # See [van Rossum 2001-07-20a, 3.2] for a description of frame and code # objects. def t(f, x, y): c[(f.f_code.co_filename, f.f_lineno)] = 1 return t the_coverage = None class coverage: error = "coverage error" # Name of the cache file (unless environment variable is set). cache_default = ".coverage" # Environment variable naming the cache file. cache_env = "COVERAGE_FILE" # A map from canonical Python source file name to a dictionary in # which there's an entry for each line number that has been # executed. cexecuted = {} # Cache of results of calling the analysis() method, so that you can # specify both -r and -a without doing double work. analysis_cache = {} # Cache of results of calling the canonical_filename() method, to # avoid duplicating work. canonical_filename_cache = {} def __init__(self): global the_coverage if the_coverage: raise self.error, "Only one coverage object allowed." self.cache = os.environ.get(self.cache_env, self.cache_default) self.restore() self.analysis_cache = {} def help(self, error=None): if error: print error print print __doc__ sys.exit(1) def command_line(self): import getopt settings = {} optmap = { '-a': 'annotate', '-d:': 'directory=', '-e': 'erase', '-h': 'help', '-i': 'ignore-errors', '-m': 'show-missing', '-r': 'report', '-x': 'execute', } short_opts = string.join(map(lambda o: o[1:], optmap.keys()), '') long_opts = optmap.values() options, args = getopt.getopt(sys.argv[1:], short_opts, long_opts) for o, a in options: if optmap.has_key(o): settings[optmap[o]] = 1 elif optmap.has_key(o + ':'): settings[optmap[o + ':']] = a elif o[2:] in long_opts: settings[o[2:]] = 1 elif o[2:] + '=' in long_opts: settings[o[2:]] = a else: self.help("Unknown option: '%s'." % o) if settings.get('help'): self.help() for i in ['erase', 'execute']: for j in ['annotate', 'report']: if settings.get(i) and settings.get(j): self.help("You can't specify the '%s' and '%s' " "options at the same time." % (i, j)) args_needed = (settings.get('execute') or settings.get('annotate') or settings.get('report')) action = settings.get('erase') or args_needed if not action: self.help("You must specify at least one of -e, -x, -r, " "or -a.") if not args_needed and args: self.help("Unexpected arguments %s." % args) if settings.get('erase'): self.erase() if settings.get('execute'): if not args: self.help("Nothing to do.") sys.argv = args self.start() import __main__ sys.path[0] = os.path.dirname(sys.argv[0]) execfile(sys.argv[0], __main__.__dict__) if not args: args = self.cexecuted.keys() ignore_errors = settings.get('ignore-errors') show_missing = settings.get('show-missing') directory = settings.get('directory=') if settings.get('report'): self.report(args, show_missing, ignore_errors) if settings.get('annotate'): self.annotate(args, directory, ignore_errors) def start(self): sys.settrace(t) def stop(self): sys.settrace(None) def erase(self): global c c = {} self.analysis_cache = {} self.cexecuted = {} if os.path.exists(self.cache): os.remove(self.cache) # save(). Save coverage data to the coverage cache. def save(self): self.canonicalize_filenames() cache = open(self.cache, 'wb') import marshal marshal.dump(self.cexecuted, cache) cache.close() # restore(). Restore coverage data from the coverage cache (if it # exists). def restore(self): global c c = {} self.cexecuted = {} if not os.path.exists(self.cache): return try: cache = open(self.cache, 'rb') import marshal cexecuted = marshal.load(cache) cache.close() if isinstance(cexecuted, types.DictType): self.cexecuted = cexecuted except: pass # canonical_filename(filename). Return a canonical filename for the # file (that is, an absolute path with no redundant components and # normalized case). See [GDR 2001-12-04b, 3.3]. def canonical_filename(self, filename): if not self.canonical_filename_cache.has_key(filename): f = filename if os.path.isabs(f) and not os.path.exists(f): f = os.path.basename(f) if not os.path.isabs(f): for path in [os.curdir] + sys.path: g = os.path.join(path, f) if os.path.exists(g): f = g break cf = os.path.normcase(os.path.abspath(f)) self.canonical_filename_cache[filename] = cf return self.canonical_filename_cache[filename] # canonicalize_filenames(). Copy results from "executed" to # "cexecuted", canonicalizing filenames on the way. Clear the # "executed" map. def canonicalize_filenames(self): global c for filename, lineno in c.keys(): f = self.canonical_filename(filename) if not self.cexecuted.has_key(f): self.cexecuted[f] = {} self.cexecuted[f][lineno] = 1 c = {} # morf_filename(morf). Return the filename for a module or file. def morf_filename(self, morf): if isinstance(morf, types.ModuleType): if not hasattr(morf, '__file__'): raise self.error, "Module has no __file__ attribute." file = morf.__file__ else: file = morf return self.canonical_filename(file) # analyze_morf(morf). Analyze the module or filename passed as # the argument. If the source code can't be found, raise an error. # Otherwise, return a pair of (1) the canonical filename of the # source code for the module, and (2) a list of lines of statements # in the source code. def analyze_morf(self, morf): if self.analysis_cache.has_key(morf): return self.analysis_cache[morf] filename = self.morf_filename(morf) ext = os.path.splitext(filename)[1] if ext == '.pyc': if not os.path.exists(filename[0:-1]): raise self.error, ("No source for compiled code '%s'." % filename) filename = filename[0:-1] elif ext != '.py': raise self.error, "File '%s' not Python source." % filename source = open(filename, 'r') import parser tree = parser.suite(source.read()).totuple(1) source.close() statements = {} self.find_statements(tree, statements) lines = statements.keys() lines.sort() result = filename, lines self.analysis_cache[morf] = result return result # find_statements(tree, dict). Find each statement in the parse # tree and record the line on which the statement starts in the # dictionary (by assigning it to 1). # # It works by walking the whole tree depth-first. Every time it # comes across a statement (symbol.stmt -- this includes compound # statements like 'if' and 'while') it calls find_statement, which # descends the tree below the statement to find the first terminal # token in that statement and record the lines on which that token # was found. # # This algorithm may find some lines several times (because of the # grammar production statement -> compound statement -> statement), # but that doesn't matter because we record lines as the keys of the # dictionary. # # See also [GDR 2001-12-04b, 3.2]. def find_statements(self, tree, dict): import symbol, token if token.ISNONTERMINAL(tree[0]): for t in tree[1:]: self.find_statements(t, dict) if tree[0] == symbol.stmt: self.find_statement(tree[1], dict) elif (tree[0] == token.NAME and tree[1] in ['elif', 'except', 'finally']): dict[tree[2]] = 1 def find_statement(self, tree, dict): import token while token.ISNONTERMINAL(tree[0]): tree = tree[1] dict[tree[2]] = 1 # format_lines(statements, lines). Format a list of line numbers # for printing by coalescing groups of lines as long as the lines # represent consecutive statements. This will coalesce even if # there are gaps between statements, so if statements = # [1,2,3,4,5,10,11,12,13,14] and lines = [1,2,5,10,11,13,14] then # format_lines will return "1-2, 5-11, 13-14". def format_lines(self, statements, lines): pairs = [] i = 0 j = 0 start = None pairs = [] while i < len(statements) and j < len(lines): if statements[i] == lines[j]: if start == None: start = lines[j] end = lines[j] j = j + 1 elif start: pairs.append((start, end)) start = None i = i + 1 if start: pairs.append((start, end)) def stringify(pair): start, end = pair if start == end: return "%d" % start else: return "%d-%d" % (start, end) import string return string.join(map(stringify, pairs), ", ") def analysis(self, morf): filename, statements = self.analyze_morf(morf) self.canonicalize_filenames() if not self.cexecuted.has_key(filename): self.cexecuted[filename] = {} missing = [] for line in statements: if not self.cexecuted[filename].has_key(line): missing.append(line) return (filename, statements, missing, self.format_lines(statements, missing)) def morf_name(self, morf): if isinstance(morf, types.ModuleType): return morf.__name__ else: return os.path.splitext(os.path.basename(morf))[0] def report(self, morfs, show_missing=1, ignore_errors=0): if not isinstance(morfs, types.ListType): morfs = [morfs] max_name = max([5,] + map(len, map(self.morf_name, morfs))) fmt_name = "%%- %ds " % max_name fmt_err = fmt_name + "%s: %s" header = fmt_name % "Name" + " Stmts Exec Cover" fmt_coverage = fmt_name + "% 6d % 6d % 5d%%" if show_missing: header = header + " Missing" fmt_coverage = fmt_coverage + " %s" print header print "-" * len(header) total_statements = 0 total_executed = 0 for morf in morfs: name = self.morf_name(morf) try: _, statements, missing, readable = self.analysis(morf) n = len(statements) m = n - len(missing) if n > 0: pc = 100.0 * m / n else: pc = 100.0 args = (name, n, m, pc) if show_missing: args = args + (readable,) print fmt_coverage % args total_statements = total_statements + n total_executed = total_executed + m except KeyboardInterrupt: raise except: if not ignore_errors: type, msg = sys.exc_info()[0:2] print fmt_err % (name, type, msg) if len(morfs) > 1: print "-" * len(header) if total_statements > 0: pc = 100.0 * total_executed / total_statements else: pc = 100.0 args = ("TOTAL", total_statements, total_executed, pc) if show_missing: args = args + ("",) print fmt_coverage % args # annotate(morfs, ignore_errors). blank_re = re.compile("\\s*(#|$)") else_re = re.compile("\\s*else\\s*:\\s*(#|$)") def annotate(self, morfs, directory=None, ignore_errors=0): for morf in morfs: try: filename, statements, missing, _ = self.analysis(morf) source = open(filename, 'r') if directory: dest_file = os.path.join(directory, os.path.basename(filename) + ',cover') else: dest_file = filename + ',cover' dest = open(dest_file, 'w') lineno = 0 i = 0 j = 0 covered = 1 while 1: line = source.readline() if line == '': break lineno = lineno + 1 while i < len(statements) and statements[i] < lineno: i = i + 1 while j < len(missing) and missing[j] < lineno: j = j + 1 if i < len(statements) and statements[i] == lineno: covered = j >= len(missing) or missing[j] > lineno if self.blank_re.match(line): dest.write(' ') elif self.else_re.match(line): # Special logic for lines containing only # 'else:'. See [GDR 2001-12-04b, 3.2]. if i >= len(statements) and j >= len(missing): dest.write('! ') elif i >= len(statements) or j >= len(missing): dest.write('> ') elif statements[i] == missing[j]: dest.write('! ') else: dest.write('> ') elif covered: dest.write('> ') else: dest.write('! ') dest.write(line) source.close() dest.close() except KeyboardInterrupt: raise except: if not ignore_errors: raise # Singleton object. the_coverage = coverage() # Module functions call methods in the singleton object. def start(*args): return apply(the_coverage.start, args) def stop(*args): return apply(the_coverage.stop, args) def erase(*args): return apply(the_coverage.erase, args) def analysis(*args): return apply(the_coverage.analysis, args) def report(*args): return apply(the_coverage.report, args) # Save coverage data when Python exits. (The atexit module wasn't # introduced until Python 2.0, so use sys.exitfunc when it's not # available.) try: import atexit atexit.register(the_coverage.save) except ImportError: sys.exitfunc = the_coverage.save # Command-line interface. if __name__ == '__main__': the_coverage.command_line() # A. REFERENCES # # [GDR 2001-12-04a] "Statement coverage for Python"; Gareth Rees; # Ravenbrook Limited; 2001-12-04; # <http://www.garethrees.org/2001/12/04/python-coverage/>. # # [GDR 2001-12-04b] "Statement coverage for Python: design and # analysis"; Gareth Rees; Ravenbrook Limited; 2001-12-04; # <http://www.garethrees.org/2001/12/04/python-coverage/design.html>. # # [van Rossum 2001-07-20a] "Python Reference Manual (releae 2.1.1)"; # Guide van Rossum; 2001-07-20; # <http://www.python.org/doc/2.1.1/ref/ref.html>. # # [van Rossum 2001-07-20b] "Python Library Reference"; Guido van Rossum; # 2001-07-20; <http://www.python.org/doc/2.1.1/lib/lib.html>. # # # B. DOCUMENT HISTORY # # 2001-12-04 GDR Created. # # 2001-12-06 GDR Added command-line interface and source code # annotation. # # 2001-12-09 GDR Moved design and interface to separate documents. # # 2001-12-10 GDR Open cache file as binary on Windows. Allow # simultaneous -e and -x, or -a and -r. # # 2001-12-12 GDR Added command-line help. Cache analysis so that it # only needs to be done once when you specify -a and -r. # # 2001-12-13 GDR Improved speed while recording. Portable between # Python 1.5.2 and 2.1.1. # # 2002-01-03 GDR Module-level functions work correctly. # # 2002-01-07 GDR Update sys.path when running a file with the -x option, # so that it matches the value the program would get if it were run on # its own. # # # C. COPYRIGHT AND LICENCE # # Copyright 2001 Gareth Rees. All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the # distribution. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # HOLDERS AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, # INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, # BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS # OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND # ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR # TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE # USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH # DAMAGE. # # # # $Id: //info.ravenbrook.com/user/gdr/www.garethrees.org/2001/12/04/python-coverage/coverage.py#8 $