Source code for grec.grec

# -*- coding: utf-8 -*-

"""Colorize terminal text with regular expressions.

This module implements all functionality required for grec.

"""

import sys
import argparse
import re
from itertools import izip_longest
from collections import MutableMapping, OrderedDict
from termcolor import colored, COLORS, HIGHLIGHTS


class Intervals(MutableMapping):
    """Dictionary with intervals as keys and arbitrary data as values.

    Used to check whether intervals overlap. An interval is a tuple of
    two integers, *start* and *end*. Like a slice, *start* marks the
    first value of the interval while *end* is one past the last value
    of the interval.

    Iterating over an instance yields its intervals in sorted order.

    """

    def __init__(self, intervals=None):
        """Create a new instance, optionally pre-populated.

        :param intervals: optional mapping from interval tuples to values

        """
        self.data = {}
        if intervals is not None:
            # items() is available on mappings in both Python 2 and 3,
            # unlike the Python 2-only iteritems(). Going through
            # self[...] keeps the validation in __setitem__ in effect.
            for interval, value in intervals.items():
                self[interval] = value

    def __setitem__(self, key, value):
        """Store *value* under the interval *key* (start < end required)."""
        # NOTE: this is an assert statement, so the check is skipped
        # when Python runs with -O.
        assert key[0] < key[1], \
            "End of interval must be strictly greater than its start"
        self.data[key] = value

    def __getitem__(self, key):
        """Return the value stored for the interval *key*."""
        return self.data[key]

    def __delitem__(self, key):
        """Remove the interval *key* and its value."""
        del self.data[key]

    def __iter__(self):
        """Iterate over the stored intervals in sorted order."""
        return iter(sorted(self.data))

    def __len__(self):
        """Return the number of stored intervals."""
        return len(self.data)

    @classmethod
    def _interval_overlap(cls, interval1, interval2):
        """Return true if the two intervals overlap."""
        start1, end1 = interval1
        start2, end2 = interval2
        # Ends are exclusive, so intervals that merely touch
        # (end1 == start2) do not overlap.
        return start1 < end2 and end1 > start2

    def overlap(self, interval):
        """Return all intervals in dict overlapping the given interval.

        :param tuple interval: start and end of interval
        :rtype: set of interval tuples

        """
        return {candidate for candidate in self.data
                if self._interval_overlap(candidate, interval)}
class ColoredString(object):
    """String with colorized parts.

    :ivar string: The plain string without any color information.
    :ivar intervals: :class:`Intervals` instance associating
                     intervals with colors.

    """

    def __init__(self, string):
        """Wrap *string*; no part of it is colorized initially."""
        self.string = string
        self.intervals = Intervals()

    def apply_color(self, start, end, color_info):
        """Apply color to all characters within an interval.

        :param int start: index of first character in string to colorize
        :param int end: index of one past the last character to colorize
        :param tuple color_info: foreground and background color as strings

        The characters of the string that have indices *start* through
        *end* - 1 will be assigned the colors specified in
        *color_info*. If any characters in the interval already have a
        color set, their color will be replaced with the new color.

        An empty interval (*start* >= *end*) contains no characters, so
        the call leaves the string unchanged.

        The strings in *color_info* need to be recognized by
        :mod:`termcolor`.

        >>> s = ColoredString('a word with color')
        >>> s.apply_color(2, 6, ('red', 'on_white'))
        >>> print(s)
        a \x1b[47m\x1b[31mword\x1b[0m with color

        """
        # Nothing to colorize. Bail out before touching the existing
        # intervals: an empty interval cannot be stored, and splitting
        # its neighbours first would leave them truncated for nothing.
        if start >= end:
            return

        # Find any overlapping colorized intervals. If found we need
        # to truncate them to make room for the new interval.
        # overlap() returns a fresh set, so mutating self.intervals
        # inside the loop is safe.
        for interval in self.intervals.overlap((start, end)):
            other_start, other_end = interval
            other_color = self.intervals[interval]

            # Save the parts that aren't obscured by the new interval.
            # Those can only be on the left and right side of the new
            # interval.
            if other_start < start:
                self.intervals[(other_start, start)] = other_color
            if end < other_end:
                self.intervals[(end, other_end)] = other_color

            # Delete original interval
            del self.intervals[interval]

        # When there's no more overlapping intervals, set our new one
        # with its associated color
        self.intervals[(start, end)] = color_info

    def __str__(self):
        """Return string with ANSI escape codes for colors."""
        offset = 0
        segments = []
        # Intervals iterate in sorted order, so the string is walked
        # left to right: plain text up to each interval, then the
        # colorized interval itself.
        for (start, end), color_info in self.intervals.items():
            segments.append(self.string[offset:start])
            segments.append(colored(self.string[start:end], *color_info))
            offset = end
        # Whatever follows the last colorized interval stays plain.
        segments.append(self.string[offset:])
        return ''.join(segments)
class Matcher(object):
    """Colorize text based on regular expression matches.

    :ivar patterns: :class:`OrderedDict` of all configured patterns

    >>> m = Matcher()
    >>> m.add_pattern('A', 'red')
    >>> m.add_pattern('B.', 'blue')
    >>> colored = m.match('ABC')
    >>> colored.string
    'ABC'
    >>> print(colored)
    \x1b[31mA\x1b[0m\x1b[34mBC\x1b[0m

    """

    def __init__(self):
        """Create new instance, containing no patterns."""
        self.patterns = OrderedDict()

    @staticmethod
    def _termcolor_names(foreground, background=None):
        """Translate color names to ones recognized by termcolor.

        termcolor.colored expects backgrounds to be specified as
        'on_white', 'on_red', and so on.

        Returns a *(foreground, background)* tuple. Raises ValueError
        if foreground or background is not recognized by termcolor.

        """
        # Save the name in case we need to raise an error
        original_bg = background
        if background is not None:
            background = 'on_' + background

        if foreground and foreground not in COLORS:
            raise ValueError("Color '{}' not recognized".format(foreground))
        if background and background not in HIGHLIGHTS:
            raise ValueError("Color '{}' not recognized".format(original_bg))
        return (foreground, background)

    def _add_to_patterns(self, regex, pattern):
        """Add new pattern to patterns instance variable.

        If the associated regular expression is already present in
        self.patterns it will first be removed to ensure replacement.

        """
        # Deleting first moves the key to the end of the OrderedDict;
        # plain assignment would keep its old position.
        if regex in self.patterns:
            del self.patterns[regex]
        self.patterns[regex] = pattern

    def add_pattern(self, regex, foreground=None, background=None):
        """Add regular expression for text colorization.

        :param string regex: regular expression
        :param string foreground: foreground color
        :param string background: background color

        The order of additions is significant. Matching and
        colorization will be applied in the same order as they are
        added with this method.

        If the passed regular expression *regex* is identical to an
        already added one (color information not considered), then
        that old pattern will be replaced with this one. The ordering
        will still be updated, so any other already present patterns
        will be processed before this one when matching.

        >>> m = Matcher()
        >>> m.add_pattern('^$', 'red')
        >>> m.add_pattern('[A-Z]+', 'blue', 'white')

        """
        self._add_to_patterns(regex, {
            'group': False,
            'regex': re.compile(regex),
            'color_info': self._termcolor_names(foreground, background)
        })

    def add_group_pattern(self, regex, *args):
        """Add regular expression with groups for text colorization.

        :param string regex: regular expression
        :param tuple args: color information for each matched group

        Works like :func:`add_pattern` but colors matched groups
        instead of the whole match. Takes a variable number of
        arguments where each is a tuple with color information:
        *(foreground, background)*. These will be used to colorize the
        corresponding group matches in the same order.

        When a regular expression contains more groups than colors,
        the color information specified in the last argument is
        repeated for all remaining groups. When a regular expression
        contains less groups than colors then excess colors are
        ignored.

        >>> m = Matcher()
        >>> m.add_group_pattern('^#.*(ERROR)', ('red',))
        >>> m.add_group_pattern('A(B)C(D)', ('blue', 'white'), ('red',))

        """
        self._add_to_patterns(regex, {
            'group': True,
            'regex': re.compile(regex),
            'color_info': [self._termcolor_names(*colors) for colors in args]
        })

    def remove_pattern(self, regex):
        """Remove the pattern with the given regular expression.

        :param string regex: regular expression of a previously added
                             pattern

        Raises KeyError if no pattern with that regular expression has
        been added.

        >>> m = Matcher()
        >>> m.add_pattern('[A-Z]', 'blue')
        >>> len(m.patterns)
        1
        >>> m.remove_pattern('[A-Z]')
        >>> len(m.patterns)
        0

        """
        del self.patterns[regex]

    def match(self, text):
        """Colorize text according to pattern matches.

        :param string text: string to match for colorization
        :rtype: :class:`ColoredString` instance

        Returns a :class:`ColoredString` which may or may not have an
        actual color, depending on whether any patterns matched the
        passed string. Printing the instance in the terminal will show
        the string with its assigned colors.

        Matches and groups that span no characters (for example a
        match of ``^$``, or an optional group that did not take part
        in the match) are left uncolored.

        >>> m = Matcher()
        >>> m.add_pattern('5', 'red')
        >>> colored_string = m.match('1 2 3 4 5')
        >>> colored_string  # doctest: +ELLIPSIS
        <grec....ColoredString object at 0x...>
        >>> print(colored_string)
        1 2 3 4 \x1b[31m5\x1b[0m

        """
        colored_string = ColoredString(text)
        for pattern in self.patterns.values():
            regex = pattern['regex']
            for re_match in regex.finditer(text):
                if pattern['group']:
                    # If this is a group pattern, we need to iterate
                    # over and colorize all groups (group 0 is the
                    # whole match and is deliberately left out).
                    intervals = [re_match.span(index)
                                 for index in range(1, regex.groups + 1)]
                    colors = pattern['color_info']
                else:
                    # Otherwise, we only have one interval to
                    # colorize: the span of the whole match.
                    intervals = [re_match.span()]
                    colors = [pattern['color_info']]

                # If there are more colors than intervals, truncate
                # the list of colors to have the same length as
                # intervals.
                colors = colors[:len(intervals)]

                # Skip the colorization if we ended up with no colors.
                if not colors:
                    continue

                # Pair up intervals with their colors. If there are
                # more intervals than colors then fill up the missing
                # colors with the last color in the colors array.
                pairs = izip_longest(intervals, colors, fillvalue=colors[-1])
                for (start, end), color_info in pairs:
                    # A group that did not participate reports the
                    # span (-1, -1) and a zero-width match has
                    # start == end; neither covers any character.
                    if start < end:
                        colored_string.apply_color(start, end, color_info)
        return colored_string

    def match_iter(self, iterable):
        """Return an iterator of match results.

        :param iterable: iterable of strings to match
        :rtype: iterator

        For each string in *iterable*, the returned iterator yields a
        :class:`ColoredString` instance which is the result of
        performing a pattern match on the string.

        >>> m = Matcher()
        >>> m.add_pattern('2', 'green')
        >>> for colored_string in m.match_iter(['1', '2', '3']):
        ...     print(colored_string)
        1
        \x1b[32m2\x1b[0m
        3

        """
        for line in iterable:
            yield self.match(line)
class PatternAction(argparse.Action):
    """Action class to handle pattern arguments with argparse.

    To retain ordering between *-m* and *-g* arguments from
    command-line input, this class aggregates them in order into a
    list, stored on the namespace as ``pattern_data``. Each entry is a
    tuple of the option's *dest* and the values given to it.

    """

    def __call__(self, parser, namespace, values, option_string=None):
        """Append ``(self.dest, values)`` to ``namespace.pattern_data``."""
        collected = getattr(namespace, 'pattern_data', None)
        if collected is None:
            # For the first pattern, create the (empty) attribute
            collected = []
            setattr(namespace, 'pattern_data', collected)

        # The list is shared with the namespace, so appending in place
        # is enough; there is no need to set the attribute again.
        collected.append((self.dest, values))
def parse_arguments(args):
    """Parse command line arguments.

    :param args: list of argument strings, passed on to
                 :meth:`argparse.ArgumentParser.parse_args`
    :rtype: :class:`argparse.Namespace`

    Patterns given with *-m* and *-g* are collected, in command-line
    order, by :class:`PatternAction`.

    """
    parser = argparse.ArgumentParser(
        description='Colorize text by regular expressions',
        usage='grec [-h] [-m PATTERN COLOR_INFO] '
              '[-g PATTERN [COLOR_INFO ...]] -- file')

    parser.add_argument('-m', dest='normal', nargs=2, action=PatternAction,
                        metavar=('PATTERN', 'COLOR_INFO'),
                        help='colorize each occurrence of PATTERN '
                             'with the colors specified in COLOR_INFO. '
                             'This argument can be used multiple times.')

    # nargs='+' because a group pattern takes one COLOR_INFO per group.
    parser.add_argument('-g', dest='group', nargs='+', action=PatternAction,
                        metavar=('PATTERN', 'COLOR_INFO'),
                        help='colorize all groups in each '
                             'occurrence of PATTERN with the colors '
                             'specified in COLOR_INFO. The number of '
                             'colors should match the number of groups '
                             'in the regular expression. '
                             'This argument can be used multiple times.')

    parser.add_argument('file', type=argparse.FileType('r'), nargs='?',
                        default=sys.stdin,
                        help="file whose contents to colorize "
                             "('-' for stdin)")

    return parser.parse_args(args)
def split_colors(color):
    """Convert color from command line into foreground and background.

    :param string color: color(s) to translate
    :rtype: list of color name strings

    Color names are separated by underscores or by any run of non-word
    characters. Backgrounds can be optionally prepended by 'on'.
    Empty fragments, produced by leading, trailing or repeated
    separators, are dropped.

    >>> split_colors("yellow")
    ['yellow']
    >>> split_colors("red_on_blue")
    ['red', 'blue']
    >>> split_colors("green white")
    ['green', 'white']
    >>> split_colors(" red__blue ")
    ['red', 'blue']

    """
    # '_' is a word character, so \W+ alone would not split on it.
    fragments = re.split(r'\W+|_', color)
    return [item for item in fragments if item and item != 'on']
def main(args=None):
    """Run grec command.

    :param args: list of command line argument strings, handed to
                 :func:`parse_arguments`
    :returns: 0 once all input has been written to standard output

    """
    args = parse_arguments(args)
    matcher = Matcher()

    # 'pattern_data' only exists on the namespace when at least one
    # -m or -g option was given; without patterns the input is simply
    # passed through uncolored.
    for pattern_type, pattern in getattr(args, 'pattern_data', []):
        # Regex and color(s) from command line
        regex, colors = pattern[0], pattern[1:]

        # Patterns can be of normal or of group type
        if pattern_type == 'normal':
            # Here we only have one color for the whole match.
            assert len(colors) == 1
            color_info = split_colors(colors[0])
            matcher.add_pattern(regex, *color_info)
        elif pattern_type == 'group':
            # For group patterns we use multiple colors
            color_info = [split_colors(color) for color in colors]
            matcher.add_group_pattern(regex, *color_info)

    try:
        for colored_string in matcher.match_iter(args.file):
            sys.stdout.write(str(colored_string))
    finally:
        # Close a file that argparse opened for us, but leave the
        # process-wide standard input alone.
        if args.file is not sys.stdin:
            args.file.close()

    return 0