# Source code for pibootctl.formatter

# Copyright (c) 2020 Canonical Ltd.
# Copyright (c) 2019, 2020 Dave Jones <dave@waveform.org.uk>
#
# This file is part of pibootctl.
#
# pibootctl is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# pibootctl is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with pibootctl.  If not, see <https://www.gnu.org/licenses/>.

"""
The :mod:`pibootctl.formatter` module contains some generic text formatting
routines, including the :class:`TableWrapper` class (akin to
:class:`~textwrap.TextWrapper` but specific to table output), :class:`TransMap`
for partially formatting templates, and the :func:`render` function: a crude
markup renderer.

.. autoclass:: TableWrapper

.. data:: pretty_table

    Uses simple ASCII characters to produce a typical "box-like" table
    appearance::

        >>> from pibootctl.formatter import *
        >>> wrapper = TableWrapper(width=80, **pretty_table)
        >>> data = [
        ... ('Name', 'Length', 'Position'),
        ... ('foo', 3, 1),
        ... ('bar', 3, 2),
        ... ('baz', 3, 3),
        ... ('quux', 4, 4)]
        >>> print(wrapper.fill(data))
        +------+--------+----------+
        | Name | Length | Position |
        |------+--------+----------|
        | foo  | 3      | 1        |
        | bar  | 3      | 2        |
        | baz  | 3      | 3        |
        | quux | 4      | 4        |
        +------+--------+----------+

.. data:: curvy_table

    Uses simple ASCII characters to produce a "round-edged" table appearance::

        >>> from pibootctl.formatter import *
        >>> wrapper = TableWrapper(width=80, **curvy_table)
        >>> data = [
        ... ('Name', 'Length', 'Position'),
        ... ('foo', 3, 1),
        ... ('bar', 3, 2),
        ... ('baz', 3, 3),
        ... ('quux', 4, 4)]
        >>> print(wrapper.fill(data))
        ,------+--------+----------.
        | Name | Length | Position |
        |------+--------+----------|
        | foo  | 3      | 1        |
        | bar  | 3      | 2        |
        | baz  | 3      | 3        |
        | quux | 4      | 4        |
        `------+--------+----------'

.. data:: unicode_table

    Uses unicode box-drawing characters to produce a typical "box-like" table
    appearance::

        >>> from pibootctl.formatter import *
        >>> wrapper = TableWrapper(width=80, **unicode_table)
        >>> data = [
        ... ('Name', 'Length', 'Position'),
        ... ('foo', 3, 1),
        ... ('bar', 3, 2),
        ... ('baz', 3, 3),
        ... ('quux', 4, 4)]
        >>> print(wrapper.fill(data))
        ┌──────┬────────┬──────────┐
        │ Name │ Length │ Position │
        ├──────┼────────┼──────────┤
        │ foo  │ 3      │ 1        │
        │ bar  │ 3      │ 2        │
        │ baz  │ 3      │ 3        │
        │ quux │ 4      │ 4        │
        └──────┴────────┴──────────┘

.. data:: curvy_unicode_table

    Uses unicode box-drawing characters to produce a "round-edged" table
    appearance::

        >>> from pibootctl.formatter import *
        >>> wrapper = TableWrapper(width=80, **curvy_unicode_table)
        >>> data = [
        ... ('Name', 'Length', 'Position'),
        ... ('foo', 3, 1),
        ... ('bar', 3, 2),
        ... ('baz', 3, 3),
        ... ('quux', 4, 4)]
        >>> print(wrapper.fill(data))
        ╭──────┬────────┬──────────╮
        │ Name │ Length │ Position │
        ├──────┼────────┼──────────┤
        │ foo  │ 3      │ 1        │
        │ bar  │ 3      │ 2        │
        │ baz  │ 3      │ 3        │
        │ quux │ 4      │ 4        │
        ╰──────┴────────┴──────────╯

.. autoclass:: TransMap

.. autoclass:: FormatDict

.. autofunction:: int_ranges

.. autofunction:: render
"""

import re
from bisect import bisect
from textwrap import dedent, TextWrapper
from itertools import islice, zip_longest, chain, tee


class TableWrapper:
    """
    Similar to :class:`~textwrap.TextWrapper`, this class provides facilities
    for wrapping text to a particular width, but with a focus on table-based
    output.

    The constructor takes numerous arguments, but typically you don't need to
    specify them all (or at all). A series of dictionaries are provided with
    "common" configurations: :data:`pretty_table`, :data:`curvy_table`,
    :data:`unicode_table`, and :data:`curvy_unicode_table`. For example::

        >>> from pibootctl.formatter import *
        >>> wrapper = TableWrapper(width=80, **curvy_table)
        >>> data = [
        ... ('Name', 'Length', 'Position'),
        ... ('foo', 3, 1),
        ... ('bar', 3, 2),
        ... ('baz', 3, 3),
        ... ('quux', 4, 4)]
        >>> print(wrapper.fill(data))
        ,------+--------+----------.
        | Name | Length | Position |
        |------+--------+----------|
        | foo  | 3      | 1        |
        | bar  | 3      | 2        |
        | baz  | 3      | 3        |
        | quux | 4      | 4        |
        `------+--------+----------'

    The :class:`TableWrapper` instance attributes (and keyword arguments to
    the constructor) are as follows:

    .. attribute:: width

        (default 70) The maximum number of characters that the table can take
        up horizontally. :class:`TableWrapper` guarantees that no output line
        will be longer than :attr:`width` characters.

    .. attribute:: header_rows

        (default 1) The number of rows at the top of the table that will be
        separated from the following rows by a horizontal border
        (:attr:`internal_line`).

    .. attribute:: footer_rows

        (default 0) The number of rows at the bottom of the table that will be
        separated from the preceding rows by a horizontal border
        (:attr:`internal_line`).

    .. attribute:: cell_separator

        (default ``' '``) The string used to separate columns of cells.

    .. attribute:: internal_line

        (default ``'-'``) The string used to draw horizontal lines inside the
        table for :attr:`header_rows` and :attr:`footer_rows`.

    .. attribute:: internal_separator

        (default ``' '``) The string used within runs of
        :attr:`internal_line` to separate columns.

    .. attribute:: borders

        (default ``('', '', '', '')``) A 4-tuple of strings which specify the
        characters used to create the left, top, right, and bottom borders of
        the table respectively.

    .. attribute:: corners

        (default ``('', '', '', '')``) A 4-tuple of strings which specify the
        characters used for the top-left, top-right, bottom-right, and
        bottom-left corners of the table respectively.

    .. attribute:: internal_borders

        (default ``('', '', '', '')``) A 4-tuple of strings which specify the
        characters used to interrupt runs of the :attr:`borders` characters
        to draw row and column separators. Like :attr:`borders` these are the
        left, top, right, and bottom characters respectively.

    .. attribute:: align

        A callable accepting three parameters: 0-based row index, 0-based
        column index, and the cell data. The callable must return a character
        indicating the intended alignment of data within the cell. "<" for
        left justification, "^" for centered alignment, and ">" for right
        justification (as in :meth:`str.format`). The default is to left
        align everything.

    .. attribute:: format

        A callable accepting three parameters: 0-based row index, 0-based
        column index, and the cell data. The callable must return the desired
        string representation of the cell data. The default simply calls
        :class:`str` on everything.

    :class:`TableWrapper` also provides similar public methods to
    :class:`~textwrap.TextWrapper`:

    .. automethod:: wrap

    .. automethod:: fill
    """

    def __init__(self, width=70, header_rows=1, footer_rows=0,
                 cell_separator=' ', internal_line='-', internal_separator=' ',
                 borders=('', '', '', ''), corners=('', '', '', ''),
                 internal_borders=('', '', '', ''), align=None, format=None):
        if len(borders) != 4:
            raise ValueError('borders must be a 4-tuple of strings')
        if len(corners) != 4:
            raise ValueError('corners must be a 4-tuple of strings')
        if len(internal_borders) != 4:
            raise ValueError('internal_borders must be a 4-tuple of strings')
        self.width = width
        self.header_rows = header_rows
        self.footer_rows = footer_rows
        self.internal_line = internal_line
        self.cell_separator = cell_separator
        self.internal_separator = internal_separator
        self.borders = tuple(borders)
        self.corners = tuple(corners)
        self.internal_borders = tuple(internal_borders)
        if align is None:
            align = lambda row, col, data: '<'
        self.align = align
        if format is None:
            format = lambda row, col, data: str(data)
        self.format = format

    def fit_widths(self, widths):
        """
        Internal method which, given the sequence of *widths* (the calculated
        maximum width of each column), reduces those widths until they fit in
        the specified :attr:`width` limit, taking into account the implied
        width of column separators, borders, etc.

        Returns a new list of widths in the original column order. Raises
        :exc:`ValueError` if :attr:`width` cannot accommodate at least one
        character per column.
        """
        min_width = sum((
            len(self.borders[0]),
            len(self.borders[2]),
            len(self.cell_separator) * (len(widths) - 1)
        ))
        # Minimum width of each column is 1
        if min_width + len(widths) > self.width:
            raise ValueError('width is too thin to accommodate the table')
        total_width = sum(widths) + min_width
        # Reduce column widths until they fit in the available space. First,
        # we sort by the current column widths then by index so the widest
        # columns form a suffix of the list
        widths = sorted((w, i) for i, w in enumerate(widths))
        while total_width > self.width:
            # Find the insertion point before the suffix of columns that
            # share the current maximum width. The key must use the maximum
            # itself: a smaller key would drag narrower columns into the
            # suffix and let the remainder below shrink them (even to zero)
            # while wider columns are left untouched
            suffix = bisect(widths, (widths[-1][0], -1))
            suffix_len = len(widths) - suffix
            # Calculate the amount of width we still need to shed
            reduce_by = total_width - self.width
            if suffix > 0:
                # Limit this by the amount that can be removed evenly from the
                # suffix columns before the suffix needs to expand to encompass
                # more columns (requiring another loop)
                reduce_by = min(
                    reduce_by,
                    (widths[suffix][0] - widths[suffix - 1][0]) * suffix_len
                )
            # Distribute the reduction evenly across the columns of the suffix
            widths[suffix:] = [
                (w - reduce_by // suffix_len, i)
                for w, i in widths[suffix:]
            ]
            # Subtract the remainder from the first columns of the suffix
            for i in range(suffix, suffix + reduce_by % suffix_len):
                widths[i] = (widths[i][0] - 1, widths[i][1])
            total_width -= reduce_by
        return [w for i, w in sorted((i, w) for w, i in widths)]

    def wrap_lines(self, data, widths, start=0):
        """
        Internal method responsible for wrapping the contents of each cell in
        each row in *data* to the specified column *widths*.

        *start* is the 0-based index (within the whole table) of the first
        row in *data*; it is used to derive the row index passed to
        :attr:`align` and :attr:`format`.
        """
        # Construct wrappers for each column width
        wrappers = [TextWrapper(width=width) for width in widths]
        for y, row in enumerate(data, start):
            aligns = [self.align(y, x, cell) for x, cell in enumerate(row)]
            # Construct a list of wrapped lines for each cell in the row; these
            # are not necessarily of equal length (hence zip_longest below)
            cols = [
                wrapper.wrap(self.format(y, x, cell))
                for x, (cell, wrapper) in enumerate(zip(row, wrappers))
            ]
            for line in zip_longest(*cols, fillvalue=''):
                yield (
                    self.borders[0] +
                    self.cell_separator.join(
                        '{cell:{align}{width}}'.format(
                            cell=cell, align=align, width=width)
                        for align, width, cell in zip(aligns, widths, line)) +
                    self.borders[2]
                )

    def generate_lines(self, data):
        """
        Internal method which, given a sequence of rows of tuples in *data*,
        uses :meth:`fit_widths` to calculate the maximum possible column
        widths, and :meth:`wrap_lines` to wrap the text in *data* to the
        calculated widths, yielding rows of strings to the caller.
        """
        # Transpose the rows so each column's widest formatted cell can be
        # measured; the format callable receives (row, column, cell) exactly
        # as it does when the cells are rendered in wrap_lines
        widths = [
            max(1, max(
                len(self.format(y, x, item))
                for y, item in enumerate(column)))
            for x, column in enumerate(zip(*data))
        ]
        widths = self.fit_widths(widths)
        header_rows = max(0, self.header_rows)
        footer_rows = max(0, self.footer_rows)
        # Never hand islice a negative count when the table has fewer rows
        # than the header and footer would consume between them
        body_rows = max(0, len(data) - header_rows - footer_rows)
        separator = (
            self.internal_borders[0] +
            self.internal_separator.join(
                self.internal_line * w for w in widths) +
            self.internal_borders[2]
        )
        lines = iter(data)
        if self.borders[1]:
            yield (
                self.corners[0] +
                self.internal_borders[1].join(
                    self.borders[1] * width for width in widths) +
                self.corners[1]
            )
        if header_rows > 0:
            yield from self.wrap_lines(islice(lines, header_rows), widths)
            yield separator
        yield from self.wrap_lines(
            islice(lines, body_rows), widths, start=header_rows)
        if footer_rows > 0:
            yield separator
            yield from self.wrap_lines(
                lines, widths, start=header_rows + body_rows)
        if self.borders[3]:
            yield (
                self.corners[3] +
                self.internal_borders[3].join(
                    self.borders[3] * width for width in widths) +
                self.corners[2]
            )

    def wrap(self, data):
        """
        Wraps the table *data* returning a list of output lines without final
        newlines. *data* must be a sequence of row tuples, each of which is
        assumed to be the same length.

        If the current :attr:`width` does not permit at least a single
        character per column (after taking account of the width of borders,
        internal separators, etc.) then :exc:`ValueError` will be raised.
        """
        return list(self.generate_lines(data))

    def fill(self, data):
        """
        Wraps the table *data* returning a string containing the wrapped
        output.
        """
        return '\n'.join(self.wrap(data))
# Ready-made keyword argument sets for TableWrapper; unpack one of these into
# the constructor to get a bordered table
pretty_table = dict(
    cell_separator=' | ',
    internal_line='-',
    internal_separator='-+-',
    borders=('| ', '-', ' |', '-'),
    corners=('+-', '-+', '-+', '+-'),
    internal_borders=('|-', '-+-', '-|', '-+-'),
)

# Same as pretty_table, but with rounded ASCII corners
curvy_table = dict(pretty_table, corners=(',-', '-.', "-'", '`-'))

unicode_table = dict(
    cell_separator=' │ ',
    internal_line='─',
    internal_separator='─┼─',
    borders=('│ ', '─', ' │', '─'),
    corners=('┌─', '─┐', '─┘', '└─'),
    internal_borders=('├─', '─┬─', '─┤', '─┴─'),
)

# Same as unicode_table, but with rounded box-drawing corners
curvy_unicode_table = dict(unicode_table, corners=('╭─', '─╮', '─╯', '╰─'))


def pairwise(iterable):
    """
    Return an iterator over overlapping pairs of consecutive items from
    *iterable*: ``(s0, s1), (s1, s2), ...``. This follows the recipe given in
    the :mod:`itertools` documentation.
    """
    leading, trailing = tee(iterable, 2)
    # Advance the second copy by one item so it runs one step ahead
    try:
        next(trailing)
    except StopIteration:
        pass
    return zip(leading, trailing)
def int_ranges(values, range_sep='-', list_sep=', '):
    """
    Given a set of integer *values*, returns a compressed string
    representation of all values in the set. For example:

        >>> int_ranges({1, 2})
        '1, 2'
        >>> int_ranges({1, 2, 3})
        '1-3'
        >>> int_ranges({1, 2, 3, 4, 8})
        '1-4, 8'
        >>> int_ranges({1, 2, 3, 4, 8, 9})
        '1-4, 8-9'

    *range_sep* and *list_sep* can be optionally specified to customize the
    strings used to separate ranges and lists of ranges respectively.

    The values are always output in ascending order, regardless of the
    iteration order of *values*; duplicates are ignored. An empty *values*
    produces an empty string.
    """
    # Sets have no defined iteration order, so sort explicitly (and drop
    # duplicates, in case a list was passed) before formatting anything
    ordered = sorted(set(values))
    if not ordered:
        return ''
    if len(ordered) <= 2:
        # One or two values are always listed individually, never as a range
        return list_sep.join('{0}'.format(value) for value in ordered)
    ranges = []
    start = finish = ordered[0]
    for value in ordered[1:]:
        if value == finish + 1:
            # Still inside the current consecutive run
            finish = value
        else:
            ranges.append((start, finish))
            start = finish = value
    ranges.append((start, finish))
    return list_sep.join(
        ('{start}{sep}{finish}' if finish > start else '{start}').format(
            start=start, finish=finish, sep=range_sep)
        for start, finish in ranges
    )
class TransTemplate(str):
    """
    A string standing in for a format field that :class:`TransMap` has no
    value for. Formatting an instance reproduces the original replacement
    field, ``{name!conv:spec}``, so that the template text survives the
    substitution verbatim and can be filled in later.
    """
    # NOTE: str.format must not be used anywhere in this class, since it
    # would invoke the very hooks being defined here

    def __repr__(self):
        # Reached via the "!r" conversion; record it in the field name
        return TransTemplate(''.join((self, '!r')))

    def __str__(self):
        # Reached via the "!s" conversion; record it in the field name
        return TransTemplate(''.join((self, '!s')))

    def __format__(self, spec):
        if not spec:
            return '{' + self + '}'
        return '{' + self + ':' + spec + '}'
class TransMap:
    """
    A mapping for use with :meth:`str.format_map` which substitutes only the
    values it was given, leaving every other replacement field in the
    template untouched for a later formatting pass. For example:

        >>> '{foo}{bar}'.format_map(TransMap(foo=1))
        '1{bar}'
        >>> '{foo:02d}{bar:02d}{baz:02d}'.format_map(TransMap(foo=1, baz=3))
        '01{bar:02d}03'

    .. note::

        One exception is that the ``!a`` conversion is not handled correctly.
        This is erroneously converted to ``!r``. Unfortunately there's no
        solution to this; it's a side-effect of the means by which the ``!a``
        conversion is performed.
    """

    def __init__(self, **kw):
        self._kw = kw

    def __contains__(self, key):
        # Every key is reported as present; unknown ones are handled by
        # __getitem__ rather than rejected
        return True

    def __getitem__(self, key):
        try:
            return self._kw[key]
        except KeyError:
            # Unknown field: hand back a placeholder that re-renders itself
            return TransTemplate(key)
class FormatDict:
    """
    Used to format *data*, a :class:`dict`, in a format acceptable as input to
    the :func:`render` function. The *key_title* and *value_title* strings
    provide the cells for the single header row.

    This class is intended to be used within a string for :meth:`str.format`.
    For example::

        >>> from pibootctl.formatter import FormatDict
        >>> d = {'foo': 100, 'bar': 200}
        >>> print('An example table:\\n\\n{s}'.format(s=FormatDict(d)))
        An example table:

        | Key | Value |
        | foo | 100 |
        | bar | 200 |

    The format specification in the format string can be used to request
    different kinds of output, for instance::

        >>> f = FormatDict({'foo': 100, 'bar': 200})
        >>> print('An example list:\\n\\n{f:list}'.format(f=f))
        An example list:

        * foo = 100
        * bar = 200
        >>> print('An example reference list:\\n\\n{f:refs}'.format(f=f))
        An example reference list:

        [foo]: 100
        [bar]: 200

    The default format specification is "table", naturally. If the values are
    tuples that should be expanded into multiple columns, set *value_title* to
    a tuple with the corresponding column titles::

        >>> from pibootctl.formatter import FormatDict
        >>> d = {'foo': (1, 100), 'bar': (2, 200)}
        >>> print('An example table:\\n\\n{s}'.format(s=FormatDict(d,
        ... value_title=('col1', 'col2'))))
        An example table:

        | Key | col1 | col2 |
        | foo | 1 | 100 |
        | bar | 2 | 200 |

    Tuple values are only supported for table output.

    .. note::

        In Python versions before 3.7, you may need to use
        :class:`collections.OrderedDict` to ensure output of the elements of
        *data* in a particular order. Alternatively, you may specify a
        *sort_key* value which will be applied to the key values of the dict
        to sort them prior to output.
    """

    def __init__(self, data, key_title='Key', value_title='Value',
                 sort_key=None):
        self.data = data
        self.key_title = key_title
        self.value_title = value_title
        self.sort_key = sort_key

    def __format__(self, spec):
        """
        Render the dict according to *spec*: ``'table'`` (also used when
        *spec* is empty), ``'list'``, or ``'refs'``. Any other value raises
        :exc:`ValueError`.
        """
        if self.sort_key is None:
            items = self.data.items()
        else:
            items = (
                (key, self.data[key])
                for key in sorted(self.data.keys(), key=self.sort_key)
            )
        if not spec or spec == 'table':
            if isinstance(self.value_title, tuple):
                return '\n'.join(
                    '| {key} | {values} |'.format(
                        key=key,
                        # Cells may be of any type (ints, etc.); str.join
                        # only accepts strings so convert each one first
                        values=' | '.join(str(value) for value in values))
                    for key, values in chain(
                        [(self.key_title, self.value_title)], items
                    )
                )
            else:
                return '\n'.join(
                    '| {key} | {value} |'.format(key=key, value=value)
                    for key, value in chain(
                        [(self.key_title, self.value_title)], items
                    )
                )
        elif spec == 'list':
            return '\n'.join(
                '* {key} = {value}'.format(key=key, value=value)
                for key, value in items
            )
        elif spec == 'refs':
            return '\n'.join(
                '[{key}]: {value}'.format(key=key, value=value)
                for key, value in items
            )
        else:
            raise ValueError('Unknown format spec. {!r}'.format(spec))
def lex(text):
    """
    Internal tokenizer for :func:`render`. Yields ``(token, value)`` pairs,
    one per line of *text*, where token is one of 'row', 'item', 'ref',
    'line', or 'blank'. Two extra 'blank' tokens follow the last line.
    """
    table_row = re.compile(r'^\|.*\|$')
    bullet = re.compile(r'^\*')
    reference = re.compile(r'^\[[0-9A-Z]+\]:')
    source_lines = text.splitlines()
    source_lines.append('')
    for raw in source_lines:
        content = raw.rstrip()
        if table_row.match(content):
            # Drop the outer pipes, then split the remainder into cells
            cells = content[1:-1].split('|')
            yield 'row', [cell.strip() for cell in cells]
        elif bullet.match(content):
            yield 'item', content[1:].strip()
        elif reference.match(content):
            label, target = content.split(':', 1)
            yield 'ref', (label, target.strip())
        elif content:
            yield 'line', content.strip()
        else:
            yield 'blank', None
    # A closing "blank" is always emitted so that the parser is guaranteed to
    # flush whatever construct it is in the middle of
    yield 'blank', None


def parse(text):
    """
    Internal parser for :func:`render`. Consumes the tokens produced by
    :func:`lex` and yields ``(kind, data)`` pairs where kind is one of 'para'
    (data is the joined paragraph text), 'list' (a list of item strings),
    'refs' (a list of ``(ref, link)`` tuples), or 'table' (a list of rows,
    each a list of cell strings).
    """
    state = 'break'
    table = []
    entries = []
    current = []
    paragraph = []

    # Each opener begins a new construct from the token value that
    # introduced it, and returns the state the parser moves to
    def open_table(value):
        nonlocal table
        table = [value]
        return 'table/row'

    def open_list(value):
        nonlocal current, entries
        current = [value]
        entries = []
        return 'list/item'

    def open_refs(value):
        nonlocal entries
        entries = [value]
        return 'refs'

    def open_para(value):
        nonlocal paragraph
        paragraph = [value]
        return 'para'

    def open_break(value):
        return 'break'

    openers = {
        'row':   open_table,
        'item':  open_list,
        'ref':   open_refs,
        'line':  open_para,
        'blank': open_break,
    }

    try:
        for token, value in lex(text):
            if state == 'break':
                state = openers[token](value)
            elif state == 'table/row':
                if token == 'row':
                    table.append(value)
                else:
                    yield 'table', table
                    state = openers[token](value)
            elif state == 'list/item':
                if token == 'line':
                    # Continuation line of the current bullet
                    current.append(value)
                else:
                    entries.append(' '.join(current))
                    if token == 'item':
                        current = [value]
                    elif token == 'blank':
                        # A blank may be followed by more items of this list
                        state = 'list'
                    else:
                        yield 'list', entries
                        state = openers[token](value)
            elif state == 'list':
                if token == 'item':
                    state = 'list/item'
                    current = [value]
                else:
                    yield 'list', entries
                    state = openers[token](value)
            elif state == 'refs':
                if token == 'ref':
                    entries.append(value)
                else:
                    yield 'refs', entries
                    state = openers[token](value)
            elif state == 'para':
                if token == 'line':
                    paragraph.append(value)
                else:
                    yield 'para', ' '.join(paragraph)
                    state = openers[token](value)
            else:
                assert False, 'invalid state'
    except KeyError:
        assert False, 'invalid token'
    assert state == 'break'
def render(text, width=70, list_space=False, table_style=None):
    """
    A crude renderer for a crude markup language intended for formatting
    documentation for the console. The markup recognized by this routine is as
    follows:

    .. code-block:: text

        * Paragraphs must be separated by at least one blank line. They will
          be wrapped to *width*.

        * Items in bulleted lists must start with an asterisk. No list nesting
          is permitted, but items may span several lines (without blank lines
          between them). Items will be wrapped to *width* and indented
          appropriately.

        * Lines beginning and ending with a pipe character are assumed to be
          table rows. Pipe characters also delimit columns within the row. The
          first row is assumed to be a header row and will be separated from
          the rest.

        An example table is shown below:

        | Command | Description                      |
        | cd      | changes the current directory    |
        | ls      | lists the content of a directory |
        | cp      | copies files                     |
        | mv      | renames files                    |
        | rm      | removes files                    |

    If *list_space* is true, each list item is output as a chunk of its own
    (and is therefore separated from its neighbours by a blank line);
    otherwise the items of a list are output on consecutive lines.
    *table_style*, if given, is a dict of keyword arguments passed to
    :class:`TableWrapper` when rendering tables.

    Returns the rendered text as a single string, with chunks separated by
    blank lines.
    """
    if table_style is None:
        table_style = {}
    para_wrapper = TextWrapper(width=width)
    # The hanging indent matches the width of the "* " bullet so that the
    # continuation lines of a wrapped item line up under the item's text
    list_wrapper = TextWrapper(width=width, initial_indent='* ',
                               subsequent_indent='  ')
    table_wrapper = TableWrapper(width=width, **table_style)
    chunks = []
    for token, data in parse(dedent(text)):
        if token == 'para':
            chunks.append(para_wrapper.fill(data))
        elif token == 'list':
            if list_space:
                for item in data:
                    chunks.append(list_wrapper.fill(item))
            else:
                chunks.append('\n'.join(
                    list_wrapper.fill(item)
                    for item in data
                ))
        elif token == 'refs':
            # Pad after the colon so that all the links start in one column
            ref_len = max(len(ref) for ref, link in data)
            chunks.append('\n'.join(
                para_wrapper.fill('{ref}:{space} {link}'.format(
                    ref=ref, link=link, space=' ' * (ref_len - len(ref))))
                for ref, link in data
            ))
        elif token == 'table':
            chunks.append(table_wrapper.fill(data))
        else:
            assert False, 'invalid render state'
    return '\n\n'.join(chunks)