# -*- coding: utf-8 -*-
"""Module providing 'sequence awareness'."""
# std imports
import re
import math
import textwrap

# 3rd party
import six
from wcwidth import wcwidth

# local
from blessed._capabilities import CAPABILITIES_CAUSE_MOVEMENT

__all__ = ('Sequence', 'SequenceTextWrapper', 'iter_parse', 'measure_length')


class Termcap(object):
    """Terminal capability of given variable name and pattern."""

    def __init__(self, name, pattern, attribute):
        """
        Class initializer.

        :arg str name: name describing capability.
        :arg str pattern: regular expression string.
        :arg str attribute: :class:`~.Terminal` attribute used to build
            this terminal capability.
        """
        self.name = name
        self.pattern = pattern
        self.attribute = attribute
        # compiled lazily on first access of :attr:`re_compiled`
        self._re_compiled = None

    def __repr__(self):
        # pylint: disable=redundant-keyword-arg
        return '<Termcap {self.name}:{self.pattern!r}>'.format(self=self)

    @property
    def named_pattern(self):
        """Regular expression pattern for capability with named group."""
        # pylint: disable=redundant-keyword-arg
        return '(?P<{self.name}>{self.pattern})'.format(self=self)

    @property
    def re_compiled(self):
        """Compiled regular expression pattern for capability."""
        if self._re_compiled is None:
            self._re_compiled = re.compile(self.pattern)
        return self._re_compiled

    @property
    def will_move(self):
        """Whether capability causes cursor movement."""
        return self.name in CAPABILITIES_CAUSE_MOVEMENT

    def horizontal_distance(self, text):
        """
        Horizontal carriage adjusted by capability, may be negative.

        :rtype: int
        :arg str text: for capabilities *parm_left_cursor*,
            *parm_right_cursor*, provide the matching sequence
            text, its interpreted distance is returned.
        :returns: ``-1`` for *cursor_left* and *backspace*, ``1`` for
            *cursor_right*, ``8`` for *tab* and *ascii_tab*, the signed
            numeric value parsed from ``text`` for *parm_left_cursor* and
            *parm_right_cursor*, and ``0`` for any other capability.
        """
        value = {
            'cursor_left': -1,
            'backspace': -1,
            'cursor_right': 1,
            'tab': 8,
            'ascii_tab': 8,
        }.get(self.name)
        if value is not None:
            return value

        unit = {
            'parm_left_cursor': -1,
            'parm_right_cursor': 1
        }.get(self.name)
        if unit is not None:
            # NOTE(review): relies on the pattern containing a capture group
            # (presumably built with ``match_grouped=True``) and on ``text``
            # matching it -- confirm against the capability definitions.
            value = int(self.re_compiled.match(text).group(1))
            return unit * value

        return 0

    # pylint: disable=too-many-arguments
    @classmethod
    def build(cls, name, capability, attribute, nparams=0,
              numeric=99, match_grouped=False, match_any=False,
              match_optional=False):
        r"""
        Class factory builder for given capability definition.

        :arg str name: Variable name given for this pattern.
        :arg str capability: A unicode string representing a terminal
            capability to build for. When ``nparams`` is non-zero, it
            must be a callable unicode string (such as the result from
            ``getattr(term, 'bold')``.
        :arg str attribute: The terminfo(5) capability name by which this
            pattern is known.
        :arg int nparams: number of positional arguments for callable.
        :arg int numeric: Value to substitute into capability when
            generating pattern; ``None`` is treated as ``99``.
        :arg bool match_grouped: If the numeric pattern should be
            grouped, ``(\d+)`` when ``True``, ``\d+`` default.
        :arg bool match_any: When keyword argument ``nparams`` is given,
            *any* numeric found in output is suitable for building as
            pattern ``(\d+)``.  Otherwise, only the first matching value of
            range *(numeric - 1)* through *(numeric + 1)* will be replaced by
            pattern ``(\d+)`` in builder.
        :arg bool match_optional: When ``True``, building of numeric patterns
            containing ``(\d+)`` will be built as optional, ``(\d+)?``.
        :rtype: blessed.sequences.Termcap
        :returns: Terminal capability instance for given capability definition
        """
        _numeric_regex = r'\d+'
        if match_grouped:
            _numeric_regex = r'(\d+)'
        if match_optional:
            # takes precedence over ``match_grouped``
            _numeric_regex = r'(\d+)?'
        numeric = 99 if numeric is None else numeric

        # basic capability attribute, not used as a callable
        if nparams == 0:
            return cls(name, re.escape(capability), attribute)

        # a callable capability accepting numeric argument
        _outp = re.escape(capability(*(numeric,) * nparams))
        if not match_any:
            # the capability may have adjusted the parameter by one in
            # either direction, so probe the neighbouring values as well
            for num in range(numeric - 1, numeric + 2):
                if str(num) in _outp:
                    pattern = _outp.replace(str(num), _numeric_regex)
                    return cls(name, pattern, attribute)

        # Replace every run of digits. A callable is used as replacement so
        # that backslashes in ``_numeric_regex`` are inserted literally and
        # not interpreted as replacement-template escapes.
        pattern = re.sub(r'\d+', lambda x: _numeric_regex, _outp)
        return cls(name, pattern, attribute)


class SequenceTextWrapper(textwrap.TextWrapper):
    """Docstring overridden."""

    def __init__(self, width, term, **kwargs):
        """
        Class initializer.

        This class supports the :meth:`~.Terminal.wrap` method.

        :arg int width: maximum printable width of wrapped lines.
        :arg term: terminal instance handed to :class:`Sequence` and
            :func:`iter_parse` when measuring text.
        :arg kwargs: passed through to :class:`textwrap.TextWrapper`.
        """
        self.term = term
        textwrap.TextWrapper.__init__(self, width, **kwargs)

    def _wrap_chunks(self, chunks):
        """
        Sequence-aware variant of :meth:`textwrap.TextWrapper._wrap_chunks`.

        :arg list chunks: text chunks to wrap; the list is reversed and
            consumed in place.
        :raises ValueError: ``self.width`` is not a positive integer
        :rtype: list
        :returns: text chunks adjusted for width

        The standard python textwrap would incorrectly determine the length
        of a string containing sequences, and may also consider sequences
        part of a "word" that may be broken by hyphen (``-``).  This
        implementation measures chunks by their printable length, so that
        word boundaries are not broken mid-sequence.
        """
        lines = []
        # Check the type first: comparing a non-numeric width with ``0``
        # raises TypeError on python 3 before the ValueError can be raised.
        if not isinstance(self.width, int) or self.width <= 0:
            raise ValueError(
                "invalid width {0!r}({1!r}) (must be integer > 0)"
                .format(self.width, type(self.width)))

        term = self.term
        # attribute may be missing; whitespace is dropped in that case
        drop_whitespace = getattr(self, 'drop_whitespace', True)
        # reversed so that the next chunk is cheaply taken with pop()
        chunks.reverse()
        while chunks:
            cur_line = []
            cur_len = 0
            indent = self.subsequent_indent if lines else self.initial_indent
            width = self.width - len(indent)

            # discard whitespace at the start of all lines but the first
            if drop_whitespace and (
                    Sequence(chunks[-1], term).strip() == '' and lines):
                del chunks[-1]

            # take as many chunks as fit within the printable width
            while chunks:
                chunk_len = Sequence(chunks[-1], term).length()
                if cur_len + chunk_len > width:
                    break
                cur_line.append(chunks.pop())
                cur_len += chunk_len

            # the next chunk cannot fit on any line by itself
            if chunks and Sequence(chunks[-1], term).length() > width:
                self._handle_long_word(chunks, cur_line, cur_len, width)

            # discard whitespace at the end of the line
            if drop_whitespace and (
                    cur_line and Sequence(cur_line[-1], term).strip() == ''):
                del cur_line[-1]

            if cur_line:
                lines.append(indent + u''.join(cur_line))
        return lines

    def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
        """
        Sequence-aware :meth:`textwrap.TextWrapper._handle_long_word`.

        :arg list reversed_chunks: remaining chunks, next chunk last;
            modified in place.
        :arg list cur_line: chunks of the line being built; modified in place.
        :arg int cur_len: printable length already used by ``cur_line``.
        :arg int width: printable width available to the line.

        The chunk is split only at boundaries yielded by :func:`iter_parse`,
        so that a terminal sequence is never broken in the middle, and the
        fit is decided by printable length rather than string length.
        """
        # Figure out when indent is larger than the specified width, and make
        # sure at least one character is stripped off on every pass
        space_left = 1 if width < 1 else width - cur_len

        # If we're allowed to break long words, then do so: put as much
        # of the next chunk onto the current line as will fit.
        if self.break_long_words:
            term = self.term
            chunk = reversed_chunks[-1]
            idx = nxt = 0
            for text, _ in iter_parse(term, chunk):
                nxt += len(text)
                if Sequence(chunk[:nxt], term).length() > space_left:
                    break
                idx = nxt
            if idx == 0 and not cur_line:
                # Not even the first element fits (for example a double-width
                # character and a single cell of space) and the line is
                # empty: take that element anyway, exceeding the width,
                # otherwise the caller would never make progress.
                idx = nxt
            cur_line.append(chunk[:idx])
            reversed_chunks[-1] = chunk[idx:]

        # Otherwise, we have to preserve the long word intact.  Only add
        # it to the current line if there's nothing already there --
        # that minimizes how much we violate the width constraint.
        elif not cur_line:
            cur_line.append(reversed_chunks.pop())

        # If we're not allowed to break long words, and there's already
        # text on the current line, do nothing.  Next time through the
        # main loop of _wrap_chunks(), we'll wind up here again, but
        # cur_len will be zero, so the next line will be entirely
        # devoted to the long word that we can't handle right now.


SequenceTextWrapper.__doc__ = textwrap.TextWrapper.__doc__


class Sequence(six.text_type):
    """
    A "sequence-aware" version of the base :class:`str` class.

    This unicode-derived class understands the effect of escape sequences
    of printable length, allowing a properly implemented :meth:`rjust`,
    :meth:`ljust`, :meth:`center`, and :meth:`length`.
    """

    def __new__(cls, sequence_text, term):
        # pylint: disable = missing-return-doc, missing-return-type-doc
        """
        Class constructor.

        :arg str sequence_text: A string that may contain sequences.
        :arg blessed.Terminal term: :class:`~.Terminal` instance.
        """
        new = six.text_type.__new__(cls, sequence_text)
        new._term = term
        return new

    def ljust(self, width, fillchar=u' '):
        """
        Return string containing sequences, left-adjusted.

        :arg int width: Total width given to left-adjust ``text``; any
            object providing ``__index__`` is accepted.
        :arg str fillchar: String for padding right-of ``text``.
        :returns: String of ``text``, left-aligned by ``width``.
        :rtype: str
        """
        # number of whole repetitions of ``fillchar`` that fit in the
        # remaining printable width, never negative
        rightside = fillchar * int(
            (max(0.0, float(width.__index__() - self.length()))) / float(len(fillchar)))
        return u''.join((self, rightside))

    def rjust(self, width, fillchar=u' '):
        """
        Return string containing sequences, right-adjusted.

        :arg int width: Total width given to right-adjust ``text``; any
            object providing ``__index__`` is accepted.
        :arg str fillchar: String for padding left-of ``text``.
        :returns: String of ``text``, right-aligned by ``width``.
        :rtype: str
        """
        # number of whole repetitions of ``fillchar`` that fit in the
        # remaining printable width, never negative
        leftside = fillchar * int(
            (max(0.0, float(width.__index__() - self.length()))) / float(len(fillchar)))
        return u''.join((leftside, self))

    def center(self, width, fillchar=u' '):
        """
        Return string containing sequences, centered.

        :arg int width: Total width given to center ``text``; any object
            providing ``__index__`` is accepted.
        :arg str fillchar: String for padding left and right-of ``text``.
        :returns: String of ``text``, centered by ``width``.
        :rtype: str
        """
        # half of the remaining printable width; when it is odd the left
        # side is rounded down and the right side is rounded up
        split = max(0.0, float(width.__index__()) - self.length()) / 2
        leftside = fillchar * int(
            (max(0.0, math.floor(split))) / float(len(fillchar)))
        rightside = fillchar * int(
            (max(0.0, math.ceil(split))) / float(len(fillchar)))
        return u''.join((leftside, self, rightside))

    def truncate(self, width):
        """
        Truncate a string in a sequence-aware manner.

        Any printable characters beyond ``width`` are removed, while all
        sequences remain in place. Horizontal Sequences are first expanded
        by :meth:`padd`.

        :arg int width: The printable width to truncate the string to.
        :rtype: str
        :returns: String truncated to at most ``width`` printable characters.
        """
        output = ""
        current_width = 0
        target_width = width.__index__()
        parsed_seq = iter_parse(self._term, self.padd())

        # Retain all text until non-cap width reaches desired width
        for text, cap in parsed_seq:
            if not cap:
                # use wcwidth clipped to 0 because it can sometimes return -1
                current_width += max(wcwidth(text), 0)
                if current_width > target_width:
                    break
            output += text

        # Return with remaining caps appended; the generator resumes after
        # the character that exceeded the width.
        return output + ''.join(text for text, cap in parsed_seq if cap)

    def length(self):
        r"""
        Return the printable length of string containing sequences.

        Strings containing ``term.left`` or ``\b`` will cause "overstrike",
        but a length less than 0 is not ever returned. So ``_\b+`` is a
        length of 1 (displays as ``+``), but ``\b`` alone is simply a
        length of 0.

        Some characters may consume more than one cell, mainly those CJK
        Unified Ideographs (Chinese, Japanese, Korean) defined by Unicode
        as half or full-width characters.

        For example:

            >>> from blessed import Terminal
            >>> from blessed.sequences import Sequence
            >>> term = Terminal()
            >>> msg = term.clear + term.red(u'コンニチハ')
            >>> Sequence(msg, term).length()
            10

        .. note:: Although accounted for, strings containing sequences such
            as ``term.clear`` will not give accurate returns, it is not
            considered lengthy (a length of 0).

        :rtype: int
        :returns: sum of the cell widths of all printable characters.
        """
        # because control characters may return -1, "clip" their length to 0.
        return sum(max(wcwidth(w_char), 0) for w_char in self.padd(strip=True))

    def strip(self, chars=None):
        """
        Return string of sequences, leading and trailing whitespace removed.

        :arg str chars: Remove characters in chars instead of whitespace.
        :rtype: str
        :returns: string with all sequences removed, and leading and
            trailing whitespace removed.
        """
        return self.strip_seqs().strip(chars)

    def lstrip(self, chars=None):
        """
        Return string of all sequences and leading whitespace removed.

        :arg str chars: Remove characters in chars instead of whitespace.
        :rtype: str
        :returns: string with all sequences removed, and leading
            whitespace removed.
        """
        return self.strip_seqs().lstrip(chars)

    def rstrip(self, chars=None):
        """
        Return string of all sequences and trailing whitespace removed.

        :arg str chars: Remove characters in chars instead of whitespace.
        :rtype: str
        :returns: string with all sequences removed, and trailing
            whitespace removed.
        """
        return self.strip_seqs().rstrip(chars)

    def strip_seqs(self):
        """
        Return ``text`` stripped of only its terminal sequences.

        :rtype: str
        :returns: Text with terminal sequences removed
        """
        return self.padd(strip=True)

    def padd(self, strip=False):
        """
        Return non-destructive horizontal movement as destructive spacing.

        :arg bool strip: Strip terminal sequences
        :rtype: str
        :returns: Text adjusted for horizontal movement
        """
        outp = ''
        for text, cap in iter_parse(self._term, self):
            if not cap:
                # plain character, always retained
                outp += text
                continue

            value = cap.horizontal_distance(text)
            if value > 0:
                # movement to the right becomes spaces
                outp += ' ' * value
            elif value < 0:
                # movement to the left removes what was already emitted
                outp = outp[:value]
            elif not strip:
                # sequence without horizontal movement, kept unless stripping
                outp += text
        return outp


def iter_parse(term, text):
    """
    Generator yields (text, capability) for characters of ``text``.

    value for ``capability`` may be ``None``, where ``text`` is
    :class:`str` of length 1.  Otherwise, ``text`` is a full
    matching sequence of given capability.

    :arg term: object providing the compiled pattern
        ``_caps_compiled_any`` and the mapping ``caps``.
    :arg str text: string that may contain sequences.
    """
    for match in term._caps_compiled_any.finditer(text):  # pylint: disable=protected-access
        name = match.lastgroup
        value = match.group(name)
        if name == 'MISMATCH':
            # text that matched no capability
            yield (value, None)
        else:
            yield value, term.caps[name]


def measure_length(text, term):
    """
    .. deprecated:: 1.12.0.

    :arg str text: string that may begin with a sequence.
    :arg term: terminal instance passed to :func:`iter_parse`.
    :rtype: int
    :returns: Length of the first sequence in the string, ``0`` when the
        string is empty or does not begin with a sequence.
    """
    try:
        text, capability = next(iter_parse(term, text))
        if capability:
            return len(text)
    except StopIteration:
        return 0
    return 0