|
""" |
|
Module for formatting output data in console (to string). |
|
""" |
|
from __future__ import annotations |
|
|
|
from shutil import get_terminal_size |
|
from typing import TYPE_CHECKING |
|
|
|
import numpy as np |
|
|
|
from pandas.io.formats.printing import pprint_thing |
|
|
|
if TYPE_CHECKING: |
|
from collections.abc import Iterable |
|
|
|
from pandas.io.formats.format import DataFrameFormatter |
|
|
|
|
|
class StringFormatter:
    """Formatter for string representation of a dataframe.

    Parameters
    ----------
    fmt : DataFrameFormatter
        Formatter that supplies the frame, the per-column lists of cell
        strings and the text-adjustment helper (``fmt.adj``).
    line_width : int, optional
        Maximum width of one output line. ``None`` disables both wrapping
        and fitting: all columns are simply joined side by side.
    """

    def __init__(self, fmt: DataFrameFormatter, line_width: int | None = None) -> None:
        self.fmt = fmt
        self.adj = fmt.adj
        self.frame = fmt.frame
        self.line_width = line_width

    def to_string(self) -> str:
        """Return the rendered frame, followed by the dimensions footer if enabled."""
        text = self._get_string_representation()
        if self.fmt.should_show_dimensions:
            text = f"{text}{self.fmt.dimensions_info}"
        return text

    def _get_strcols(self) -> list[list[str]]:
        """Return the column string lists, with dot separators if truncated."""
        strcols = self.fmt.get_strcols()
        if self.fmt.is_truncated:
            strcols = self._insert_dot_separators(strcols)
        return strcols

    def _get_string_representation(self) -> str:
        """Pick the rendering strategy and return the frame body as text."""
        if self.fmt.frame.empty:
            return self._empty_info_line

        strcols = self._get_strcols()

        if self.line_width is None:
            # No width limit: print the whole frame on one set of lines.
            return self.adj.adjoin(1, *strcols)

        if self._need_to_wrap_around:
            return self._join_multiline(strcols)

        return self._fit_strcols_to_terminal_width(strcols)

    @property
    def _empty_info_line(self) -> str:
        """Three-line summary used instead of a table for an empty frame."""
        return (
            f"Empty {type(self.frame).__name__}\n"
            f"Columns: {pprint_thing(self.frame.columns)}\n"
            f"Index: {pprint_thing(self.frame.index)}"
        )

    @property
    def _need_to_wrap_around(self) -> bool:
        """Whether columns are wrapped into stacked blocks.

        True when ``fmt.max_cols`` is ``None`` or positive; a value of 0
        selects fitting to the terminal width instead.
        """
        return bool(self.fmt.max_cols is None or self.fmt.max_cols > 0)

    def _insert_dot_separators(self, strcols: list[list[str]]) -> list[list[str]]:
        """Insert the horizontal and/or vertical truncation markers."""
        str_index = self.fmt._get_formatted_index(self.fmt.tr_frame)
        index_length = len(str_index)

        if self.fmt.is_truncated_horizontally:
            strcols = self._insert_dot_separator_horizontal(strcols, index_length)

        if self.fmt.is_truncated_vertically:
            strcols = self._insert_dot_separator_vertical(strcols, index_length)

        return strcols

    @property
    def _adjusted_tr_col_num(self) -> int:
        """Position of the dot column in ``strcols``.

        This is ``fmt.tr_col_num`` shifted by one when the index occupies
        the first slot.
        """
        return self.fmt.tr_col_num + 1 if self.fmt.index else self.fmt.tr_col_num

    def _insert_dot_separator_horizontal(
        self, strcols: list[list[str]], index_length: int
    ) -> list[list[str]]:
        """Insert a column of ``" ..."`` cells at the truncation position.

        ``strcols`` is modified in place and also returned.
        """
        strcols.insert(self._adjusted_tr_col_num, [" ..."] * index_length)
        return strcols

    def _insert_dot_separator_vertical(
        self, strcols: list[list[str]], index_length: int
    ) -> list[list[str]]:
        """Insert a row of dots into every column at the truncation row.

        Each column list in ``strcols`` is modified in place; ``strcols`` is
        also returned.
        """
        n_header_rows = index_length - len(self.fmt.tr_frame)
        row_num = self.fmt.tr_row_num
        has_index = self.fmt.index
        # Only read the dot-column position when horizontal truncation actually
        # happened; ``None`` never equals a column number.
        dot_col_num = (
            self._adjusted_tr_col_num if self.fmt.is_truncated_horizontally else None
        )

        for ix, col in enumerate(strcols):
            cwidth = self.adj.len(col[row_num])
            is_dot_col = ix == dot_col_num

            # Narrow columns only get two dots so the marker does not widen them.
            dots = "..." if cwidth > 3 or is_dot_col else ".."

            if ix == 0 and has_index:
                dot_mode = "left"
            else:
                dot_mode = "right"
                if is_dot_col:
                    # The dot column cells are " ...", i.e. four characters wide.
                    cwidth = 4

            dot_str = self.adj.justify([dots], cwidth, mode=dot_mode)[0]
            col.insert(row_num + n_header_rows, dot_str)
        return strcols

    def _join_multiline(self, strcols_input: Iterable[list[str]]) -> str:
        """Wrap the columns into stacked blocks no wider than ``line_width``.

        When the index is shown it is repeated at the start of every block.
        Every block except the last ends with a backslash continuation
        marker on its first line. Blocks are separated by a blank line.
        """
        lwidth = self.line_width
        # Checked before any arithmetic so that a missing width always fails
        # the same way, whether or not an index column is present.
        assert lwidth is not None, "line_width is required to wrap columns"

        adjoin_width = 1
        strcols = list(strcols_input)
        text_len = self.adj.len

        idx: list[str] = []
        if self.fmt.index:
            idx = strcols.pop(0)
            # The index takes up room in every block.
            lwidth -= max(map(text_len, idx)) + adjoin_width

        col_widths = [max(map(text_len, col), default=0) for col in strcols]

        col_bins = _binify(col_widths, lwidth)
        nbins = len(col_bins)

        str_lst = []
        start = 0
        for i, end in enumerate(col_bins):
            row = strcols[start:end]
            if self.fmt.index:
                row.insert(0, idx)
            if nbins > 1:
                nrows = len(row[-1])
                if end <= len(strcols) and i < nbins - 1:
                    # More blocks follow: mark the continuation on the first line.
                    row.append([" \\"] + [" "] * (nrows - 1))
                else:
                    row.append([" "] * nrows)
            str_lst.append(self.adj.adjoin(adjoin_width, *row))
            start = end
        return "\n\n".join(str_lst)

    def _fit_strcols_to_terminal_width(self, strcols: list[list[str]]) -> str:
        """Drop middle columns until the output fits the terminal width.

        Side effects: sets ``fmt.max_cols_fitted`` and calls
        ``fmt.truncate()``, then rebuilds the column strings from the
        re-truncated frame.
        """
        from pandas import Series

        lines = self.adj.adjoin(1, *strcols).split("\n")
        max_len = Series(lines).str.len().max()

        width, _ = get_terminal_size()
        dif = max_len - width

        # One extra character of slack: keep dropping columns until the
        # longest line is strictly narrower than the terminal.
        adj_dif = dif + 1
        col_lens = Series([Series(ele).str.len().max() for ele in strcols])
        n_cols = len(col_lens)
        while adj_dif > 0 and n_cols > 1:
            mid = round(n_cols / 2)
            mid_ix = col_lens.index[mid]
            col_len = col_lens[mid_ix]

            # The joined output has one separator character per column.
            adj_dif -= col_len + 1
            col_lens = col_lens.drop(mid_ix)
            n_cols = len(col_lens)

        # Do not count the index column (``fmt.index`` is used as 0 or 1).
        max_cols_fitted = n_cols - self.fmt.index

        # Always keep at least two data columns.
        max_cols_fitted = max(max_cols_fitted, 2)
        self.fmt.max_cols_fitted = max_cols_fitted

        # Re-truncate the frame with the new limit, then render it again.
        self.fmt.truncate()
        strcols = self._get_strcols()
        return self.adj.adjoin(1, *strcols)
|
|
|
|
|
def _binify(cols: list[int], line_width: int) -> list[int]: |
|
adjoin_width = 1 |
|
bins = [] |
|
curr_width = 0 |
|
i_last_column = len(cols) - 1 |
|
for i, w in enumerate(cols): |
|
w_adjoined = w + adjoin_width |
|
curr_width += w_adjoined |
|
if i_last_column == i: |
|
wrap = curr_width + 1 > line_width and i > 0 |
|
else: |
|
wrap = curr_width + 2 > line_width and i > 0 |
|
if wrap: |
|
bins.append(i) |
|
curr_width = w_adjoined |
|
|
|
bins.append(len(cols)) |
|
return bins |
|
|