File size: 6,320 Bytes
7885a28 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 |
""" io on the clipboard """
from __future__ import annotations
from io import StringIO
from typing import TYPE_CHECKING
import warnings
from pandas._libs import lib
from pandas.util._exceptions import find_stack_level
from pandas.util._validators import check_dtype_backend
from pandas.core.dtypes.generic import ABCDataFrame
from pandas import (
get_option,
option_context,
)
if TYPE_CHECKING:
from pandas._typing import DtypeBackend
def read_clipboard(
sep: str = r"\s+",
dtype_backend: DtypeBackend | lib.NoDefault = lib.no_default,
**kwargs,
): # pragma: no cover
r"""
Read text from clipboard and pass to :func:`~pandas.read_csv`.
Parses clipboard contents similar to how CSV files are parsed
using :func:`~pandas.read_csv`.
Parameters
----------
sep : str, default '\\s+'
A string or regex delimiter. The default of ``'\\s+'`` denotes
one or more whitespace characters.
dtype_backend : {'numpy_nullable', 'pyarrow'}, default 'numpy_nullable'
Back-end data type applied to the resultant :class:`DataFrame`
(still experimental). Behaviour is as follows:
* ``"numpy_nullable"``: returns nullable-dtype-backed :class:`DataFrame`
(default).
* ``"pyarrow"``: returns pyarrow-backed nullable :class:`ArrowDtype`
DataFrame.
.. versionadded:: 2.0
**kwargs
See :func:`~pandas.read_csv` for the full argument list.
Returns
-------
DataFrame
A parsed :class:`~pandas.DataFrame` object.
See Also
--------
DataFrame.to_clipboard : Copy object to the system clipboard.
read_csv : Read a comma-separated values (csv) file into DataFrame.
read_fwf : Read a table of fixed-width formatted lines into DataFrame.
Examples
--------
>>> df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=['A', 'B', 'C'])
>>> df.to_clipboard() # doctest: +SKIP
>>> pd.read_clipboard() # doctest: +SKIP
A B C
0 1 2 3
1 4 5 6
"""
encoding = kwargs.pop("encoding", "utf-8")
# only utf-8 is valid for passed value because that's what clipboard
# supports
if encoding is not None and encoding.lower().replace("-", "") != "utf8":
raise NotImplementedError("reading from clipboard only supports utf-8 encoding")
check_dtype_backend(dtype_backend)
from pandas.io.clipboard import clipboard_get
from pandas.io.parsers import read_csv
text = clipboard_get()
# Try to decode (if needed, as "text" might already be a string here).
try:
text = text.decode(kwargs.get("encoding") or get_option("display.encoding"))
except AttributeError:
pass
# Excel copies into clipboard with \t separation
# inspect no more then the 10 first lines, if they
# all contain an equal number (>0) of tabs, infer
# that this came from excel and set 'sep' accordingly
lines = text[:10000].split("\n")[:-1][:10]
# Need to remove leading white space, since read_csv
# accepts:
# a b
# 0 1 2
# 1 3 4
counts = {x.lstrip(" ").count("\t") for x in lines}
if len(lines) > 1 and len(counts) == 1 and counts.pop() != 0:
sep = "\t"
# check the number of leading tabs in the first line
# to account for index columns
index_length = len(lines[0]) - len(lines[0].lstrip(" \t"))
if index_length != 0:
kwargs.setdefault("index_col", list(range(index_length)))
# Edge case where sep is specified to be None, return to default
if sep is None and kwargs.get("delim_whitespace") is None:
sep = r"\s+"
# Regex separator currently only works with python engine.
# Default to python if separator is multi-character (regex)
if len(sep) > 1 and kwargs.get("engine") is None:
kwargs["engine"] = "python"
elif len(sep) > 1 and kwargs.get("engine") == "c":
warnings.warn(
"read_clipboard with regex separator does not work properly with c engine.",
stacklevel=find_stack_level(),
)
return read_csv(StringIO(text), sep=sep, dtype_backend=dtype_backend, **kwargs)
def to_clipboard(
obj, excel: bool | None = True, sep: str | None = None, **kwargs
) -> None: # pragma: no cover
"""
Attempt to write text representation of object to the system clipboard
The clipboard can be then pasted into Excel for example.
Parameters
----------
obj : the object to write to the clipboard
excel : bool, defaults to True
if True, use the provided separator, writing in a csv
format for allowing easy pasting into excel.
if False, write a string representation of the object
to the clipboard
sep : optional, defaults to tab
other keywords are passed to to_csv
Notes
-----
Requirements for your platform
- Linux: xclip, or xsel (with PyQt4 modules)
- Windows:
- OS X:
"""
encoding = kwargs.pop("encoding", "utf-8")
# testing if an invalid encoding is passed to clipboard
if encoding is not None and encoding.lower().replace("-", "") != "utf8":
raise ValueError("clipboard only supports utf-8 encoding")
from pandas.io.clipboard import clipboard_set
if excel is None:
excel = True
if excel:
try:
if sep is None:
sep = "\t"
buf = StringIO()
# clipboard_set (pyperclip) expects unicode
obj.to_csv(buf, sep=sep, encoding="utf-8", **kwargs)
text = buf.getvalue()
clipboard_set(text)
return
except TypeError:
warnings.warn(
"to_clipboard in excel mode requires a single character separator.",
stacklevel=find_stack_level(),
)
elif sep is not None:
warnings.warn(
"to_clipboard with excel=False ignores the sep argument.",
stacklevel=find_stack_level(),
)
if isinstance(obj, ABCDataFrame):
# str(df) has various unhelpful defaults, like truncation
with option_context("display.max_colwidth", None):
objstr = obj.to_string(**kwargs)
else:
objstr = str(obj)
clipboard_set(objstr)
|