from textwrap import dedent | |
import numpy as np | |
import pytest | |
from pandas.errors import ( | |
PyperclipException, | |
PyperclipWindowsException, | |
) | |
import pandas as pd | |
from pandas import ( | |
NA, | |
DataFrame, | |
Series, | |
get_option, | |
read_clipboard, | |
) | |
import pandas._testing as tm | |
from pandas.core.arrays import ( | |
ArrowStringArray, | |
StringArray, | |
) | |
from pandas.io.clipboard import ( | |
CheckedCall, | |
_stringifyText, | |
init_qt_clipboard, | |
) | |
def build_kwargs(sep, excel):
    """
    Collect the keyword arguments to forward to ``to_clipboard``.

    The literal string ``"default"`` marks an argument that must be left out
    of the result, so that the callee falls back to its own default.

    Parameters
    ----------
    sep : object
        Value for the ``sep`` keyword; skipped when equal to ``"default"``.
    excel : object
        Value for the ``excel`` keyword; skipped when equal to ``"default"``.

    Returns
    -------
    dict
        Holds ``"excel"`` and/or ``"sep"`` (inserted in that order) for each
        argument that was not ``"default"``.
    """
    candidates = (("excel", excel), ("sep", sep))
    return {key: value for key, value in candidates if value != "default"}
def df(request):
    """
    Build the sample DataFrame named by ``request.param``.

    Parameters
    ----------
    request : object
        Object whose ``param`` attribute is one of ``"delims"``, ``"utf8"``,
        ``"utf16"``, ``"string"``, ``"long"``, ``"nonascii"``,
        ``"colwidth"``, ``"mixed"``, ``"float"`` or ``"int"``.

    Returns
    -------
    DataFrame
        The frame associated with the requested name.

    Raises
    ------
    ValueError
        If ``request.param`` matches none of the names above.
    """
    kind = request.param

    def _grid(cells):
        # Lay 15 cells out as a 5x3 frame with columns a, b, c.
        return DataFrame(np.array(cells).reshape(5, 3), columns=list("abc"))

    def _delims():
        # Cells containing quotes, commas, tabs and a pipe.
        return DataFrame({"a": ['"a,\t"b|c', "d\tef`"], "b": ["hi'j", "k''lm"]})

    def _utf8():
        return DataFrame({"a": ["µasd", "Ωœ∑`"], "b": ["øπ∆˚¬", "œ∑`®"]})

    def _utf16():
        # U+1F44D lies outside the Basic Multilingual Plane.
        thumbs = "\U0001f44d\U0001f44d"
        return DataFrame({"a": [thumbs, thumbs], "b": ["abc", "def"]})

    def _string():
        return _grid([f"i-{i}" for i in range(15)])

    def _long():
        # One row more than the ``display.max_rows`` option.
        n_rows = get_option("display.max_rows") + 1
        values = np.random.default_rng(2).integers(0, 10, size=(n_rows, 3))
        return DataFrame(values, columns=list("abc"))

    def _nonascii():
        return DataFrame({"en": "in English".split(), "es": "en español".split()})

    def _colwidth():
        # Each cell is one character longer than ``display.max_colwidth``.
        width = get_option("display.max_colwidth") + 1
        return _grid(["x" * width for _ in range(15)])

    def _mixed():
        return DataFrame(
            {
                "a": np.arange(1.0, 6.0) + 0.01,
                "b": np.arange(1, 6).astype(np.int64),
                "c": list("abcde"),
            }
        )

    def _float():
        values = np.random.default_rng(2).random((5, 3))
        return DataFrame(values, columns=list("abc"))

    def _int():
        values = np.random.default_rng(2).integers(0, 10, (5, 3))
        return DataFrame(values, columns=list("abc"))

    builders = (
        ("delims", _delims),
        ("utf8", _utf8),
        ("utf16", _utf16),
        ("string", _string),
        ("long", _long),
        ("nonascii", _nonascii),
        ("colwidth", _colwidth),
        ("mixed", _mixed),
        ("float", _float),
        ("int", _int),
    )
    # Only the selected builder runs, so options are read lazily.
    for name, build in builders:
        if kind == name:
            return build()
    raise ValueError
def mock_ctypes(monkeypatch):
    """
    Replace ``ctypes.WinError`` with a stub while the generator is suspended.

    The stub takes no arguments and returns the string ``"Window Error"``.
    The patch is applied inside ``monkeypatch.context()``.
    """

    def _fake_win_error():
        return "Window Error"

    with monkeypatch.context() as patcher:
        # raising=False: WinError won't exist on non-windows platforms, so
        # the attribute has to be created rather than replaced there.
        patcher.setattr("ctypes.WinError", _fake_win_error, raising=False)
        yield
def test_checked_call_with_bad_call(monkeypatch):
    """
    Wrap a callable that returns a falsey value in CheckedCall while
    ``get_errno`` is patched to return a truthy value, and check that
    calling the wrapper raises PyperclipWindowsException.
    """

    def _return_false():
        return False

    # Falsey result plus truthy errno is the combination under test.
    monkeypatch.setattr("pandas.io.clipboard.get_errno", lambda: True)

    # NOTE(review): the "(Window Error)" suffix presumably comes from the
    # stubbed ctypes.WinError (see mock_ctypes) -- confirm it is applied here.
    expected = rf"Error calling {_return_false.__name__} \(Window Error\)"
    with pytest.raises(PyperclipWindowsException, match=expected):
        wrapped = CheckedCall(_return_false)
        wrapped()
def test_checked_call_with_valid_call(monkeypatch):
    """
    Wrap a callable that returns a truthy value in CheckedCall while
    ``get_errno`` is patched to return a falsey value.

    No exception is expected, and the wrapper should hand back the result
    of ``_return_true``.
    """

    def _return_true():
        return True

    monkeypatch.setattr("pandas.io.clipboard.get_errno", lambda: False)

    # The wrapped callable returns a truthy value, so nothing should raise.
    wrapped = CheckedCall(_return_true)
    outcome = wrapped()
    assert outcome is True
def test_stringify_text(text):
    """
    Check ``_stringifyText`` against ``text``.

    For ``str``, ``int``, ``float`` and ``bool`` inputs the result must equal
    ``str(text)``; for anything else a PyperclipException naming the
    offending type is expected.
    """
    accepted = (str, int, float, bool)
    if not isinstance(text, accepted):
        expected = (
            "only str, int, float, and bool values "
            f"can be copied to the clipboard, not {type(text).__name__}"
        )
        with pytest.raises(PyperclipException, match=expected):
            _stringifyText(text)
        return

    assert _stringifyText(text) == str(text)
def set_pyqt_clipboard(monkeypatch):
    """
    Point ``pd.io.clipboard.clipboard_set`` / ``clipboard_get`` at the pair
    of functions returned by ``init_qt_clipboard()`` while the generator is
    suspended.
    """
    copy_fn, paste_fn = init_qt_clipboard()
    replacements = (("clipboard_set", copy_fn), ("clipboard_get", paste_fn))
    with monkeypatch.context() as patcher:
        for attr_name, replacement in replacements:
            patcher.setattr(pd.io.clipboard, attr_name, replacement)
        yield
def clipboard(qapp):
    """
    Yield the object returned by ``qapp.clipboard()`` and call its
    ``clear()`` method once the generator is resumed.
    """
    qt_clipboard = qapp.clipboard()
    yield qt_clipboard
    # Runs after the consumer is done with the clipboard.
    qt_clipboard.clear()
class TestClipboard: | |
# Test that default arguments copy as tab delimited | |
# Test that explicit delimiters are respected | |
def test_round_trip_frame_sep(self, df, sep, encoding):
    """
    Copy ``df`` with ``excel=None`` and the given ``sep``/``encoding``, read
    it back with the first column as index, and expect an equal frame.
    """
    # A falsey ``sep`` (such as None) is read back using a tab.
    read_sep = sep if sep else "\t"
    df.to_clipboard(excel=None, sep=sep, encoding=encoding)
    round_tripped = read_clipboard(sep=read_sep, index_col=0, encoding=encoding)
    tm.assert_frame_equal(df, round_tripped)
# Test white space separator | |
def test_round_trip_frame_string(self, df):
    """
    Copy ``df`` with ``excel=False`` and ``sep=None``, read it back with the
    default arguments, and compare string renderings and shapes.
    """
    df.to_clipboard(excel=False, sep=None)
    pasted = read_clipboard()
    copied_repr, pasted_repr = df.to_string(), pasted.to_string()
    assert copied_repr == pasted_repr
    assert df.shape == pasted.shape
# Two character separator is not supported in to_clipboard | |
# Test that multi-character separators are not silently passed | |
def test_excel_sep_warning(self, df):
    """
    Expect a UserWarning when ``to_clipboard`` is called in excel mode with
    a separator that is longer than one character.
    """
    expected_msg = "to_clipboard in excel mode requires a single character separator."
    warning_check = tm.assert_produces_warning(
        UserWarning,
        match=expected_msg,
        check_stacklevel=False,
    )
    with warning_check:
        # r"\t" is two characters: a backslash followed by "t".
        df.to_clipboard(excel=True, sep=r"\t")
# Separator is ignored when excel=False and should produce a warning | |
def test_copy_delim_warning(self, df):
    """
    Expect a UserWarning about the ignored separator when ``to_clipboard``
    is called with ``excel=False`` together with an explicit ``sep``.

    The warning category and message are pinned so that an unrelated
    warning raised during the call cannot satisfy the test.
    """
    with tm.assert_produces_warning(UserWarning, match="ignores the sep argument"):
        df.to_clipboard(excel=False, sep="\t")
# Tests that the default behavior of to_clipboard is
# tab-delimited output with excel=True
def test_clipboard_copy_tabs_default(self, sep, excel, df, clipboard):
    """
    Copy ``df`` with the keyword arguments built from ``sep`` and ``excel``
    and expect the clipboard text to equal the tab-separated CSV of ``df``.
    """
    df.to_clipboard(**build_kwargs(sep, excel))
    expected_text = df.to_csv(sep="\t")
    assert clipboard.text() == expected_text
# Tests reading of white space separated tables | |
def test_clipboard_copy_strings(self, sep, df):
    """
    Copy ``df`` with ``excel=False`` (plus ``sep`` unless it is
    ``"default"``), read it back splitting on whitespace, and compare the
    string renderings and shapes.
    """
    df.to_clipboard(**build_kwargs(sep, False))
    parsed = read_clipboard(sep=r"\s+")
    assert parsed.to_string() == df.to_string()
    assert df.shape == parsed.shape
def test_read_clipboard_infer_excel(self, clipboard):
    """
    Exercise ``read_clipboard`` on tab-separated and space-separated text.

    Each text is placed on the clipboard with ``clipboard.setText`` and read
    back with the python parser engine.
    """
    # gh-19010: avoid warnings
    clip_kwargs = {"engine": "python"}

    # NOTE(review): ``.strip()`` is applied to the literal before ``dedent``
    # receives it, so the first row reaches ``dedent`` without leading
    # whitespace.
    text = dedent(
        """
        John James\tCharlie Mingus
        1\t2
        4\tHarry Carney
        """.strip()
    )
    clipboard.setText(text)
    df = read_clipboard(**clip_kwargs)

    # excel data is parsed correctly
    assert df.iloc[1, 1] == "Harry Carney"

    # having diff tab counts doesn't trigger it
    # (only the header row of this text contains a tab)
    text = dedent(
        """
        a\t b
        1 2
        3 4
        """.strip()
    )
    clipboard.setText(text)
    res = read_clipboard(**clip_kwargs)

    # Reference text: the same rows with no tab anywhere.
    text = dedent(
        """
        a b
        1 2
        3 4
        """.strip()
    )
    clipboard.setText(text)
    exp = read_clipboard(**clip_kwargs)

    # Both texts are expected to parse to the same frame.
    tm.assert_frame_equal(res, exp)
def test_infer_excel_with_nulls(self, clipboard):
    """
    Read tab-separated clipboard text whose second data row has an empty
    first cell, and expect that cell to come back as a missing value.
    """
    # GH41108
    clipboard.setText("col1\tcol2\n1\tred\n\tblue\n2\tgreen")
    expected = DataFrame(
        data={"col1": [1, None, 2], "col2": ["red", "blue", "green"]}
    )
    parsed = read_clipboard()

    # excel data is parsed correctly
    tm.assert_frame_equal(parsed, expected)
def test_infer_excel_with_multiindex(self, clipboard, multiindex):
    """
    Put ``multiindex[0]`` on the clipboard, read it back, and expect a frame
    with columns ``col1``/``col2`` indexed by ``multiindex[1]``.
    """
    # GH41108
    clipboard_text = multiindex[0]
    expected_index = multiindex[1]

    clipboard.setText(clipboard_text)
    parsed = read_clipboard()
    expected = DataFrame(
        data={"col1": [1, None, 2], "col2": ["red", "blue", "green"]},
        index=expected_index,
    )

    # excel data is parsed correctly
    tm.assert_frame_equal(parsed, expected)
def test_invalid_encoding(self, df):
    """
    Passing ``encoding="ascii"`` must be rejected: ``to_clipboard`` with
    ValueError and ``read_clipboard`` with NotImplementedError, both with
    the same message.
    """
    expected_msg = "clipboard only supports utf-8 encoding"
    cases = (
        (ValueError, lambda: df.to_clipboard(encoding="ascii")),
        (NotImplementedError, lambda: read_clipboard(encoding="ascii")),
    )
    # Same order as before: the write path first, then the read path.
    for exc_type, call in cases:
        with pytest.raises(exc_type, match=expected_msg):
            call()
def test_raw_roundtrip(self, data):
    """
    Round-trip a one-cell frame holding ``data`` through the clipboard using
    the default arguments on both sides.
    """
    # PR #25040 wide unicode wasn't copied correctly on PY3 on windows
    original = DataFrame({"data": [data]})
    original.to_clipboard()
    pasted = read_clipboard()
    tm.assert_frame_equal(original, pasted)
def test_read_clipboard_dtype_backend(
    self, clipboard, string_storage, dtype_backend, engine
):
    """
    Read a small CSV from the clipboard with the requested ``dtype_backend``
    and ``engine`` and compare against a frame of nullable / pyarrow-backed
    columns.

    pyarrow is imported through ``pytest.importorskip`` whenever the expected
    frame needs it, so the test is skipped when pyarrow is not installed.
    """
    # GH#50502
    from pandas.arrays import ArrowExtensionArray

    pyarrow_backend = dtype_backend == "pyarrow"

    # Every branch except the pure-python string storage builds its expected
    # values from pyarrow arrays, so bind ``pa`` up front for all of them.
    if string_storage != "python" or pyarrow_backend:
        pa = pytest.importorskip("pyarrow")

    if string_storage == "python":
        string_array = StringArray(np.array(["x", "y"], dtype=np.object_))
        string_array_na = StringArray(np.array(["x", NA], dtype=np.object_))
    elif pyarrow_backend and engine != "c":
        string_array = ArrowExtensionArray(pa.array(["x", "y"]))
        string_array_na = ArrowExtensionArray(pa.array(["x", None]))
    else:
        string_array = ArrowStringArray(pa.array(["x", "y"]))
        string_array_na = ArrowStringArray(pa.array(["x", None]))

    # Written as adjacent literals so the text does not depend on how this
    # block is indented; the resulting string is unchanged.
    text = (
        "a,b,c,d,e,f,g,h,i\n"
        "x,1,4.0,x,2,4.0,,True,False\n"
        "y,2,5.0,,,,,False,"
    )
    clipboard.setText(text)

    with pd.option_context("mode.string_storage", string_storage):
        result = read_clipboard(sep=",", dtype_backend=dtype_backend, engine=engine)

    expected = DataFrame(
        {
            "a": string_array,
            "b": Series([1, 2], dtype="Int64"),
            "c": Series([4.0, 5.0], dtype="Float64"),
            "d": string_array_na,
            "e": Series([2, NA], dtype="Int64"),
            "f": Series([4.0, NA], dtype="Float64"),
            "g": Series([NA, NA], dtype="Int64"),
            "h": Series([True, False], dtype="boolean"),
            "i": Series([False, NA], dtype="boolean"),
        }
    )
    if pyarrow_backend:
        # Rebuild every column as a pyarrow-backed extension array.
        expected = DataFrame(
            {
                col: ArrowExtensionArray(pa.array(expected[col], from_pandas=True))
                for col in expected.columns
            }
        )
        # Column "g" is entirely missing, so it is built from plain Nones.
        expected["g"] = ArrowExtensionArray(pa.array([None, None]))

    tm.assert_frame_equal(result, expected)
def test_invalid_dtype_backend(self):
    """
    ``read_clipboard`` must raise ValueError for ``dtype_backend="numpy"``.
    """
    expected_msg = (
        "dtype_backend numpy is invalid, only 'numpy_nullable' and "
        "'pyarrow' are allowed."
    )
    rejects_backend = pytest.raises(ValueError, match=expected_msg)
    with rejects_backend:
        read_clipboard(dtype_backend="numpy")
def test_to_clipboard_pos_args_deprecation(self):
    """
    Calling ``to_clipboard`` with positional arguments must emit a
    FutureWarning announcing that they become keyword-only.
    """
    # GH-54229
    frame = DataFrame({"a": [1, 2, 3]})
    expected_msg = (
        r"Starting with pandas version 3.0 all arguments of to_clipboard "
        r"will be keyword-only."
    )
    deprecation_check = tm.assert_produces_warning(FutureWarning, match=expected_msg)
    with deprecation_check:
        frame.to_clipboard(True, None)