File size: 4,885 Bytes
7885a28 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 |
"""
Read SAS sas7bdat or xport files.
"""
from __future__ import annotations
from abc import (
ABC,
abstractmethod,
)
from typing import (
TYPE_CHECKING,
overload,
)
from pandas.util._decorators import doc
from pandas.core.shared_docs import _shared_docs
from pandas.io.common import stringify_path
if TYPE_CHECKING:
from collections.abc import Hashable
from types import TracebackType
from pandas._typing import (
CompressionOptions,
FilePath,
ReadBuffer,
Self,
)
from pandas import DataFrame
class ReaderBase(ABC):
    """
    Abstract interface shared by the XportReader and SAS7BDATReader classes.

    Subclasses must implement ``read`` and ``close``. The base class supplies
    context-manager support: entering yields the reader itself and leaving
    the ``with`` block calls ``close``.
    """

    @abstractmethod
    def read(self, nrows: int | None = None) -> DataFrame:
        """Return data from the file as a DataFrame (subclass hook)."""

    @abstractmethod
    def close(self) -> None:
        """Release whatever the reader holds open (subclass hook)."""

    def __enter__(self) -> Self:
        # The reader acts as its own context object.
        return self

    def __exit__(
        self,
        exc_type: type[BaseException] | None,
        exc_value: BaseException | None,
        traceback: TracebackType | None,
    ) -> None:
        # Close unconditionally. The implicit ``None`` return means an
        # exception raised inside the ``with`` body is not suppressed.
        self.close()
# Typing overload: an integer ``chunksize`` always produces a reader object.
@overload
def read_sas(
    filepath_or_buffer: FilePath | ReadBuffer[bytes],
    *,
    format: str | None = ...,
    index: Hashable | None = ...,
    encoding: str | None = ...,
    chunksize: int = ...,
    iterator: bool = ...,
    compression: CompressionOptions = ...,
) -> ReaderBase: ...


# Typing overload: with ``chunksize=None`` the result depends on ``iterator``,
# so either a DataFrame or a reader may come back.
@overload
def read_sas(
    filepath_or_buffer: FilePath | ReadBuffer[bytes],
    *,
    format: str | None = ...,
    index: Hashable | None = ...,
    encoding: str | None = ...,
    chunksize: None = ...,
    iterator: bool = ...,
    compression: CompressionOptions = ...,
) -> DataFrame | ReaderBase: ...


@doc(decompression_options=_shared_docs["decompression_options"] % "filepath_or_buffer")
def read_sas(
    filepath_or_buffer: FilePath | ReadBuffer[bytes],
    *,
    format: str | None = None,
    index: Hashable | None = None,
    encoding: str | None = None,
    chunksize: int | None = None,
    iterator: bool = False,
    compression: CompressionOptions = "infer",
) -> DataFrame | ReaderBase:
    """
    Read SAS files stored as either XPORT or SAS7BDAT format files.

    Parameters
    ----------
    filepath_or_buffer : str, path object, or file-like object
        String, path object (implementing ``os.PathLike[str]``), or file-like
        object implementing a binary ``read()`` function. The string could be a URL.
        Valid URL schemes include http, ftp, s3, and file. For file URLs, a host is
        expected. A local file could be:
        ``file://localhost/path/to/table.sas7bdat``.
    format : str {{'xport', 'sas7bdat'}} or None
        If None, file format is inferred from file extension. If 'xport' or
        'sas7bdat', uses the corresponding format.
    index : identifier of index column, defaults to None
        Identifier of column that should be used as index of the DataFrame.
    encoding : str, default is None
        Encoding for text data.  If None, text data are stored as raw bytes.
    chunksize : int
        Read file `chunksize` lines at a time, returns iterator.
    iterator : bool, defaults to False
        If True, returns an iterator for reading the file incrementally.
    {decompression_options}

    Returns
    -------
    DataFrame if iterator=False and chunksize=None, else SAS7BDATReader
    or XportReader

    Raises
    ------
    ValueError
        If ``format`` is None and the input is not a path-like string, if the
        format cannot be inferred from the file name, or if ``format`` is
        neither 'xport' nor 'sas7bdat' (case-insensitive).

    Examples
    --------
    >>> df = pd.read_sas("sas_data.sas7bdat")  # doctest: +SKIP
    """
    if format is None:
        # Inference needs a file name, so only string-like paths qualify.
        filepath_or_buffer = stringify_path(filepath_or_buffer)
        if not isinstance(filepath_or_buffer, str):
            raise ValueError(
                "If this is a buffer object rather "
                "than a string name, you must specify a format string"
            )

        fname = filepath_or_buffer.lower()
        # Markers are tried in this order; the first one found anywhere in
        # the lower-cased name decides the format.
        for marker, inferred in ((".xpt", "xport"), (".sas7bdat", "sas7bdat")):
            if marker in fname:
                format = inferred
                break
        else:
            raise ValueError(
                f"unable to infer format of SAS file from filename: {repr(fname)}"
            )

    requested = format.lower()
    if requested not in ("xport", "sas7bdat"):
        raise ValueError("unknown SAS format")

    # Reader modules are imported lazily, only for the format actually used.
    reader: ReaderBase
    if requested == "xport":
        from pandas.io.sas.sas_xport import XportReader

        reader = XportReader(
            filepath_or_buffer,
            index=index,
            encoding=encoding,
            chunksize=chunksize,
            compression=compression,
        )
    else:
        from pandas.io.sas.sas7bdat import SAS7BDATReader

        reader = SAS7BDATReader(
            filepath_or_buffer,
            index=index,
            encoding=encoding,
            chunksize=chunksize,
            compression=compression,
        )

    wants_reader = iterator or chunksize
    if not wants_reader:
        # One-shot read: the context manager closes the reader afterwards.
        with reader:
            return reader.read()

    # Incremental use: hand back the open reader; the caller must close it.
    return reader
|