|
from __future__ import annotations |
|
|
|
from datetime import time |
|
import math |
|
from typing import TYPE_CHECKING |
|
|
|
import numpy as np |
|
|
|
from pandas.compat._optional import import_optional_dependency |
|
from pandas.util._decorators import doc |
|
|
|
from pandas.core.shared_docs import _shared_docs |
|
|
|
from pandas.io.excel._base import BaseExcelReader |
|
|
|
if TYPE_CHECKING: |
|
from xlrd import Book |
|
|
|
from pandas._typing import ( |
|
Scalar, |
|
StorageOptions, |
|
) |
|
|
|
|
|
class XlrdReader(BaseExcelReader["Book"]): |
|
@doc(storage_options=_shared_docs["storage_options"]) |
|
def __init__( |
|
self, |
|
filepath_or_buffer, |
|
storage_options: StorageOptions | None = None, |
|
engine_kwargs: dict | None = None, |
|
) -> None: |
|
""" |
|
Reader using xlrd engine. |
|
|
|
Parameters |
|
---------- |
|
filepath_or_buffer : str, path object or Workbook |
|
Object to be parsed. |
|
{storage_options} |
|
engine_kwargs : dict, optional |
|
Arbitrary keyword arguments passed to excel engine. |
|
""" |
|
err_msg = "Install xlrd >= 2.0.1 for xls Excel support" |
|
import_optional_dependency("xlrd", extra=err_msg) |
|
super().__init__( |
|
filepath_or_buffer, |
|
storage_options=storage_options, |
|
engine_kwargs=engine_kwargs, |
|
) |
|
|
|
@property |
|
def _workbook_class(self) -> type[Book]: |
|
from xlrd import Book |
|
|
|
return Book |
|
|
|
def load_workbook(self, filepath_or_buffer, engine_kwargs) -> Book: |
|
from xlrd import open_workbook |
|
|
|
if hasattr(filepath_or_buffer, "read"): |
|
data = filepath_or_buffer.read() |
|
return open_workbook(file_contents=data, **engine_kwargs) |
|
else: |
|
return open_workbook(filepath_or_buffer, **engine_kwargs) |
|
|
|
@property |
|
def sheet_names(self): |
|
return self.book.sheet_names() |
|
|
|
def get_sheet_by_name(self, name): |
|
self.raise_if_bad_sheet_by_name(name) |
|
return self.book.sheet_by_name(name) |
|
|
|
def get_sheet_by_index(self, index): |
|
self.raise_if_bad_sheet_by_index(index) |
|
return self.book.sheet_by_index(index) |
|
|
|
def get_sheet_data( |
|
self, sheet, file_rows_needed: int | None = None |
|
) -> list[list[Scalar]]: |
|
from xlrd import ( |
|
XL_CELL_BOOLEAN, |
|
XL_CELL_DATE, |
|
XL_CELL_ERROR, |
|
XL_CELL_NUMBER, |
|
xldate, |
|
) |
|
|
|
epoch1904 = self.book.datemode |
|
|
|
def _parse_cell(cell_contents, cell_typ): |
|
""" |
|
converts the contents of the cell into a pandas appropriate object |
|
""" |
|
if cell_typ == XL_CELL_DATE: |
|
|
|
try: |
|
cell_contents = xldate.xldate_as_datetime(cell_contents, epoch1904) |
|
except OverflowError: |
|
return cell_contents |
|
|
|
|
|
|
|
|
|
year = (cell_contents.timetuple())[0:3] |
|
if (not epoch1904 and year == (1899, 12, 31)) or ( |
|
epoch1904 and year == (1904, 1, 1) |
|
): |
|
cell_contents = time( |
|
cell_contents.hour, |
|
cell_contents.minute, |
|
cell_contents.second, |
|
cell_contents.microsecond, |
|
) |
|
|
|
elif cell_typ == XL_CELL_ERROR: |
|
cell_contents = np.nan |
|
elif cell_typ == XL_CELL_BOOLEAN: |
|
cell_contents = bool(cell_contents) |
|
elif cell_typ == XL_CELL_NUMBER: |
|
|
|
|
|
if math.isfinite(cell_contents): |
|
|
|
val = int(cell_contents) |
|
if val == cell_contents: |
|
cell_contents = val |
|
return cell_contents |
|
|
|
data = [] |
|
|
|
nrows = sheet.nrows |
|
if file_rows_needed is not None: |
|
nrows = min(nrows, file_rows_needed) |
|
for i in range(nrows): |
|
row = [ |
|
_parse_cell(value, typ) |
|
for value, typ in zip(sheet.row_values(i), sheet.row_types(i)) |
|
] |
|
data.append(row) |
|
|
|
return data |
|
|