File size: 3,474 Bytes
7885a28 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 |
from __future__ import annotations
from datetime import (
date,
datetime,
time,
timedelta,
)
from typing import (
TYPE_CHECKING,
Any,
Union,
)
from pandas.compat._optional import import_optional_dependency
from pandas.util._decorators import doc
import pandas as pd
from pandas.core.shared_docs import _shared_docs
from pandas.io.excel._base import BaseExcelReader
if TYPE_CHECKING:
from python_calamine import (
CalamineSheet,
CalamineWorkbook,
)
from pandas._typing import (
FilePath,
NaTType,
ReadBuffer,
Scalar,
StorageOptions,
)
_CellValue = Union[int, float, str, bool, time, date, datetime, timedelta]
class CalamineReader(BaseExcelReader["CalamineWorkbook"]):
@doc(storage_options=_shared_docs["storage_options"])
def __init__(
self,
filepath_or_buffer: FilePath | ReadBuffer[bytes],
storage_options: StorageOptions | None = None,
engine_kwargs: dict | None = None,
) -> None:
"""
Reader using calamine engine (xlsx/xls/xlsb/ods).
Parameters
----------
filepath_or_buffer : str, path to be parsed or
an open readable stream.
{storage_options}
engine_kwargs : dict, optional
Arbitrary keyword arguments passed to excel engine.
"""
import_optional_dependency("python_calamine")
super().__init__(
filepath_or_buffer,
storage_options=storage_options,
engine_kwargs=engine_kwargs,
)
@property
def _workbook_class(self) -> type[CalamineWorkbook]:
from python_calamine import CalamineWorkbook
return CalamineWorkbook
def load_workbook(
self, filepath_or_buffer: FilePath | ReadBuffer[bytes], engine_kwargs: Any
) -> CalamineWorkbook:
from python_calamine import load_workbook
return load_workbook(filepath_or_buffer, **engine_kwargs)
@property
def sheet_names(self) -> list[str]:
from python_calamine import SheetTypeEnum
return [
sheet.name
for sheet in self.book.sheets_metadata
if sheet.typ == SheetTypeEnum.WorkSheet
]
def get_sheet_by_name(self, name: str) -> CalamineSheet:
self.raise_if_bad_sheet_by_name(name)
return self.book.get_sheet_by_name(name)
def get_sheet_by_index(self, index: int) -> CalamineSheet:
self.raise_if_bad_sheet_by_index(index)
return self.book.get_sheet_by_index(index)
def get_sheet_data(
self, sheet: CalamineSheet, file_rows_needed: int | None = None
) -> list[list[Scalar | NaTType | time]]:
def _convert_cell(value: _CellValue) -> Scalar | NaTType | time:
if isinstance(value, float):
val = int(value)
if val == value:
return val
else:
return value
elif isinstance(value, date):
return pd.Timestamp(value)
elif isinstance(value, timedelta):
return pd.Timedelta(value)
elif isinstance(value, time):
return value
return value
rows: list[list[_CellValue]] = sheet.to_python(
skip_empty_area=False, nrows=file_rows_needed
)
data = [[_convert_cell(cell) for cell in row] for row in rows]
return data
|