Spaces:
Sleeping
Sleeping
# -*- coding: utf-8 -*-
# Copyright (c) Louis Brulé Naudet. All Rights Reserved.
# This software may be used and distributed according to the terms of the License Agreement.
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import datasets | |
import polars as pl | |
class Dataset:
    """
    Static helpers for loading, saving and converting Hugging Face datasets.

    Notes
    -----
    Every method is a ``staticmethod``: no instance state is read or written,
    and each helper can be called on the class or on an instance.

    Annotations are written as strings so that the union return type of
    `load` stays valid on Python versions without ``X | Y`` support for
    classes; the other annotations are quoted for consistency.
    """

    @staticmethod
    def load(
        dataset_path: str
    ) -> "datasets.Dataset | datasets.DatasetDict":
        """
        Load a dataset from disk.

        Parameters
        ----------
        dataset_path : str
            The path to the dataset on disk.

        Returns
        -------
        datasets.Dataset or datasets.DatasetDict
            The loaded object. `datasets.load_from_disk` returns a
            `DatasetDict` when the directory was written from one.

        Notes
        -----
        This method delegates to the `load_from_disk` function of the
        `datasets` module, so the directory must have been produced by
        `save_to_disk`.

        Example
        -------
        >>> dataset_path = "/path/to/dataset"
        >>> dataset = Dataset.load(dataset_path)
        """
        return datasets.load_from_disk(dataset_path=dataset_path)

    @staticmethod
    def save(
        dataset: "datasets.Dataset",
        dataset_path: str
    ) -> None:
        """
        Save a dataset to disk.

        Parameters
        ----------
        dataset : datasets.Dataset
            The dataset to be saved.

        dataset_path : str
            The path where the dataset will be saved on disk.

        Returns
        -------
        None

        Notes
        -----
        Saving is done through the dataset's own `save_to_disk` method; the
        `datasets` module does not expose a module-level `save_to_disk`
        function.

        Example
        -------
        >>> dataset = datasets.load_dataset("my_dataset", split="train")
        >>> dataset_path = "/path/to/save/dataset"
        >>> Dataset.save(dataset, dataset_path)
        """
        dataset.save_to_disk(dataset_path)

    @staticmethod
    def convert_to_polars(
        dataset: "datasets.Dataset"
    ) -> "pl.DataFrame":
        """
        Convert a dataset to a Polars DataFrame.

        Parameters
        ----------
        dataset : datasets.Dataset
            The dataset to be converted to a Polars DataFrame.

        Returns
        -------
        pl.DataFrame
            A Polars DataFrame built from the dataset's Arrow table, with a
            leading row-number column named "index".

        Notes
        -----
        The Arrow table backing the dataset is handed to `pl.from_arrow`.
        The row-number column is added with `with_row_index` when the
        installed Polars provides it, and with the older `with_row_count`
        otherwise.

        Raises
        ------
        Exception
            Any error raised by Polars during the conversion is propagated
            unchanged; nothing is caught here.

        Examples
        --------
        >>> dataset = datasets.Dataset.from_dict({"text": ["a", "b"]})
        >>> dataframe = Dataset.convert_to_polars(dataset)
        """
        # Convert once; only the row-index call differs between versions.
        frame = pl.from_arrow(dataset.data.table)

        # Check for the method explicitly instead of catching every
        # exception, so genuine conversion errors are not retried or hidden.
        if hasattr(frame, "with_row_index"):
            return frame.with_row_index(name="index")

        return frame.with_row_count(name="index")