Spaces:
Sleeping
Sleeping
# -*- coding: utf-8 -*-
# Copyright (c) Louis Brulé Naudet. All Rights Reserved.
# This software may be used and distributed according to the terms of the License Agreement.
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
| import datasets | |
| import polars as pl | |
class Dataset:
    """
    Stateless helpers around Hugging Face ``datasets`` objects.

    Every method is a ``@staticmethod``: the class only serves as a namespace
    and is never expected to hold state. Methods can be called either on the
    class (``Dataset.load(...)``) or on an instance.
    """

    @staticmethod
    def load(
        dataset_path: str
    ):
        """
        Load a dataset from disk.

        Parameters
        ----------
        dataset_path : str
            The path to the dataset on disk.

        Returns
        -------
        datasets.Dataset or datasets.DatasetDict
            The loaded dataset, exactly as returned by
            ``datasets.load_from_disk``.

        Notes
        -----
        This is a thin wrapper over ``datasets.load_from_disk``; the directory
        is expected to have been produced by ``save_to_disk``.

        Example
        -------
        >>> dataset_path = "/path/to/dataset"
        >>> dataset = Dataset.load(dataset_path)
        """
        return datasets.load_from_disk(
            dataset_path=dataset_path
        )

    @staticmethod
    def save(
        dataset: datasets.Dataset,
        dataset_path: str
    ) -> None:
        """
        Save a dataset to disk.

        Parameters
        ----------
        dataset : datasets.Dataset
            The dataset to be saved.

        dataset_path : str
            The path where the dataset will be saved on disk.

        Returns
        -------
        None

        Notes
        -----
        ``save_to_disk`` is a method of the dataset object itself, not a
        function of the ``datasets`` module, so the call is delegated to
        ``dataset.save_to_disk``.

        Example
        -------
        >>> dataset = datasets.load_dataset("my_dataset", split="train")
        >>> dataset_path = "/path/to/save/dataset"
        >>> Dataset.save(dataset, dataset_path)
        """
        # The path is passed positionally so the call does not depend on the
        # keyword name used by a particular `datasets` release.
        dataset.save_to_disk(dataset_path)

    @staticmethod
    def convert_to_polars(
        dataset: datasets.Dataset
    ) -> pl.DataFrame:
        """
        Convert a dataset to a Polars DataFrame.

        Parameters
        ----------
        dataset : datasets.Dataset
            The dataset to be converted to a Polars DataFrame.

        Returns
        -------
        pl.DataFrame
            A Polars DataFrame built from the dataset's underlying Arrow
            table, with an additional row-number column named ``"index"``.

        Raises
        ------
        Exception
            Any error raised by Polars while converting the Arrow table or
            adding the row-number column is propagated to the caller.

        Notes
        -----
        The row-number column is added with ``DataFrame.with_row_index`` when
        the installed Polars provides it, and with the older
        ``DataFrame.with_row_count`` otherwise. The column is named
        ``"index"`` in both cases.

        Examples
        --------
        >>> dataset = datasets.Dataset.from_dict({"text": ["a", "b"]})
        >>> dataframe = Dataset.convert_to_polars(dataset)
        """
        # `dataset.data` wraps the Arrow table; `.table` is the object handed
        # to Polars. Convert once and reuse the result on either path.
        frame = pl.from_arrow(dataset.data.table)

        # Explicit capability check instead of a bare `except:` so that real
        # conversion errors are not silently swallowed and retried.
        if hasattr(frame, "with_row_index"):
            return frame.with_row_index(name="index")

        return frame.with_row_count(name="index")