Tai Truong
fix readme
d202ada
from typing import cast
import pandas as pd
from pandas import DataFrame as pandas_DataFrame
from langflow.schema.data import Data
class DataFrame(pandas_DataFrame):
    """A pandas DataFrame subclass specialized for handling collections of Data objects.

    This class extends pandas.DataFrame to provide seamless integration between
    Langflow's Data objects and pandas' powerful data manipulation capabilities.

    Args:
        data: Input data in various formats:
            - List[Data]: List of Data objects
            - List[Dict]: List of dictionaries
            - Dict: Dictionary of arrays/lists
            - pandas.DataFrame: Existing DataFrame
            - Any format supported by pandas.DataFrame
        **kwargs: Additional arguments passed to pandas.DataFrame constructor

    Raises:
        ValueError: If ``data`` is a list whose items are neither all Data
            objects nor all dictionaries.

    Examples:
        >>> # From Data objects
        >>> dataset = DataFrame([Data(data={"name": "John"}), Data(data={"name": "Jane"})])
        >>> # From dictionaries
        >>> dataset = DataFrame([{"name": "John"}, {"name": "Jane"}])
        >>> # From dictionary of lists
        >>> dataset = DataFrame({"name": ["John", "Jane"], "age": [30, 25]})
    """

    def __init__(self, data: None | list[dict | Data] | dict | pd.DataFrame = None, **kwargs):
        """Build the frame, unwrapping Data objects when a list of them is given.

        Only lists are validated and normalised here. Every other value
        (``None``, dicts, DataFrames, arrays, Series, ...) is forwarded to the
        pandas constructor untouched, so nothing pandas understands is lost.
        """
        if isinstance(data, list):
            if all(isinstance(item, Data) for item in data):
                # Each Data object carries its row payload in ``.data``.
                data = [item.data for item in data]
            elif not all(isinstance(item, dict) for item in data):
                msg = "List items must be either all Data objects or all dictionaries"
                raise ValueError(msg)
        # Always hand ``data`` to pandas: previously any non-list/dict/DataFrame
        # input was silently discarded, which also broke pandas operations that
        # rebuild results through ``_constructor`` with arrays.
        super().__init__(data=data, **kwargs)

    def to_data_list(self) -> list[Data]:
        """Converts the DataFrame back to a list of Data objects.

        Returns:
            list[Data]: One Data object per row, built from the row's
            ``{column: value}`` record.
        """
        records = self.to_dict(orient="records")
        return [Data(data=record) for record in records]

    def add_row(self, data: dict | Data) -> "DataFrame":
        """Adds a single row to the dataset.

        Args:
            data: Either a Data object or a dictionary to add as a new row

        Returns:
            DataFrame: A new DataFrame with the added row. The index is
            renumbered because the frames are concatenated with
            ``ignore_index=True``.

        Example:
            >>> dataset = DataFrame([{"name": "John"}])
            >>> dataset = dataset.add_row({"name": "Jane"})
        """
        if isinstance(data, Data):
            data = data.data
        new_df = self._constructor([data])
        return cast("DataFrame", pd.concat([self, new_df], ignore_index=True))

    def add_rows(self, data: list[dict | Data]) -> "DataFrame":
        """Adds multiple rows to the dataset.

        Args:
            data: List of Data objects or dictionaries to add as new rows

        Returns:
            DataFrame: A new DataFrame with the added rows. The index is
            renumbered because the frames are concatenated with
            ``ignore_index=True``.
        """
        # Unwrap Data objects item by item so mixed lists are accepted here.
        processed_data = [item.data if isinstance(item, Data) else item for item in data]
        new_df = self._constructor(processed_data)
        return cast("DataFrame", pd.concat([self, new_df], ignore_index=True))

    @property
    def _constructor(self):
        """Factory pandas uses to build results, keeping them as this subclass.

        The returned callable constructs a ``DataFrame`` from its arguments
        and calls ``__finalize__(self)`` on it.
        """

        def _c(*args, **kwargs):
            return DataFrame(*args, **kwargs).__finalize__(self)

        return _c

    def __bool__(self):
        """Truth value testing for the DataFrame.

        Returns True if the DataFrame is not empty, False otherwise. Emptiness
        follows ``DataFrame.empty``, so a frame with no rows or no columns is
        falsy.
        """
        return not self.empty