from typing import cast import pandas as pd from pandas import DataFrame as pandas_DataFrame from langflow.schema.data import Data class DataFrame(pandas_DataFrame): """A pandas DataFrame subclass specialized for handling collections of Data objects. This class extends pandas.DataFrame to provide seamless integration between Langflow's Data objects and pandas' powerful data manipulation capabilities. Args: data: Input data in various formats: - List[Data]: List of Data objects - List[Dict]: List of dictionaries - Dict: Dictionary of arrays/lists - pandas.DataFrame: Existing DataFrame - Any format supported by pandas.DataFrame **kwargs: Additional arguments passed to pandas.DataFrame constructor Examples: >>> # From Data objects >>> dataset = DataFrame([Data(data={"name": "John"}), Data(data={"name": "Jane"})]) >>> # From dictionaries >>> dataset = DataFrame([{"name": "John"}, {"name": "Jane"}]) >>> # From dictionary of lists >>> dataset = DataFrame({"name": ["John", "Jane"], "age": [30, 25]}) """ def __init__(self, data: None | list[dict | Data] | dict | pd.DataFrame = None, **kwargs): if data is None: super().__init__(**kwargs) return if isinstance(data, list): if all(isinstance(x, Data) for x in data): data = [d.data for d in data if hasattr(d, "data")] elif not all(isinstance(x, dict) for x in data): msg = "List items must be either all Data objects or all dictionaries" raise ValueError(msg) kwargs["data"] = data elif isinstance(data, dict | pd.DataFrame): kwargs["data"] = data super().__init__(**kwargs) def to_data_list(self) -> list[Data]: """Converts the DataFrame back to a list of Data objects.""" list_of_dicts = self.to_dict(orient="records") return [Data(data=row) for row in list_of_dicts] def add_row(self, data: dict | Data) -> "DataFrame": """Adds a single row to the dataset. Args: data: Either a Data object or a dictionary to add as a new row Returns: DataFrame: A new DataFrame with the added row Example: >>> dataset = DataFrame([{"name": "John"}]) >>> dataset = dataset.add_row({"name": "Jane"}) """ if isinstance(data, Data): data = data.data new_df = self._constructor([data]) return cast("DataFrame", pd.concat([self, new_df], ignore_index=True)) def add_rows(self, data: list[dict | Data]) -> "DataFrame": """Adds multiple rows to the dataset. Args: data: List of Data objects or dictionaries to add as new rows Returns: DataFrame: A new DataFrame with the added rows """ processed_data = [] for item in data: if isinstance(item, Data): processed_data.append(item.data) else: processed_data.append(item) new_df = self._constructor(processed_data) return cast("DataFrame", pd.concat([self, new_df], ignore_index=True)) @property def _constructor(self): def _c(*args, **kwargs): return DataFrame(*args, **kwargs).__finalize__(self) return _c def __bool__(self): """Truth value testing for the DataFrame. Returns True if the DataFrame has at least one row, False otherwise. """ return not self.empty