agent-flow / src /backend /tests /unit /schema /test_schema_data_set.py
Tai Truong
fix readme
d202ada
import pandas as pd
import pytest
from langflow.schema.data import Data
from langflow.schema.dataframe import DataFrame
@pytest.fixture
def sample_data_objects() -> list[Data]:
    """Return three Data objects, each carrying ``name``, ``age`` and ``city`` keys."""
    records = [
        {"name": "John", "age": 30, "city": "New York"},
        {"name": "Jane", "age": 25, "city": "Boston"},
        {"name": "Bob", "age": 35, "city": "Chicago"},
    ]
    return [Data(data=record) for record in records]
@pytest.fixture
def sample_dataset(sample_data_objects) -> DataFrame:
    """Return a DataFrame constructed from the ``sample_data_objects`` fixture."""
    dataset = DataFrame(sample_data_objects)
    return dataset
def test_from_data_list_basic():
    """Build a DataFrame from two Data objects and check type, size, columns and cells."""
    john = Data(data={"name": "John", "age": 30})
    jane = Data(data={"name": "Jane", "age": 25})

    dataset = DataFrame([john, jane])

    # The result must be both the project DataFrame and a pandas DataFrame.
    assert isinstance(dataset, DataFrame)
    assert isinstance(dataset, pd.DataFrame)
    assert len(dataset) == 2
    assert list(dataset.columns) == ["name", "age"]

    first_row = dataset.iloc[0]
    assert first_row["name"] == "John"
    second_row = dataset.iloc[1]
    assert second_row["age"] == 25
def test_from_data_list_empty():
    """Check that constructing from an empty list gives a zero-length DataFrame."""
    no_items: list = []
    dataset = DataFrame(no_items)

    assert isinstance(dataset, DataFrame)
    assert len(dataset) == 0
def test_from_data_list_missing_fields():
    """Check that Data objects with differing keys give the union of columns, with gaps as NA."""
    with_age = Data(data={"name": "John", "age": 30})
    with_city = Data(data={"name": "Jane", "city": "Boston"})  # no "age" key

    dataset = DataFrame([with_age, with_city])

    assert isinstance(dataset, DataFrame)
    assert set(dataset.columns) == {"name", "age", "city"}

    # Each row must be NA in the column its source record did not supply.
    second_row = dataset.iloc[1]
    assert pd.isna(second_row["age"])
    first_row = dataset.iloc[0]
    assert pd.isna(first_row["city"])
def test_from_data_list_nested_data():
    """Check that nested dictionary values are still dicts when read from the DataFrame."""
    john = Data(data={"name": "John", "address": {"city": "New York", "zip": "10001"}})
    jane = Data(data={"name": "Jane", "address": {"city": "Boston", "zip": "02108"}})

    dataset = DataFrame([john, jane])
    assert isinstance(dataset, DataFrame)

    # Look at the first row's address cell.
    first_address = dataset["address"][0]
    assert isinstance(first_address, dict)
    assert first_address["city"] == "New York"
def test_to_data_list_basic(sample_dataset, sample_data_objects):
    """Check that ``to_data_list`` returns one Data object per source record.

    The result must be a list of Data instances, have the same length as the
    fixture's source list, and each item's ``data`` must equal the ``data`` of
    the source object at the same position.
    """
    result = sample_dataset.to_data_list()

    assert isinstance(result, list)
    assert all(isinstance(item, Data) for item in result)
    assert len(result) == len(sample_data_objects)

    # strict=True: a length mismatch raises instead of silently skipping the
    # unmatched tail, so this loop stays sound even without the check above.
    for original, converted in zip(sample_data_objects, result, strict=True):
        assert original.data == converted.data
def test_to_data_list_empty():
    """Check that a DataFrame built with no arguments converts to an empty list."""
    converted = DataFrame().to_data_list()

    assert isinstance(converted, list)
    assert len(converted) == 0
def test_to_data_list_modified_data(sample_dataset):
    """Check that an added column and an overwritten cell appear in ``to_data_list`` output."""
    # Mutate the fixture: add a column, then overwrite the first row's age by position.
    sample_dataset["new_column"] = [1, 2, 3]
    age_position = sample_dataset.columns.get_loc("age")
    sample_dataset.iloc[0, age_position] = 31

    result = sample_dataset.to_data_list()

    assert isinstance(result, list)
    for item in result:
        assert isinstance(item, Data)

    first_record = result[0].data
    assert first_record["new_column"] == 1
    assert first_record["age"] == 31
def test_dataset_pandas_operations(sample_dataset):
    """Exercise boolean filtering, a column mean and a groupby aggregation on the fixture."""
    # Filtering: exactly one row has age > 30, and the result must be a DataFrame.
    older_than_30 = sample_dataset["age"] > 30
    filtered = sample_dataset[older_than_30]
    assert isinstance(filtered, DataFrame), f"Expected DataFrame, got {type(filtered)}"
    assert len(filtered) == 1
    assert filtered.iloc[0]["name"] == "Bob"

    # Aggregation: mean of the age column.
    ages = sample_dataset["age"]
    mean_age = ages.mean()
    assert mean_age == 30

    # Groupby: one group per city.
    by_city = sample_dataset.groupby("city")
    grouped = by_city.agg({"age": "mean"})
    assert isinstance(grouped, pd.DataFrame)
    assert len(grouped) == 3
def test_dataset_with_null_values():
    """Check that None values read back as NA from the DataFrame and after ``to_data_list``."""
    missing_age = Data(data={"name": "John", "age": None})
    missing_name = Data(data={"name": None, "age": 25})
    dataset = DataFrame([missing_age, missing_name])

    assert pd.isna(dataset.iloc[0]["age"])
    assert pd.isna(dataset.iloc[1]["name"])

    # The nulls must still be NA after converting back to Data objects.
    result = dataset.to_data_list()
    assert pd.isna(result[0].data["age"]), f"Expected NaN, got {result[0].data['age']}"
    assert pd.isna(result[1].data["name"]), f"Expected NaN, got {result[1].data['name']}"
def test_dataset_type_preservation():
    """Check that each value's Python type survives DataFrame construction and ``to_data_list``."""
    payload = {
        "int_val": 1,
        "float_val": 1.5,
        "str_val": "test",
        "bool_val": True,
        "list_val": [1, 2, 3],
        "dict_val": {"key": "value"},
    }
    # Expected type per key, in the order the values are checked.
    expected_types = {
        "int_val": int,
        "float_val": float,
        "str_val": str,
        "bool_val": bool,
        "list_val": list,
        "dict_val": dict,
    }

    dataset = DataFrame([Data(data=payload)])
    result = dataset.to_data_list()

    round_tripped = result[0]
    for key, expected_type in expected_types.items():
        assert isinstance(round_tripped.data[key], expected_type)
def test_add_row_with_dict(sample_dataset):
    """Check that ``add_row`` with a dict returns a DataFrame one row longer, ending with that row."""
    alice = {"name": "Alice", "age": 28, "city": "Seattle"}

    result = sample_dataset.add_row(alice)

    assert isinstance(result, DataFrame)
    # The length is compared against the fixture itself, so the result must be one row longer than it.
    assert len(result) == len(sample_dataset) + 1

    assert result.iloc[-1]["name"] == "Alice"
    last_row = result.iloc[-1]
    assert last_row["age"] == 28
    assert last_row["city"] == "Seattle"
def test_add_row_with_data_object(sample_dataset):
    """Check that ``add_row`` with a Data object returns a DataFrame one row longer, ending with that row."""
    alice = Data(data={"name": "Alice", "age": 28, "city": "Seattle"})

    result = sample_dataset.add_row(alice)

    assert isinstance(result, DataFrame)
    expected_length = len(sample_dataset) + 1
    assert len(result) == expected_length

    assert result.iloc[-1]["name"] == "Alice"
    last_row = result.iloc[-1]
    assert last_row["age"] == 28
    assert last_row["city"] == "Seattle"
def test_add_rows_with_dicts(sample_dataset):
    """Check that ``add_rows`` with two dicts appends both rows, in order, to the returned DataFrame."""
    alice = {"name": "Alice", "age": 28, "city": "Seattle"}
    charlie = {"name": "Charlie", "age": 32, "city": "Portland"}

    result = sample_dataset.add_rows([alice, charlie])

    assert isinstance(result, DataFrame)
    assert len(result) == len(sample_dataset) + 2

    # The new rows occupy the last two positions in insertion order.
    second_to_last = result.iloc[-2]
    assert second_to_last["name"] == "Alice"
    last = result.iloc[-1]
    assert last["name"] == "Charlie"
def test_add_rows_with_data_objects(sample_dataset):
    """Check that ``add_rows`` with two Data objects appends both rows, in order."""
    alice = Data(data={"name": "Alice", "age": 28, "city": "Seattle"})
    charlie = Data(data={"name": "Charlie", "age": 32, "city": "Portland"})

    result = sample_dataset.add_rows([alice, charlie])

    assert isinstance(result, DataFrame)
    assert len(result) == len(sample_dataset) + 2

    # The new rows occupy the last two positions in insertion order.
    second_to_last = result.iloc[-2]
    assert second_to_last["name"] == "Alice"
    last = result.iloc[-1]
    assert last["name"] == "Charlie"
def test_add_rows_mixed_types(sample_dataset):
    """Check that ``add_rows`` accepts a list holding one dict and one Data object."""
    as_dict = {"name": "Alice", "age": 28, "city": "Seattle"}
    as_data = Data(data={"name": "Charlie", "age": 32, "city": "Portland"})

    result = sample_dataset.add_rows([as_dict, as_data])

    assert isinstance(result, DataFrame)
    assert len(result) == len(sample_dataset) + 2

    # The new rows occupy the last two positions in insertion order.
    second_to_last = result.iloc[-2]
    assert second_to_last["name"] == "Alice"
    last = result.iloc[-1]
    assert last["name"] == "Charlie"
def test_init_with_data_objects():
    """Construct a DataFrame from a list of Data objects and check size, columns and cells."""
    john = Data(data={"name": "John", "age": 30})
    jane = Data(data={"name": "Jane", "age": 25})

    dataset = DataFrame([john, jane])

    assert isinstance(dataset, DataFrame)
    assert len(dataset) == 2
    assert list(dataset.columns) == ["name", "age"]

    first_row = dataset.iloc[0]
    assert first_row["name"] == "John"
    second_row = dataset.iloc[1]
    assert second_row["age"] == 25
def test_init_with_dicts():
    """Construct a DataFrame from a list of plain dicts and check size, columns and cells."""
    john = {"name": "John", "age": 30}
    jane = {"name": "Jane", "age": 25}

    dataset = DataFrame([john, jane])

    assert isinstance(dataset, DataFrame)
    assert len(dataset) == 2
    assert list(dataset.columns) == ["name", "age"]

    first_row = dataset.iloc[0]
    assert first_row["name"] == "John"
    second_row = dataset.iloc[1]
    assert second_row["age"] == 25
def test_init_with_dict_of_lists():
    """Construct a DataFrame from a column-name -> list-of-values dict and check the result."""
    columns = {"name": ["John", "Jane"], "age": [30, 25]}

    dataset = DataFrame(columns)

    assert isinstance(dataset, DataFrame)
    assert len(dataset) == 2
    assert list(dataset.columns) == ["name", "age"]

    first_row = dataset.iloc[0]
    assert first_row["name"] == "John"
    second_row = dataset.iloc[1]
    assert second_row["age"] == 25
def test_init_with_pandas_dataframe():
    """Construct a DataFrame from an existing pandas DataFrame and check the result."""
    columns = {"name": ["John", "Jane"], "age": [30, 25]}
    pandas_frame = pd.DataFrame(columns)

    dataset = DataFrame(pandas_frame)

    assert isinstance(dataset, DataFrame)
    assert len(dataset) == 2
    assert list(dataset.columns) == ["name", "age"]

    first_row = dataset.iloc[0]
    assert first_row["name"] == "John"
    second_row = dataset.iloc[1]
    assert second_row["age"] == 25
def test_init_with_none():
    """Check that passing None to the constructor gives a zero-length DataFrame."""
    nothing = None
    dataset = DataFrame(nothing)

    assert isinstance(dataset, DataFrame)
    assert len(dataset) == 0
def test_init_with_invalid_list():
    """Check that a list mixing a dict and a Data object is rejected with ValueError."""
    plain_dict = {"name": "John", "age": 30}
    data_object = Data(data={"name": "Jane", "age": 25})
    invalid_data = [plain_dict, data_object]  # mixed item types should fail

    expected_message = "List items must be either all Data objects or all dictionaries"
    with pytest.raises(ValueError, match=expected_message):
        DataFrame(invalid_data)
def test_init_with_kwargs():
    """Check that keyword arguments such as ``index`` are honoured by the constructor."""
    columns = {"name": ["John", "Jane"], "age": [30, 25]}

    dataset = DataFrame(data=columns, index=["a", "b"])

    assert isinstance(dataset, DataFrame)
    assert len(dataset) == 2
    assert list(dataset.index) == ["a", "b"]

    # Rows are addressed by the custom labels supplied above.
    row_a = dataset.loc["a"]
    assert row_a["name"] == "John"
    row_b = dataset.loc["b"]
    assert row_b["age"] == 25