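# NOTE(review): the first lines of this file originally read "Spaces: / Running / Running".
# That looks like residue from the web page this file was copied from, not module content;
# it is kept here as a comment so the module can be parsed. Confirm and remove if unneeded.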
import pandas as pd | |
import pytest | |
from langflow.schema.data import Data | |
from langflow.schema.dataframe import DataFrame | |
@pytest.fixture
def sample_data_objects() -> list[Data]:
    """Fixture providing a list of sample Data objects.

    The ``@pytest.fixture`` decorator is required: tests in this module request
    ``sample_data_objects`` by parameter name, which pytest can only resolve
    for registered fixtures.

    Returns:
        Three ``Data`` objects, each carrying ``name``, ``age`` and ``city`` keys.
    """
    return [
        Data(data={"name": "John", "age": 30, "city": "New York"}),
        Data(data={"name": "Jane", "age": 25, "city": "Boston"}),
        Data(data={"name": "Bob", "age": 35, "city": "Chicago"}),
    ]
@pytest.fixture
def sample_dataset(sample_data_objects) -> DataFrame:
    """Fixture providing a sample DataFrame instance.

    The ``@pytest.fixture`` decorator is required: tests in this module request
    ``sample_dataset`` by parameter name, which pytest can only resolve for
    registered fixtures.

    Args:
        sample_data_objects: The list of ``Data`` objects to wrap.

    Returns:
        A ``DataFrame`` constructed directly from ``sample_data_objects``.
    """
    return DataFrame(sample_data_objects)
def test_from_data_list_basic():
    """Build a DataFrame from two Data objects and check type, shape and cell values."""
    records = [
        Data(data={"name": "John", "age": 30}),
        Data(data={"name": "Jane", "age": 25}),
    ]
    frame = DataFrame(records)

    # The result is expected to be both the project DataFrame and a pandas DataFrame.
    assert isinstance(frame, DataFrame)
    assert isinstance(frame, pd.DataFrame)

    # One row per Data object, columns in insertion order.
    assert len(frame) == 2
    assert list(frame.columns) == ["name", "age"]

    assert frame.iloc[0]["name"] == "John"
    assert frame.iloc[1]["age"] == 25
def test_from_data_list_empty():
    """Constructing from an empty list should give a DataFrame with zero rows."""
    frame = DataFrame([])

    assert isinstance(frame, DataFrame)
    row_count = len(frame)
    assert row_count == 0
def test_from_data_list_missing_fields():
    """Data objects with differing keys should yield the union of columns, NA-filled."""
    record_with_age = Data(data={"name": "John", "age": 30})
    record_with_city = Data(data={"name": "Jane", "city": "Boston"})  # Missing age
    frame = DataFrame([record_with_age, record_with_city])

    assert isinstance(frame, DataFrame)
    assert set(frame.columns) == {"name", "age", "city"}

    # Each row must be NA in the column that only the other record supplied.
    assert pd.isna(frame.iloc[1]["age"])
    assert pd.isna(frame.iloc[0]["city"])
def test_from_data_list_nested_data():
    """Nested dictionary values should be kept as dicts inside the DataFrame cells."""
    john = Data(data={"name": "John", "address": {"city": "New York", "zip": "10001"}})
    jane = Data(data={"name": "Jane", "address": {"city": "Boston", "zip": "02108"}})
    frame = DataFrame([john, jane])

    assert isinstance(frame, DataFrame)

    # The first row's address cell must still be a dict with its original content.
    first_address = frame["address"][0]
    assert isinstance(first_address, dict)
    assert first_address["city"] == "New York"
def test_to_data_list_basic(sample_dataset, sample_data_objects):
    """Convert the sample DataFrame back to Data objects and compare with the inputs."""
    result = sample_dataset.to_data_list()

    assert isinstance(result, list)
    for item in result:
        assert isinstance(item, Data)
    assert len(result) == len(sample_data_objects)

    # Check if data is preserved
    paired = zip(sample_data_objects, result, strict=False)
    for source_item, round_tripped in paired:
        assert source_item.data == round_tripped.data
def test_to_data_list_empty():
    """to_data_list on a DataFrame built with no arguments should return an empty list."""
    converted = DataFrame().to_data_list()

    assert isinstance(converted, list)
    assert len(converted) == 0
def test_to_data_list_modified_data(sample_dataset):
    """Edits made to the DataFrame should be visible in the converted Data objects."""
    # Modify the dataset: add a column, then overwrite the first row's age.
    sample_dataset["new_column"] = [1, 2, 3]
    age_position = sample_dataset.columns.get_loc("age")
    sample_dataset.iloc[0, age_position] = 31

    converted = sample_dataset.to_data_list()

    assert isinstance(converted, list)
    for item in converted:
        assert isinstance(item, Data)

    first_payload = converted[0].data
    assert first_payload["new_column"] == 1
    assert first_payload["age"] == 31
def test_dataset_pandas_operations(sample_dataset):
    """Exercise filtering, aggregation and groupby on the sample DataFrame."""
    # Test filtering: boolean-mask selection is expected to keep the DataFrame type.
    age_mask = sample_dataset["age"] > 30
    filtered = sample_dataset[age_mask]
    assert isinstance(filtered, DataFrame), f"Expected DataFrame, got {type(filtered)}"
    assert len(filtered) == 1
    assert filtered.iloc[0]["name"] == "Bob"

    # Test aggregation
    ages = sample_dataset["age"]
    assert ages.mean() == 30

    # Test groupby: every sample row has a distinct city, so three groups are expected.
    by_city = sample_dataset.groupby("city")
    grouped = by_city.agg({"age": "mean"})
    assert isinstance(grouped, pd.DataFrame)
    assert len(grouped) == 3
def test_dataset_with_null_values():
    """None values should read back as NA from the DataFrame and after conversion."""
    missing_age = Data(data={"name": "John", "age": None})
    missing_name = Data(data={"name": None, "age": 25})
    frame = DataFrame([missing_age, missing_name])

    assert pd.isna(frame.iloc[0]["age"])
    assert pd.isna(frame.iloc[1]["name"])

    # Test that null values are preserved when converting back
    result = frame.to_data_list()
    assert pd.isna(result[0].data["age"]), f"Expected NaN, got {result[0].data['age']}"
    assert pd.isna(result[1].data["name"]), f"Expected NaN, got {result[1].data['name']}"
def test_dataset_type_preservation():
    """Values should keep their Python types after a DataFrame -> Data round trip."""
    payload = {
        "int_val": 1,
        "float_val": 1.5,
        "str_val": "test",
        "bool_val": True,
        "list_val": [1, 2, 3],
        "dict_val": {"key": "value"},
    }
    frame = DataFrame([Data(data=payload)])
    round_tripped = frame.to_data_list()[0].data

    # Checked in the same order as the keys were supplied.
    expected_types = (
        ("int_val", int),
        ("float_val", float),
        ("str_val", str),
        ("bool_val", bool),
        ("list_val", list),
        ("dict_val", dict),
    )
    for key, expected_type in expected_types:
        assert isinstance(round_tripped[key], expected_type)
def test_add_row_with_dict(sample_dataset):
    """add_row with a plain dict should return a DataFrame one row longer."""
    appended = sample_dataset.add_row({"name": "Alice", "age": 28, "city": "Seattle"})

    assert isinstance(appended, DataFrame)
    # Length is compared against the fixture as it is *after* the call.
    assert len(appended) == len(sample_dataset) + 1

    last_row = appended.iloc[-1]
    assert last_row["name"] == "Alice"
    assert last_row["age"] == 28
    assert last_row["city"] == "Seattle"
def test_add_row_with_data_object(sample_dataset):
    """add_row with a Data object should return a DataFrame one row longer."""
    alice = Data(data={"name": "Alice", "age": 28, "city": "Seattle"})
    appended = sample_dataset.add_row(alice)

    assert isinstance(appended, DataFrame)
    # Length is compared against the fixture as it is *after* the call.
    assert len(appended) == len(sample_dataset) + 1

    last_row = appended.iloc[-1]
    assert last_row["name"] == "Alice"
    assert last_row["age"] == 28
    assert last_row["city"] == "Seattle"
def test_add_rows_with_dicts(sample_dataset):
    """add_rows with a list of dicts should append the rows in the order given."""
    alice = {"name": "Alice", "age": 28, "city": "Seattle"}
    charlie = {"name": "Charlie", "age": 32, "city": "Portland"}
    extended = sample_dataset.add_rows([alice, charlie])

    assert isinstance(extended, DataFrame)
    assert len(extended) == len(sample_dataset) + 2

    # The two new rows are expected at the tail, in input order.
    assert extended.iloc[-2]["name"] == "Alice"
    assert extended.iloc[-1]["name"] == "Charlie"
def test_add_rows_with_data_objects(sample_dataset):
    """add_rows with a list of Data objects should append the rows in the order given."""
    alice = Data(data={"name": "Alice", "age": 28, "city": "Seattle"})
    charlie = Data(data={"name": "Charlie", "age": 32, "city": "Portland"})
    extended = sample_dataset.add_rows([alice, charlie])

    assert isinstance(extended, DataFrame)
    assert len(extended) == len(sample_dataset) + 2

    # The two new rows are expected at the tail, in input order.
    assert extended.iloc[-2]["name"] == "Alice"
    assert extended.iloc[-1]["name"] == "Charlie"
def test_add_rows_mixed_types(sample_dataset):
    """add_rows should accept a list mixing a plain dict with a Data object."""
    as_dict = {"name": "Alice", "age": 28, "city": "Seattle"}
    as_data = Data(data={"name": "Charlie", "age": 32, "city": "Portland"})
    extended = sample_dataset.add_rows([as_dict, as_data])

    assert isinstance(extended, DataFrame)
    assert len(extended) == len(sample_dataset) + 2

    # The two new rows are expected at the tail, in input order.
    assert extended.iloc[-2]["name"] == "Alice"
    assert extended.iloc[-1]["name"] == "Charlie"
def test_init_with_data_objects():
    """The constructor should accept a list of Data objects."""
    john = Data(data={"name": "John", "age": 30})
    jane = Data(data={"name": "Jane", "age": 25})
    frame = DataFrame([john, jane])

    assert isinstance(frame, DataFrame)
    assert len(frame) == 2
    assert list(frame.columns) == ["name", "age"]

    # Spot-check one cell from each row.
    assert frame.iloc[0]["name"] == "John"
    assert frame.iloc[1]["age"] == 25
def test_init_with_dicts():
    """The constructor should accept a list of plain dictionaries."""
    rows = [
        {"name": "John", "age": 30},
        {"name": "Jane", "age": 25},
    ]
    frame = DataFrame(rows)

    assert isinstance(frame, DataFrame)
    assert len(frame) == 2
    assert list(frame.columns) == ["name", "age"]

    # Spot-check one cell from each row.
    assert frame.iloc[0]["name"] == "John"
    assert frame.iloc[1]["age"] == 25
def test_init_with_dict_of_lists():
    """The constructor should accept a column-oriented dictionary of lists."""
    columns = {
        "name": ["John", "Jane"],
        "age": [30, 25],
    }
    frame = DataFrame(columns)

    assert isinstance(frame, DataFrame)
    assert len(frame) == 2
    assert list(frame.columns) == ["name", "age"]

    # Spot-check one cell from each row.
    assert frame.iloc[0]["name"] == "John"
    assert frame.iloc[1]["age"] == 25
def test_init_with_pandas_dataframe():
    """The constructor should accept an existing pandas DataFrame."""
    plain_frame = pd.DataFrame({"name": ["John", "Jane"], "age": [30, 25]})
    wrapped = DataFrame(plain_frame)

    assert isinstance(wrapped, DataFrame)
    assert len(wrapped) == 2
    assert list(wrapped.columns) == ["name", "age"]

    # Spot-check one cell from each row.
    assert wrapped.iloc[0]["name"] == "John"
    assert wrapped.iloc[1]["age"] == 25
def test_init_with_none():
    """Passing None to the constructor should give a DataFrame with zero rows."""
    frame = DataFrame(None)

    assert isinstance(frame, DataFrame)
    row_count = len(frame)
    assert row_count == 0
def test_init_with_invalid_list():
    """The constructor should raise ValueError for a list mixing dicts and Data objects."""
    plain_dict = {"name": "John", "age": 30}
    data_object = Data(data={"name": "Jane", "age": 25})  # Mixed types should fail
    mixed_items = [plain_dict, data_object]

    expected_message = "List items must be either all Data objects or all dictionaries"
    with pytest.raises(ValueError, match=expected_message):
        DataFrame(mixed_items)
def test_init_with_kwargs():
    """Extra keyword arguments (here a custom index) should be honoured by the constructor."""
    columns = {"name": ["John", "Jane"], "age": [30, 25]}
    labels = ["a", "b"]
    frame = DataFrame(data=columns, index=labels)

    assert isinstance(frame, DataFrame)
    assert len(frame) == 2
    assert list(frame.index) == ["a", "b"]

    # Rows must be addressable through the custom index labels.
    assert frame.loc["a"]["name"] == "John"
    assert frame.loc["b"]["age"] == 25