Spaces:
Running
Running
import pandas as pd | |
import pytest | |
from langchain.document_loaders import DataFrameLoader | |
from langchain.schema import Document | |
@pytest.fixture
def sample_data_frame() -> pd.DataFrame:
    """Provide the small DataFrame used as loader input by the tests.

    Registered as a pytest fixture so that test functions declaring a
    ``sample_data_frame`` parameter receive the frame via injection;
    without the decorator pytest cannot resolve that parameter.

    Returns:
        A two-row DataFrame with the string columns ``text``, ``author``
        and ``date``.
    """
    data = {
        "text": ["Hello", "World"],
        "author": ["Alice", "Bob"],
        "date": ["2022-01-01", "2022-01-02"],
    }
    return pd.DataFrame(data)
def test_load_returns_list_of_documents(sample_data_frame: pd.DataFrame) -> None:
    """Check that ``load()`` yields a list of ``Document`` objects, one per row."""
    documents = DataFrameLoader(sample_data_frame).load()

    # The container type is checked first, then each element, then the count.
    assert isinstance(documents, list)
    for document in documents:
        assert isinstance(document, Document)
    assert len(documents) == 2
def test_load_converts_dataframe_columns_to_document_metadata(
    sample_data_frame: pd.DataFrame,
) -> None:
    """Check that each document's metadata carries its row's other columns.

    Every loaded document is compared with the DataFrame row at the same
    position: its ``author`` and ``date`` metadata entries must equal that
    row's ``author`` and ``date`` values.
    """
    loader = DataFrameLoader(sample_data_frame)
    docs = loader.load()

    # Without this check an empty result would skip the loop entirely and
    # let the test pass without verifying anything.
    assert len(docs) == len(sample_data_frame)

    # Pair documents with rows by position rather than by index label, so
    # the comparison does not depend on the frame having a 0..n-1 index.
    for doc, (_, row) in zip(docs, sample_data_frame.iterrows()):
        assert doc.metadata["author"] == row["author"]
        assert doc.metadata["date"] == row["date"]
def test_load_uses_page_content_column_to_create_document_text(
    sample_data_frame: pd.DataFrame,
) -> None:
    """Check that ``page_content_column`` selects the column used as text.

    The ``text`` column is renamed, and the loader is told to read page
    content from the renamed column instead.
    """
    content_column = "dummy_test_column"
    renamed_frame = sample_data_frame.rename(columns={"text": content_column})

    documents = DataFrameLoader(
        renamed_frame, page_content_column=content_column
    ).load()

    assert documents[0].page_content == "Hello"
    assert documents[1].page_content == "World"