File size: 2,868 Bytes
cfd3735
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
from pathlib import Path

from langchain.docstore.document import Document
from langchain.document_loaders.csv_loader import CSVLoader


class TestCSVLoader:
    """Tests for ``CSVLoader`` using the CSV fixtures under ``test_docs/csv``."""

    def test_csv_loader_load_valid_data(self) -> None:
        """A CSV file with valid data is loaded successfully."""
        csv_path = self._get_csv_file_path("test_nominal.csv")
        row_texts = [
            "column1: value1\ncolumn2: value2\ncolumn3: value3",
            "column1: value4\ncolumn2: value5\ncolumn3: value6",
        ]
        # One expected document per data row; "row" is the zero-based index.
        wanted = [
            Document(page_content=text, metadata={"source": csv_path, "row": idx})
            for idx, text in enumerate(row_texts)
        ]

        loaded = CSVLoader(file_path=csv_path).load()

        assert loaded == wanted

    def test_csv_loader_load_empty_file(self) -> None:
        """An empty CSV file is handled correctly (no documents produced)."""
        csv_path = self._get_csv_file_path("test_empty.csv")

        loaded = CSVLoader(file_path=csv_path).load()

        assert loaded == []

    def test_csv_loader_load_single_row_file(self) -> None:
        """A CSV file with only one row is handled correctly."""
        csv_path = self._get_csv_file_path("test_one_row.csv")
        only_doc = Document(
            page_content="column1: value1\ncolumn2: value2\ncolumn3: value3",
            metadata={"source": csv_path, "row": 0},
        )

        loaded = CSVLoader(file_path=csv_path).load()

        assert loaded == [only_doc]

    def test_csv_loader_load_single_column_file(self) -> None:
        """A CSV file with only one column is handled correctly."""
        csv_path = self._get_csv_file_path("test_one_col.csv")
        cell_values = ("value1", "value2", "value3")
        # Each row holds a single "column1" cell, so the content is one line.
        wanted = [
            Document(
                page_content="column1: " + cell,
                metadata={"source": csv_path, "row": idx},
            )
            for idx, cell in enumerate(cell_values)
        ]

        loaded = CSVLoader(file_path=csv_path).load()

        assert loaded == wanted

    # utility functions
    def _get_csv_file_path(self, file_name: str) -> str:
        """Return, as a string, the path of ``file_name`` under ``test_docs/csv``.

        The fixture directory is located relative to this test module's own
        resolved location, so the result does not depend on the working
        directory.
        """
        fixtures_dir = Path(__file__).resolve().parent / "test_docs" / "csv"
        return str(fixtures_dir / file_name)