File size: 1,871 Bytes
7288748
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
from pathlib import Path

import pytest

from youtube_transcriber.loading.loaderiterator import LoaderIterator
from youtube_transcriber.loading.serialization import JsonSerializer

@pytest.fixture
def loader_iterator():
    """Return a LoaderIterator over five JSON sample paths and one bogus path.

    The iterator is built with a JsonSerializer and 2 files per iteration.
    The third entry, "non-existing-path", is placed between the sample
    files on purpose so tests can observe how a bad path is handled.
    """
    # NOTE(review): the sample directory is resolved relative to the user's
    # home directory, so this presumably only works when the repository is
    # checked out at ~/whisper_gpt_pipeline -- confirm.
    base_dir = Path.home() / "whisper_gpt_pipeline/youtube_transcriber/test"
    load_paths = [
        base_dir / "files/1.json",
        base_dir / "files/2.json",
        Path("non-existing-path"),
        base_dir / "files/3.json",
        base_dir / "files/4.json",
        base_dir / "files/5.json",
    ]
    return LoaderIterator(JsonSerializer(), 2, load_paths)

def test_loader_iterator_init():
    """Check that the constructor stores its three arguments on the instance.

    The serializer, the number of files per iteration and the load paths
    passed to LoaderIterator must be readable back from the attributes
    ``serializer``, ``num_files_per_iteration`` and ``load_paths``.
    """
    # Local name differs from the module-level ``loader_iterator`` fixture
    # so the fixture is not shadowed inside this test.
    iterator = LoaderIterator(JsonSerializer(), 3, "dummy_paths")

    # isinstance() is the idiomatic type check (PEP 8) instead of comparing
    # type objects with ``==``.
    assert isinstance(iterator, LoaderIterator)
    assert isinstance(iterator.serializer, JsonSerializer)
    assert iterator.load_paths == "dummy_paths"
    assert iterator.num_files_per_iteration == 3
    
def test_loop_through_loaded_data(loader_iterator):
    """Check that iterating the fixture yields the expected batches in order.

    ``expected_data`` lists three batches holding 2, 1 and 2 records. The
    fixture supplies six paths at two files per iteration, one of which is
    "non-existing-path"; the single-record middle batch encodes the
    expectation that this path contributes no record.
    """
    expected_data = [
        [
            {
                "channel_name": "The verge",
                "url": "https://www.youtube.com/watch?v=YMlTSmusEmA"
            },
            {
                "channel_name": "The verge",
                "url": "https://www.youtube.com/watch?v=Jzl0hHTc7Jw"
            }
        ],
        [
            {
                "channel_name": "The verge",
                "url": "https://www.youtube.com/watch?v=gV50hpSKHFQ"
            }
        ],
        [
            {
                "channel_name": "The verge",
                "url": "https://www.youtube.com/watch?v=N6ZyzoibXqg"
            },
            {
                "channel_name": "The verge",
                "url": "https://www.youtube.com/watch?v=q90v9FLXi1E"
            }
        ]
    ]

    # Collect everything first: asserting only inside a loop over the
    # iterator would pass vacuously if it yielded no batches (or stopped
    # early), so the batch count is verified explicitly.
    loaded_batches = list(loader_iterator)
    assert len(loaded_batches) == len(expected_data), (
        f"expected {len(expected_data)} batches, got {len(loaded_batches)}"
    )

    # Compare batch by batch so a failure points at the offending index.
    for index, (actual, expected) in enumerate(zip(loaded_batches, expected_data)):
        assert actual == expected, f"batch {index} differs"