Spaces:
Runtime error
Runtime error
File size: 1,871 Bytes
7288748 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 |
from pathlib import Path
import pytest
from youtube_transcriber.loading.loaderiterator import LoaderIterator
from youtube_transcriber.loading.serialization import JsonSerializer
@pytest.fixture
def loader_iterator():
    """Build a ``LoaderIterator`` over the JSON files used by these tests.

    The path list mixes five files under the test folder with one path
    that does not exist ("non-existing-path"), presumably so the tests
    cover how missing files are handled -- confirm against
    ``LoaderIterator``. The iterator is created with a ``JsonSerializer``
    and a files-per-iteration value of 2.
    """
    base_dir = Path.home() / "whisper_gpt_pipeline/youtube_transcriber/test"
    load_paths = [
        base_dir / "files/1.json",
        base_dir / "files/2.json",
        Path("non-existing-path"),
        base_dir / "files/3.json",
        base_dir / "files/4.json",
        base_dir / "files/5.json",
    ]
    return LoaderIterator(JsonSerializer(), 2, load_paths)
def test_loader_iterator_init():
    """Check that the constructor stores its three arguments unchanged.

    The paths argument is a placeholder string: this test only inspects
    the attributes set at construction time and never iterates.
    """
    loader_iterator = LoaderIterator(JsonSerializer(), 3, "dummy_paths")

    # Exact-type checks use identity (`is`) rather than `==` on type objects.
    assert type(loader_iterator) is LoaderIterator
    assert type(loader_iterator.serializer) is JsonSerializer
    assert loader_iterator.load_paths == "dummy_paths"
    assert loader_iterator.num_files_per_iteration == 3
def test_loop_through_loaded_data(loader_iterator):
    """Check that iterating the fixture yields the expected batches in order.

    Each item produced by ``loader_iterator`` is compared with the batch at
    the same position in ``expected_data``. The number of batches is also
    verified, so an iterator that yields nothing, stops early, or yields
    extra batches fails the test instead of passing vacuously.
    """
    expected_data = [
        [
            {
                "channel_name": "The verge",
                "url": "https://www.youtube.com/watch?v=YMlTSmusEmA",
            },
            {
                "channel_name": "The verge",
                "url": "https://www.youtube.com/watch?v=Jzl0hHTc7Jw",
            },
        ],
        [
            {
                "channel_name": "The verge",
                "url": "https://www.youtube.com/watch?v=gV50hpSKHFQ",
            },
        ],
        [
            {
                "channel_name": "The verge",
                "url": "https://www.youtube.com/watch?v=N6ZyzoibXqg",
            },
            {
                "channel_name": "The verge",
                "url": "https://www.youtube.com/watch?v=q90v9FLXi1E",
            },
        ],
    ]

    batches_seen = 0
    for batch_index, data in enumerate(loader_iterator):
        # Fail with a readable message rather than an IndexError when the
        # iterator produces more batches than expected.
        assert batch_index < len(expected_data), (
            f"unexpected extra batch at index {batch_index}: {data!r}"
        )
        assert data == expected_data[batch_index]
        batches_seen += 1

    # Without this check the loop above would pass if nothing was yielded.
    assert batches_seen == len(expected_data)