Spaces:
Sleeping
Sleeping
File size: 1,810 Bytes
e84842d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 |
"""
Copyright (c) 2022, salesforce.com, inc.
All rights reserved.
SPDX-License-Identifier: BSD-3-Clause
For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
"""
import os
from pathlib import Path
from omegaconf import OmegaConf
from lavis.common.utils import (
cleanup_dir,
download_and_extract_archive,
get_abs_path,
get_cache_path,
)
# Archive downloaded by this script; the __main__ section expects it to
# unpack into a "YouTubeClips" directory under the download root.
DATA_URL = "https://www.cs.utexas.edu/users/ml/clamp/videoDescription/YouTubeClips.tar"
def download_datasets(root, url):
    """
    Fetch the archive at ``url`` through ``download_and_extract_archive``,
    using ``root`` as the download root.
    """
    archive_args = {"url": url, "download_root": root}
    download_and_extract_archive(**archive_args)
def move_files(download_path, storage_path):
    """
    Move every entry found directly in download_path into storage_path.

    storage_path is created (including missing parents) when it does not
    exist yet. Entries are moved with shutil.move, which renames when
    possible and falls back to copy-then-delete when the two locations are
    on different filesystems, a case where a plain os.rename raises OSError.

    Args:
        download_path: directory whose direct children are moved.
        storage_path: destination directory.
    """
    # Function-scope import so the block does not depend on a new
    # top-of-file import.
    import shutil

    print("Moving to {}".format(storage_path))
    os.makedirs(storage_path, exist_ok=True)

    for file_name in os.listdir(download_path):
        shutil.move(
            os.path.join(download_path, file_name),
            os.path.join(storage_path, file_name),
        )
if __name__ == "__main__":
    # Read the configured storage location for the MSVD videos.
    config_path = get_abs_path("configs/datasets/msvd/defaults_cap.yaml")
    storage_dir = OmegaConf.load(
        config_path
    ).datasets.msvd_cap.build_info.videos.storage

    storage_dir = Path(get_cache_path(storage_dir))
    # Stage the download in a sibling "download" directory; it is handed to
    # cleanup_dir once the clips have been moved (or the download failed).
    download_dir = storage_dir.parent / "download"

    if storage_dir.exists():
        print(f"Dataset already exists at {storage_dir}. Aborting.")
        raise SystemExit(0)

    try:
        print("Downloading {}".format(DATA_URL))
        download_datasets(download_dir, DATA_URL)
    except Exception as e:
        # remove download dir if failed
        cleanup_dir(download_dir)
        print("Failed to download or extracting datasets. Aborting.")
        print(f"Reason: {e}")
        # Really abort: without this the script would go on to move files
        # out of the directory that was just cleaned up and fail again with
        # a misleading secondary error.
        raise SystemExit(1)

    move_files(download_dir / "YouTubeClips", storage_dir)
    cleanup_dir(download_dir)
|