Spaces:
Build error
Build error
File size: 2,370 Bytes
c25690f 600b9b2 c25690f b821f8f 9d44102 b821f8f c25690f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 |
import os
import time
import shutil
import pandas as pd
import gradio as gr
from vidfetch import youtube_dl_install_helper, push_to_hf
from panda70m_downloader import download_video_links, download_videos_by_csv
# Passed to download_video_links as save_dir; handle() reads the source CSV
# from this directory and writes the per-chunk CSV files into it.
SAVE_CSV_DIR = "panda70m_csv"
# Passed to download_videos_by_csv as save_dir; handle() uploads the archive
# from this directory and then deletes the directory with shutil.rmtree.
SAVE_VIDEOS_DIR = "panda70m_videos"
def handle(
    hf_token: str,
    filename: str,
):
    """Fetch a CSV of video links, download its videos in chunks, and upload them.

    Calls ``download_video_links`` with ``save_dir=SAVE_CSV_DIR``, reads
    ``SAVE_CSV_DIR/filename`` with pandas, and walks it in consecutive parts
    of 100 rows. For every processed part the rows are written to their own
    CSV file, ``download_videos_by_csv`` is called for that file, the
    resulting ``.tar.gz`` in ``SAVE_VIDEOS_DIR`` is pushed to the
    ``OpenVideo/Panda-70M-raw`` repository, and ``SAVE_VIDEOS_DIR`` is removed.

    Args:
        hf_token: HuggingFace token forwarded to the install helper, the CSV
            download and the upload.
        filename: Name of the CSV file inside ``SAVE_CSV_DIR``.

    Returns:
        None.
    """
    try:
        import youtube_dl  # noqa: F401
    except ImportError:
        # Only a failed import triggers the helper (presumably it installs
        # youtube_dl); interrupts and unrelated errors propagate.
        youtube_dl_install_helper(hf_token=hf_token)
        import youtube_dl  # noqa: F401

    download_video_links(hf_token=hf_token, filename=filename, save_dir=SAVE_CSV_DIR)

    rows_per_part = 100
    # Parts 0-9 are never processed.
    # NOTE(review): presumably those parts were already uploaded - confirm.
    first_part = 10

    # Divide the CSV into parts of rows_per_part rows and handle them one by one.
    csv_path = os.path.join(SAVE_CSV_DIR, filename)
    data = pd.read_csv(csv_path)
    total_rows = len(data)
    stem = filename.replace(".csv", "")

    # Stepping over row offsets also covers a final part that is shorter
    # than rows_per_part, so no trailing rows are left out.
    for begin_idx in range(first_part * rows_per_part, total_rows, rows_per_part):
        end_idx = min(begin_idx + rows_per_part, total_rows)
        part_data = data[begin_idx:end_idx]

        part_filename = f"{stem}_{begin_idx:06d}_{end_idx:06d}.csv"
        targz_filename = part_filename.replace(".csv", ".tar.gz")
        part_save_path = os.path.join(SAVE_CSV_DIR, part_filename)
        part_data.to_csv(part_save_path)

        download_videos_by_csv(
            csv_file_path=part_save_path,
            save_dir=SAVE_VIDEOS_DIR,
            targz_filename=targz_filename,
        )
        push_to_hf(
            hf_token=hf_token,
            hf_repo_id="OpenVideo/Panda-70M-raw",
            file_path=os.path.join(SAVE_VIDEOS_DIR, targz_filename),
            path_in_repo=targz_filename,
        )
        # Drop the downloaded files before the next part is fetched.
        shutil.rmtree(SAVE_VIDEOS_DIR)
# Gradio UI: a title, two text inputs, and a Submit button that runs handle().
with gr.Blocks() as demo:
    gr.Markdown(
        '''
Panda70M-Downloader
'''
    )

    hf_token = gr.Textbox(label="HuggingFace Token")
    filename = gr.Textbox(label="csv name")

    with gr.Row():
        button = gr.Button("Submit", variant="primary")
        # NOTE(review): no event handler is attached to this button here.
        clear = gr.Button("Clear")

    # handle() receives the two textbox values; nothing is written back to the UI.
    button.click(
        fn=handle,
        inputs=[hf_token, filename],
        outputs=None,
    )
import time
import subprocess
# Module-level loop, executed before the launch guard at the end of the file:
# MAX_TIME times in a row it runs "pip uninstall" and "pip install" for
# ml4co-kit and then sleeps for one second. pip's exit status is not checked.
# NOTE(review): the purpose of this loop is not evident from this file, and it
# delays everything after it by at least MAX_TIME seconds - confirm it is needed.
MAX_TIME = 1000
uninstall_cmd = ['pip', 'uninstall', '-y', 'ml4co-kit==0.0.1']
install_cmd = ['pip', 'install', '--no-cache-dir', 'ml4co-kit']

cur_time = 0
for cur_time in range(1, MAX_TIME + 1):
    subprocess.run(uninstall_cmd)
    subprocess.run(install_cmd)
    time.sleep(1)
# Start the Gradio app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch(debug=True)