video_parsers / bili.py
admin
merge all video parsers
7b8f88d
raw
history blame
3.65 kB
import os
import re
import shutil
import requests
import gradio as gr
from utils import timestamp
from config import TMP_DIR, HEADER, TIMEOUT, API_BILI
def download_file(url, video_id, cache_dir=TMP_DIR):
    """Download ``url`` into a freshly recreated cache directory.

    Args:
        url: Direct URL of the file to fetch.
        video_id: Identifier used as the base name of the saved file.
        cache_dir: Directory that receives the file. It is deleted and
            recreated on every call, so anything previously stored in it
            is lost.

    Returns:
        Path of the saved file, ``{cache_dir}/{video_id}.mp4``.

    Raises:
        requests.HTTPError: If the server answers with a status other
            than 200 (previously a path to a non-existent file was
            returned in that case).
        requests.RequestException: On connection errors or timeouts.
    """
    # Start from an empty directory so earlier downloads do not pile up.
    if os.path.exists(cache_dir):
        shutil.rmtree(cache_dir)

    os.makedirs(cache_dir)
    local_file = f"{cache_dir}/{video_id}.mp4"

    # The context manager releases the streamed connection even if writing
    # fails; the timeout keeps a stalled server from hanging the caller.
    with requests.get(url, stream=True, timeout=TIMEOUT) as response:
        if response.status_code != 200:
            raise requests.HTTPError(
                f"Download failed with HTTP status {response.status_code}",
                response=response,
            )

        with open(local_file, "wb") as file:
            for chunk in response.iter_content(chunk_size=8192):
                file.write(chunk)

    print(f"[{timestamp()}] File was downloaded to {local_file}")
    return local_file
def extract_fst_url(text):
    """Return the first http(s) URL found in ``text``, normalised.

    The URL is cut at its first ``?`` and a single trailing ``/`` is
    removed. When ``text`` contains no URL at all, it is treated as a
    video ID and appended to the Bilibili video base address.
    """
    found = re.search(r'(https?://[^\s"]+)', text)
    if found is None:
        return f"https://www.bilibili.com/video/{text}"

    # Keep only the part before the query string.
    link, _, _ = found.group(1).partition("?")
    return link[:-1] if link.endswith("/") else link
def get_real_url(short_url):
    """Follow redirects from ``short_url`` and return the final address.

    Everything from the first ``/?`` onwards is dropped from the
    resolved URL.
    """
    resolved = requests.get(
        short_url,
        headers=HEADER,
        allow_redirects=True,
        timeout=TIMEOUT,
    )
    final_url, _, _ = resolved.url.partition("/?")
    return final_url
def infer(video_url: str, p: int):
    """Look up a Bilibili video through the parsing API and download one part.

    Args:
        video_url: Text holding a video link, a ``b23.tv`` short link, or
            a bare video ID.
        p: 1-based number of the part to fetch.

    Returns:
        A 7-tuple ``(title, cover, video, desc, dur, avatar, author)``.
        On failure ``title`` carries the error message and every field
        that had not been filled in yet is ``None``.
    """
    title = cover = desc = dur = video = author = avatar = None
    if not video_url:
        title = "Empty video link!"
        return title, cover, video, desc, dur, avatar, author

    video_url = extract_fst_url(video_url)
    try:
        # Short links must be expanded before they are handed to the API.
        if "b23.tv" in video_url:
            video_url = get_real_url(video_url)

        response = requests.get(API_BILI, params={"url": video_url}, timeout=TIMEOUT)
        response_json = response.json()
        retcode = response_json["code"]
        if retcode == 1:
            title = response_json["title"]
            cover = response_json["imgurl"]
            desc = response_json["desc"]

            parts = response_json["data"]
            part_index = int(p) - 1
            # Reject out-of-range parts explicitly: a value below 1 would
            # otherwise wrap around through negative indexing and quietly
            # return a different part than the one requested.
            if not 0 <= part_index < len(parts):
                raise ValueError(
                    f"part {p} does not exist, the video has {len(parts)} part(s)"
                )

            response_data = parts[part_index]
            dur = response_data["duration"]
            video_id = video_url.split("/")[-1]
            video = download_file(response_data["video_url"], video_id)
            author_data = response_json["user"]
            author = author_data["name"]
            avatar = author_data["user_img"]

        else:
            title = f"Failed to call API, error code: {retcode}"

    except Exception as e:
        # UI boundary: report every failure in the title field instead of
        # letting it propagate out of the handler.
        title = f"Failed to parse video: {e}"

    return title, cover, video, desc, dur, avatar, author
def bili_parser():
    """Build the Gradio interface that exposes ``infer`` as a web form.

    The form takes a link text box and a part slider, and shows the seven
    values returned by ``infer`` in matching output components.
    """
    link_box = gr.Textbox(
        label="Please input Bilibili video link",
        placeholder="https://www.bilibili.com/video/*",
    )
    part_slider = gr.Slider(label="Part", minimum=1, maximum=1000, step=1, value=1)

    # Order must follow the tuple returned by infer:
    # title, cover, video, desc, dur, avatar, author.
    result_widgets = [
        gr.Textbox(label="Video title", show_copy_button=True),
        gr.Image(label="Video cover", show_share_button=False),
        gr.Video(
            label="Download video",
            show_download_button=True,
            show_share_button=False,
        ),
        gr.TextArea(label="Video introduction", show_copy_button=True),
        gr.Textbox(label="Video duration(s)", show_copy_button=True),
        gr.Image(label="Uploader avatar", show_share_button=False),
        gr.Textbox(label="Uploader nickname", show_copy_button=True),
    ]

    # One sample per accepted input form: bare ID, short link, full link.
    sample_rows = [
        ["BV1G8iRYBE4f", 1],
        ["https://b23.tv/LSoJzpW", 1],
        ["https://www.bilibili.com/video/BV1G8iRYBE4f", 1],
    ]

    return gr.Interface(
        fn=infer,
        inputs=[link_box, part_slider],
        outputs=result_widgets,
        title="Bilibili video parser",
        flagging_mode="never",
        examples=sample_rows,
        cache_examples=False,
    )