File size: 3,917 Bytes
7b8f88d
 
 
186440e
 
7b8f88d
 
 
610f7e5
186440e
7b8f88d
 
 
186440e
 
 
 
 
 
 
 
 
7b8f88d
186440e
 
 
7b8f88d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
186440e
7b8f88d
 
 
 
 
 
 
186440e
7b8f88d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
186440e
7b8f88d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
import re
import requests
import gradio as gr
from tqdm import tqdm
from utils import timestamp, clean_dir
from config import TMP_DIR, HEADER, TIMEOUT, API_BILI


def download_file(url, video_id, cache_dir=f"{TMP_DIR}/bili"):
    """Stream the file at ``url`` into ``cache_dir`` and return its path.

    Args:
        url: Direct URL of the video file to download.
        video_id: Identifier used as the file name (``<video_id>.mp4``).
        cache_dir: Directory that receives the file. It is handed to
            ``clean_dir`` before the download starts.

    Returns:
        The local path ``f"{cache_dir}/{video_id}.mp4"``.

    Raises:
        ConnectionError: If the server answers with a status other than 200,
            so callers never receive a path to a file that was not written.
    """
    clean_dir(cache_dir)
    local_file = f"{cache_dir}/{video_id}.mp4"
    # The context manager releases the streamed connection even when the
    # download fails half-way; the timeout keeps a stalled server from
    # blocking the caller forever.
    with requests.get(url, stream=True, timeout=TIMEOUT) as response:
        if response.status_code != 200:
            raise ConnectionError(
                f"Failed to download video, HTTP status: {response.status_code}"
            )

        # A missing Content-Length means the size is unknown; tqdm then shows
        # an open-ended counter instead of a bogus total.
        total_size = int(response.headers.get("Content-Length", 0)) or None
        with open(local_file, "wb") as f, tqdm(
            total=total_size,
            unit="B",
            unit_scale=True,
            desc=f"[{timestamp()}] {local_file}",
        ) as progress_bar:
            for chunk in response.iter_content(chunk_size=8192):
                if chunk:
                    f.write(chunk)
                    progress_bar.update(len(chunk))

    return local_file


def extract_fst_url(text):
    """Return the first http(s) URL found in ``text``, normalized.

    The query string (everything from the first ``?``) and any trailing
    slashes are removed from the URL. When ``text`` contains no URL at all,
    it is treated as a bare video ID and appended to the Bilibili video URL
    prefix.

    Args:
        text: Raw user input; either a bare video ID or text containing a URL.

    Returns:
        The normalized URL string.
    """
    match = re.search(r'(https?://[^\s"]+)', text)
    if match is None:
        # Pasted IDs often carry stray spaces or newlines; strip them so they
        # do not end up inside the generated URL.
        return f"https://www.bilibili.com/video/{text.strip()}"

    # rstrip removes every trailing slash, so the last path segment is never
    # empty for callers that split the URL on "/".
    return match.group(1).split("?")[0].rstrip("/")


def get_real_url(short_url):
    """Follow the redirects of ``short_url`` and return the final URL.

    The query string and any trailing slash are removed from the result.

    Args:
        short_url: URL to resolve (``infer`` calls this for ``b23.tv`` links).

    Returns:
        The URL reached after redirects, without query string or trailing
        slash.
    """
    response = requests.get(
        short_url,
        headers=HEADER,
        allow_redirects=True,
        timeout=TIMEOUT,
    )
    # Cut at "?" rather than "/?" so the query is dropped whether or not the
    # path ends with a slash, then strip the slash that may be left behind.
    return response.url.split("?")[0].rstrip("/")


# Entry point used as the Gradio callback (see bili_parser)
def infer(video_url: str, p: int):
    """Resolve a Bilibili link, query the parsing API and download one part.

    Args:
        video_url: A video URL, text containing one, or a bare video ID.
        p: 1-based number of the part to download.

    Returns:
        A 7-tuple ``(title, cover, video, desc, dur, avatar, author)``.
        On failure ``title`` carries the error message; the remaining fields
        keep whatever was obtained before the failure (``None`` otherwise).
    """
    title = cover = desc = dur = video = author = avatar = None
    if not video_url:
        title = "Empty video link!"
        return title, cover, video, desc, dur, avatar, author

    try:
        # Reject part numbers below 1 before any network call: ``p - 1``
        # would otherwise become a negative index and silently pick a part
        # from the end of the list.
        part = int(p)
        if part < 1:
            raise ValueError(f"Part number must be >= 1, got {part}")

        video_url = extract_fst_url(video_url)
        if "b23.tv" in video_url:
            video_url = get_real_url(video_url)

        response = requests.get(API_BILI, params={"url": video_url}, timeout=TIMEOUT)
        response_json = response.json()
        retcode = response_json["code"]
        # The API is treated as successful only when it reports code 1.
        if retcode != 1:
            raise ConnectionError(f"Failed to call API, error code: {retcode}")

        title = response_json["title"]
        cover = response_json["imgurl"]
        desc = response_json["desc"]

        parts = response_json["data"]
        if part > len(parts):
            raise IndexError(
                f"Part {part} does not exist, the video has {len(parts)} part(s)"
            )

        response_data = parts[part - 1]
        dur = response_data["duration"]
        # The last path segment of the URL doubles as the cache file name.
        video_id = video_url.split("/")[-1]
        video = download_file(response_data["video_url"], video_id)

        author_data = response_json["user"]
        author = author_data["name"]
        avatar = author_data["user_img"]

    except Exception as e:
        # UI boundary: report every failure through the title field instead
        # of letting the exception propagate.
        title = f"Failed to parse video: {e}"

    return title, cover, video, desc, dur, avatar, author


def bili_parser():
    """Build the Gradio interface of the Bilibili video parser.

    Returns:
        A ``gr.Interface`` that calls ``infer`` with a link textbox and a
        part slider, and displays the seven values ``infer`` returns.
    """
    # Input widgets, in the order of ``infer``'s parameters.
    link_box = gr.Textbox(
        label="Please input Bilibili video link",
        placeholder="https://www.bilibili.com/video/*",
    )
    part_slider = gr.Slider(label="Part", minimum=1, maximum=1000, step=1, value=1)

    # Output widgets, in the order of the tuple returned by ``infer``.
    result_widgets = [
        gr.Textbox(label="Video title", show_copy_button=True),
        gr.Image(label="Video cover", show_share_button=False),
        gr.Video(
            label="Download video",
            show_download_button=True,
            show_share_button=False,
        ),
        gr.TextArea(label="Video introduction", show_copy_button=True),
        gr.Textbox(label="Video duration(s)", show_copy_button=True),
        gr.Image(label="Uploader avatar", show_share_button=False),
        gr.Textbox(label="Uploader nickname", show_copy_button=True),
    ]

    # Sample rows: a bare video ID, a short link and a full URL.
    sample_inputs = [
        ["BV1G8iRYBE4f", 1],
        ["https://b23.tv/LSoJzpW", 1],
        ["https://www.bilibili.com/video/BV1G8iRYBE4f", 1],
    ]

    return gr.Interface(
        fn=infer,
        inputs=[link_box, part_slider],
        outputs=result_widgets,
        title="Bilibili video parser",
        flagging_mode="never",
        examples=sample_inputs,
        cache_examples=False,
    )