admin committed on
Commit
7b8f88d
·
1 Parent(s): 5aa5086

merge all video parsers

Browse files
Files changed (6) hide show
  1. app.py +16 -110
  2. bili.py +117 -0
  3. bvid2acid.py +31 -0
  4. config.py +11 -0
  5. tiktok.py +99 -0
  6. utils.py +12 -0
app.py CHANGED
@@ -1,113 +1,19 @@
1
- import os
2
- import re
3
- import shutil
4
- import requests
5
  import gradio as gr
6
- from datetime import datetime
7
- from zoneinfo import ZoneInfo
8
- from tzlocal import get_localzone
9
-
10
- TIMEOUT = None
11
- API = os.getenv("api_tiktok")
12
-
13
-
14
- def timestamp(naive_time: datetime = None, target_tz=ZoneInfo("Asia/Shanghai")):
15
- if not naive_time:
16
- naive_time = datetime.now()
17
-
18
- local_tz = get_localzone()
19
- aware_local = naive_time.replace(tzinfo=local_tz)
20
- return aware_local.astimezone(target_tz).strftime("%Y-%m-%d %H:%M:%S")
21
-
22
-
23
- def download_file(url, video_id, cache_dir="./__pycache__"):
24
- if os.path.exists(cache_dir):
25
- shutil.rmtree(cache_dir)
26
-
27
- os.makedirs(cache_dir)
28
- local_file = f"{cache_dir}/{video_id}.mp4"
29
- response = requests.get(url, stream=True)
30
- if response.status_code == 200:
31
- with open(local_file, "wb") as file:
32
- for chunk in response.iter_content(chunk_size=8192):
33
- file.write(chunk)
34
-
35
- print(f"[{timestamp()}] File was downloaded to {local_file}")
36
- return local_file
37
-
38
-
39
- def extract_fst_url(text):
40
- url_pattern = r'(https?://[^\s"]+)'
41
- match = re.search(url_pattern, text)
42
- if match:
43
- return match.group(1)
44
- else:
45
- return None
46
-
47
-
48
- def infer(video_url):
49
- video = parse_time = desc = avatar = author = sign = None
50
- if not video_url:
51
- desc = "The video sharing link is empty!"
52
- return video, desc, parse_time, avatar, author, sign
53
-
54
- video_url = extract_fst_url(video_url)
55
- if not video_url:
56
- desc = "Please enter a valid video sharing link!"
57
- return video, desc, parse_time, avatar, author, sign
58
-
59
- try:
60
- response = requests.get(API, params={"url": video_url}, timeout=TIMEOUT)
61
- response_json = response.json()
62
- retcode = response_json["code"]
63
- if retcode == 200:
64
- response_data = response_json["data"]
65
- video_id = response_data["play_url"].split("video_id=")[1].split("&")[0]
66
- video = download_file(response_data["video_url"], video_id)
67
- parse_time = response_data["parse_time"]
68
-
69
- additional_data = response_data["additional_data"][0]
70
- desc = additional_data["desc"]
71
- avatar = additional_data["url"].split("?from=")[0]
72
- author = additional_data["nickname"]
73
- sign = additional_data["signature"]
74
-
75
- else:
76
- desc = f"Interface call failed, error code: HTTP {retcode}"
77
-
78
- except Exception as e:
79
- desc = f"Video parsing failed: {e}"
80
-
81
- return video, desc, parse_time, avatar, author, sign
82
-
83
 
84
  if __name__ == "__main__":
85
- gr.Interface(
86
- fn=infer,
87
- inputs=[
88
- gr.Textbox(
89
- label="Please enter TikTok video sharing short link",
90
- placeholder="https://v.douyin.com/*",
91
- ),
92
- ],
93
- outputs=[
94
- gr.Video(
95
- label="Video download",
96
- show_download_button=True,
97
- show_share_button=False,
98
- ),
99
- gr.Textbox(label="Video description", show_copy_button=True),
100
- gr.Textbox(label="Parsing time", show_copy_button=True),
101
- gr.Image(label="Author avatar", show_share_button=False),
102
- gr.Textbox(label="Author nickname", show_copy_button=True),
103
- gr.TextArea(label="Author signature", show_copy_button=True),
104
- ],
105
- title="Parse TikTok video without watermark",
106
- description="This site does not provide any video storage services, only to provide the most basic resolution services",
107
- flagging_mode="never",
108
- examples=[
109
- "https://v.douyin.com/8FVe5DzarE0",
110
- "8.20 Njc:/ [email protected] 11/03 黑塔女士举世无双!# 大黑塔 # 黑塔 # 崩坏星穹铁道 # 再创世的凯歌 # 天才俱乐部 https://v.douyin.com/8FVe5DzarE0/ 复制此链接,打开Dou音搜索,直接观看视频!",
111
- ],
112
- cache_examples=False,
113
- ).launch()
 
 
 
 
 
1
  import gradio as gr
2
+ from tiktok import tiktok_parser
3
+ from bili import bili_parser
4
+ from bvid2acid import bv2acid
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
6
  if __name__ == "__main__":
7
+ with gr.Blocks() as demo:
8
+ gr.Markdown(
9
+ "This site does not provide any video storage services, only to provide the most basic resolution services"
10
+ )
11
+ with gr.Tab("Tiktok"):
12
+ tiktok_parser()
13
+
14
+ with gr.Tab("Bilibili"):
15
+ with gr.Column():
16
+ bv2acid()
17
+ bili_parser()
18
+
19
+ demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bili.py ADDED
@@ -0,0 +1,117 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import re
3
+ import shutil
4
+ import requests
5
+ import gradio as gr
6
+ from utils import timestamp
7
+ from config import TMP_DIR, HEADER, TIMEOUT, API_BILI
8
+
9
+
10
+ def download_file(url, video_id, cache_dir=TMP_DIR):
11
+ if os.path.exists(cache_dir):
12
+ shutil.rmtree(cache_dir)
13
+
14
+ os.makedirs(cache_dir)
15
+ local_file = f"{cache_dir}/{video_id}.mp4"
16
+ response = requests.get(url, stream=True)
17
+ if response.status_code == 200:
18
+ with open(local_file, "wb") as file:
19
+ for chunk in response.iter_content(chunk_size=8192):
20
+ file.write(chunk)
21
+
22
+ print(f"[{timestamp()}] File was downloaded to {local_file}")
23
+ return local_file
24
+
25
+
26
+ def extract_fst_url(text):
27
+ url_pattern = r'(https?://[^\s"]+)'
28
+ match = re.search(url_pattern, text)
29
+ if match:
30
+ out_url = match.group(1).split("?")[0]
31
+ if out_url[-1] == "/":
32
+ out_url = out_url[:-1]
33
+
34
+ return out_url
35
+
36
+ else:
37
+ return f"https://www.bilibili.com/video/{text}"
38
+
39
+
40
+ def get_real_url(short_url):
41
+ return requests.get(
42
+ short_url,
43
+ headers=HEADER,
44
+ allow_redirects=True,
45
+ timeout=TIMEOUT,
46
+ ).url.split("/?")[0]
47
+
48
+
49
+ def infer(video_url: str, p: int):
50
+ title = cover = desc = dur = video = author = avatar = None
51
+ if not video_url:
52
+ title = "Empty video link!"
53
+ return title, cover, video, desc, dur, avatar, author
54
+
55
+ video_url = extract_fst_url(video_url)
56
+ try:
57
+ if "b23.tv" in video_url:
58
+ video_url = get_real_url(video_url)
59
+
60
+ response = requests.get(API_BILI, params={"url": video_url}, timeout=TIMEOUT)
61
+ response_json = response.json()
62
+ retcode = response_json["code"]
63
+ if retcode == 1:
64
+ title = response_json["title"]
65
+ cover = response_json["imgurl"]
66
+ desc = response_json["desc"]
67
+
68
+ response_data = response_json["data"][int(p) - 1]
69
+ dur = response_data["duration"]
70
+ video_id = video_url.split("/")[-1]
71
+ video = download_file(response_data["video_url"], video_id)
72
+
73
+ author_data = response_json["user"]
74
+ author = author_data["name"]
75
+ avatar = author_data["user_img"]
76
+
77
+ else:
78
+ title = f"Failed to call API, error code: {retcode}"
79
+
80
+ except Exception as e:
81
+ title = f"Failed to parse video: {e}"
82
+
83
+ return title, cover, video, desc, dur, avatar, author
84
+
85
+
86
+ def bili_parser():
87
+ return gr.Interface(
88
+ fn=infer,
89
+ inputs=[
90
+ gr.Textbox(
91
+ label="Please input Bilibili video link",
92
+ placeholder="https://www.bilibili.com/video/*",
93
+ ),
94
+ gr.Slider(label="Part", minimum=1, maximum=1000, step=1, value=1),
95
+ ],
96
+ outputs=[
97
+ gr.Textbox(label="Video title", show_copy_button=True),
98
+ gr.Image(label="Video cover", show_share_button=False),
99
+ gr.Video(
100
+ label="Download video",
101
+ show_download_button=True,
102
+ show_share_button=False,
103
+ ),
104
+ gr.TextArea(label="Video introduction", show_copy_button=True),
105
+ gr.Textbox(label="Video duration(s)", show_copy_button=True),
106
+ gr.Image(label="Uploader avatar", show_share_button=False),
107
+ gr.Textbox(label="Uploader nickname", show_copy_button=True),
108
+ ],
109
+ title="Bilibili video parser",
110
+ flagging_mode="never",
111
+ examples=[
112
+ ["BV1G8iRYBE4f", 1],
113
+ ["https://b23.tv/LSoJzpW", 1],
114
+ ["https://www.bilibili.com/video/BV1G8iRYBE4f", 1],
115
+ ],
116
+ cache_examples=False,
117
+ )
bvid2acid.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ import gradio as gr
3
+
4
+
5
+ def infer(bvid: str):
6
+ try:
7
+ response = requests.get(
8
+ "https://api.bilibili.com/x/web-interface/view",
9
+ params={"bvid": bvid},
10
+ headers={
11
+ "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36 Edg/131.0.0.0"
12
+ },
13
+ )
14
+ data = response.json()["data"]
15
+ return data["aid"], data["cid"]
16
+
17
+ except Exception as e:
18
+ return "Failed to parse aid / cid", f"{e}"
19
+
20
+
21
+ def bv2acid():
22
+ return gr.Interface(
23
+ fn=infer,
24
+ inputs=gr.Textbox(label="bvid", show_copy_button=True),
25
+ outputs=[
26
+ gr.Textbox(label="aid", show_copy_button=True),
27
+ gr.Textbox(label="cid", show_copy_button=True),
28
+ ],
29
+ title="Bvid to aid / cid",
30
+ flagging_mode="never",
31
+ )
config.py ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+
3
+ TIMEOUT = None
4
+ TMP_DIR = "./__pycache__"
5
+
6
+ API_BILI = os.getenv("api_bili")
7
+ API_TIKTOK = os.getenv("api_tiktok")
8
+
9
+ HEADER = {
10
+ "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.116 Safari/537.36"
11
+ }
tiktok.py ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import re
3
+ import shutil
4
+ import requests
5
+ import gradio as gr
6
+ from utils import timestamp
7
+ from config import API_TIKTOK, TIMEOUT, TMP_DIR
8
+
9
+
10
+ def download_file(url, video_id, cache_dir=TMP_DIR):
11
+ if os.path.exists(cache_dir):
12
+ shutil.rmtree(cache_dir)
13
+
14
+ os.makedirs(cache_dir)
15
+ local_file = f"{cache_dir}/{video_id}.mp4"
16
+ response = requests.get(url, stream=True)
17
+ if response.status_code == 200:
18
+ with open(local_file, "wb") as file:
19
+ for chunk in response.iter_content(chunk_size=8192):
20
+ file.write(chunk)
21
+
22
+ print(f"[{timestamp()}] File was downloaded to {local_file}")
23
+ return local_file
24
+
25
+
26
+ def extract_fst_url(text):
27
+ url_pattern = r'(https?://[^\s"]+)'
28
+ match = re.search(url_pattern, text)
29
+ if match:
30
+ return match.group(1)
31
+ else:
32
+ return None
33
+
34
+
35
+ def infer(video_url):
36
+ video = parse_time = desc = avatar = author = sign = None
37
+ if not video_url:
38
+ desc = "The video sharing link is empty!"
39
+ return video, desc, parse_time, avatar, author, sign
40
+
41
+ video_url = extract_fst_url(video_url)
42
+ if not video_url:
43
+ desc = "Please enter a valid video sharing link!"
44
+ return video, desc, parse_time, avatar, author, sign
45
+
46
+ try:
47
+ response = requests.get(API_TIKTOK, params={"url": video_url}, timeout=TIMEOUT)
48
+ response_json = response.json()
49
+ retcode = response_json["code"]
50
+ if retcode == 200:
51
+ response_data = response_json["data"]
52
+ video_id = response_data["play_url"].split("video_id=")[1].split("&")[0]
53
+ video = download_file(response_data["video_url"], video_id)
54
+ parse_time = response_data["parse_time"]
55
+
56
+ additional_data = response_data["additional_data"][0]
57
+ desc = additional_data["desc"]
58
+ avatar = additional_data["url"].split("?from=")[0]
59
+ author = additional_data["nickname"]
60
+ sign = additional_data["signature"]
61
+
62
+ else:
63
+ desc = f"Interface call failed, error code: HTTP {retcode}"
64
+
65
+ except Exception as e:
66
+ desc = f"Video parsing failed: {e}"
67
+
68
+ return video, desc, parse_time, avatar, author, sign
69
+
70
+
71
+ def tiktok_parser():
72
+ return gr.Interface(
73
+ fn=infer,
74
+ inputs=[
75
+ gr.Textbox(
76
+ label="Please enter TikTok video sharing short link",
77
+ placeholder="https://v.douyin.com/*",
78
+ ),
79
+ ],
80
+ outputs=[
81
+ gr.Video(
82
+ label="Video download",
83
+ show_download_button=True,
84
+ show_share_button=False,
85
+ ),
86
+ gr.Textbox(label="Video description", show_copy_button=True),
87
+ gr.Textbox(label="Parsing time", show_copy_button=True),
88
+ gr.Image(label="Author avatar", show_share_button=False),
89
+ gr.Textbox(label="Author nickname", show_copy_button=True),
90
+ gr.TextArea(label="Author signature", show_copy_button=True),
91
+ ],
92
+ title="Parse TikTok video without watermark",
93
+ flagging_mode="never",
94
+ examples=[
95
+ "https://v.douyin.com/8FVe5DzarE0",
96
+ "8.20 Njc:/ [email protected] 11/03 黑塔女士举世无双!# 大黑塔 # 黑塔 # 崩坏星穹铁道 # 再创世的凯歌 # 天才俱乐部 https://v.douyin.com/8FVe5DzarE0/ 复制此链接,打开Dou音搜索,直接观看视频!",
97
+ ],
98
+ cache_examples=False,
99
+ )
utils.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from datetime import datetime
2
+ from zoneinfo import ZoneInfo
3
+ from tzlocal import get_localzone
4
+
5
+
6
+ def timestamp(naive_time: datetime = None, target_tz=ZoneInfo("Asia/Shanghai")):
7
+ if not naive_time:
8
+ naive_time = datetime.now()
9
+
10
+ local_tz = get_localzone()
11
+ aware_local = naive_time.replace(tzinfo=local_tz)
12
+ return aware_local.astimezone(target_tz).strftime("%Y-%m-%d %H:%M:%S")