Valentin De Matos committed on
Commit
19f5fa0
·
1 Parent(s): 0fe75d2

Signed-off-by: Valentin De Matos <[email protected]>

Files changed (4) hide show
  1. Dockerfile +13 -0
  2. README.md +7 -5
  3. app.py +138 -0
  4. requirements.txt +2 -0
Dockerfile ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.9.16

WORKDIR /code

# Copy the requirements first so the dependency layer is cached across code changes.
COPY ./requirements.txt /code/requirements.txt

RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt

# Relax ImageMagick's security policy so text rendering (used by the app's
# TextClip subtitles) is allowed to read/write its temporary files.
# The file is edited in place with `sed -i`: piping `cat file | sed ... > file`
# truncates the file before it is read, leaving an empty policy. The pattern is
# matched literally (the former `[none]` was a character class, and `|` cannot
# be both the sed delimiter and part of the replacement).
RUN sed -i 's/rights="none"/rights="read|write"/g' /etc/ImageMagick-6/policy.xml

COPY . .

# NOTE(review): this commit adds a Gradio `app.py` at the repository root and
# requirements.txt does not list uvicorn; `app.main:app` looks stale - confirm
# the intended entry point before relying on this image.
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860"]
README.md CHANGED
@@ -1,10 +1,12 @@
1
  ---
2
  title: Youtube Subtitler
3
- emoji: 🐠
4
- colorFrom: indigo
5
- colorTo: yellow
6
- sdk: docker
7
- pinned: false
 
 
8
  license: unknown
9
  ---
10
 
 
1
  ---
2
  title: Youtube Subtitler
3
+ emoji: 🎥
4
+ colorFrom: blue
5
+ colorTo: red
6
+ sdk: gradio
7
+ sdk_version: 3.19.1
8
+ app_file: app.py
9
+ pinned: true
10
  license: unknown
11
  ---
12
 
app.py ADDED
@@ -0,0 +1,138 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import requests
3
+ import gradio as gr
4
+
5
+ from time import time
6
+ from pytube import YouTube
7
+ from moviepy.editor import *
8
+ from moviepy.video.tools.subtitles import SubtitlesClip
9
+
10
+
11
# Gladia API key, taken from the environment (None when the variable is unset).
GLADIA_API_KEY = os.getenv("GLADIA_API_KEY")

# HTTP headers sent with every request to the Gladia API.
headers = dict(
    [
        ("accept", "application/json"),
        ("x-gladia-key", GLADIA_API_KEY),
    ]
)

# Values listed for the Gladia `language_behaviour` form field.
ACCEPTED_LANGUAGE_BEHAVIOUR = list(
    (
        "manual",
        "automatic single language",
        "automatic multiple languages",
    )
)
24
+
25
+
26
def __download_youtube_video(youtube_url: str, path):
    """Download a YouTube video to ``path`` at the best available resolution.

    Resolutions are tried in order: 720p, then 480p, then 360p; the first
    stream found is downloaded.

    Args:
        youtube_url: URL of the video, passed to ``pytube.YouTube``.
        path: Destination file name, passed to ``Stream.download(filename=...)``.

    Raises:
        RuntimeError: If the video has no 720p, 480p or 360p stream.
    """
    preferred_resolutions = ("720p", "480p", "360p")

    youtube_video = YouTube(youtube_url)

    # NOTE(review): filtering on resolution alone may select a video-only
    # stream; consider also filtering on progressive streams if the audio
    # track must be kept - confirm against the pytube documentation.
    video_stream = None
    for resolution in preferred_resolutions:
        video_stream = youtube_video.streams.filter(res=resolution).first()
        if video_stream:
            break

    # Fail with an explicit message instead of an AttributeError on None.
    if not video_stream:
        raise RuntimeError(
            f"No {'/'.join(preferred_resolutions)} stream available for {youtube_url}"
        )

    video_stream.download(filename=path)
39
+
40
+
41
def __apply_srt_to_video(path_to_srt: str, path_to_video: str, output_path: str):
    """Burn the subtitles of an SRT file into a video and write the result.

    Args:
        path_to_srt: Path of the ``.srt`` subtitle file.
        path_to_video: Path of the source video.
        output_path: Path where the subtitled video is written.
    """
    # A subtitle line is broken at the first separator found at or after
    # MIN_LINE_SIZE characters, and unconditionally at MAX_LINE_SIZE.
    MIN_LINE_SIZE = 80
    MAX_LINE_SIZE = 140
    LINE_BREAK_CHARS = ('\n', ' ', ',', ';', '.', '-')

    def make_text_clip(raw_txt):
        """Wrap one subtitle text into lines and render it as a TextClip."""
        text_to_display = []

        while raw_txt:
            idx = min(MIN_LINE_SIZE, len(raw_txt))

            # Extend the line until it ends on a separator, reaches the hard
            # limit, or consumes the remaining text (never past its end).
            while (
                idx < len(raw_txt)
                and idx < MAX_LINE_SIZE
                and raw_txt[idx - 1] not in LINE_BREAK_CHARS
            ):
                idx += 1

            text_to_display.append(raw_txt[:idx])
            raw_txt = raw_txt[idx:]

        return TextClip(
            "\n".join(text_to_display), font='Arial', fontsize=30, color='white'
        )

    # Build the subtitle clip once, directly from the SRT file.
    subtitles = SubtitlesClip(path_to_srt, make_text_clip)

    video = VideoFileClip(path_to_video)
    try:
        # Combine video and subtitle clips
        result = CompositeVideoClip([video, subtitles.set_pos(('center', 'bottom'))])

        # Write the result to a new video file
        result.write_videofile(output_path)
    finally:
        # Release the reader held on the source video, even when writing fails.
        video.close()
73
+
74
+
75
def youtube_subtitler(youtube_url: str):
    """Transcribe a YouTube video with Gladia and burn the subtitles into it.

    The video URL is sent to the Gladia audio-transcription endpoint, the SRT
    returned under the ``"prediction"`` key is saved, the video is downloaded,
    and a new video with the subtitles composited in is written.

    Args:
        youtube_url: URL of the YouTube video to subtitle.

    Returns:
        Path of the generated ``.mp4`` file. The caller owns this file.

    Raises:
        ValueError: If ``youtube_url`` is empty or blank.
        RuntimeError: If the transcription request does not return HTTP 200.
    """
    # Local import keeps this function usable without touching the file's
    # import block.
    import tempfile

    # Reject blank input before spending a remote API call on it.
    if not youtube_url or not youtube_url.strip():
        raise ValueError("A Youtube URL is required")

    files = {
        'audio_url': (None, youtube_url),
        'output_format': (None, "srt"),
        'language_behaviour': (None, 'automatic multiple languages'),
    }

    response = requests.post(
        'https://api.gladia.io/audio/text/audio-transcription/?output_format=srt',
        headers=headers,
        files=files
    )

    if response.status_code != 200:
        raise RuntimeError(f"Failed to transcribe the video: {response.content}")

    srt_content = response.json()["prediction"]

    # Reserve a unique output file; unlike time()-based names this cannot
    # collide between requests.
    fd, output_path = tempfile.mkstemp(suffix=".mp4")
    os.close(fd)

    try:
        # Intermediate files live in a private directory that is removed on
        # exit, whether or not a step fails part-way through.
        with tempfile.TemporaryDirectory() as work_dir:
            path_to_srt = os.path.join(work_dir, "subtitles.srt")
            path_to_video = os.path.join(work_dir, "video.mp4")

            with open(path_to_srt, "w+") as f:
                f.write(srt_content)

            __download_youtube_video(youtube_url=youtube_url, path=path_to_video)

            __apply_srt_to_video(
                path_to_srt=path_to_srt,
                path_to_video=path_to_video,
                output_path=output_path
            )
    except BaseException:
        # Do not leave an empty or half-written output file behind.
        try:
            os.remove(output_path)
        except FileNotFoundError:
            pass
        raise

    return output_path
120
+
121
+
122
# Gradio UI: one text box for the video URL in, one mp4 video player out.
iface = gr.Interface(
    fn=youtube_subtitler,
    inputs=[gr.Text(label="Youtube URL")],
    outputs=[gr.Video(format="mp4")],
    title="Gladia.io Youtube Subtitler",
    description="""Burn subtitles into your youtube video using Gladia's Audio-Transcription solution.
<br/><br/>
You are more than welcome to join us on [Slack](https://gladia-io.slack.com)
and don't forget to get your own API key on [Gladia.io](https://gladia.io/) during the free alpha !
""",
)

# Turn on request queuing, then start the server.
iface.queue()
iface.launch()
requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ pytube
2
+ moviepy