Spaces:
Runtime error
Runtime error
Add support for downloading files from YouTube using yt-dlp
Browse files- app.py +29 -10
- download.py +38 -0
- requirements.txt +2 -1
app.py
CHANGED
|
@@ -1,14 +1,19 @@
|
|
|
|
|
|
|
|
| 1 |
from io import StringIO
|
| 2 |
import os
|
|
|
|
| 3 |
import tempfile
|
| 4 |
|
| 5 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
import gradio as gr
|
|
|
|
| 7 |
|
| 8 |
from utils import slugify, write_srt, write_vtt
|
| 9 |
-
import whisper
|
| 10 |
-
|
| 11 |
-
import ffmpeg
|
| 12 |
|
| 13 |
#import os
|
| 14 |
#os.system("pip install git+https://github.com/openai/whisper.git")
|
|
@@ -42,9 +47,8 @@ class UI:
|
|
| 42 |
def __init__(self, inputAudioMaxDuration):
|
| 43 |
self.inputAudioMaxDuration = inputAudioMaxDuration
|
| 44 |
|
| 45 |
-
def transcribeFile(self, modelName, languageName, uploadFile, microphoneData, task):
|
| 46 |
-
source =
|
| 47 |
-
sourceName = os.path.basename(source)
|
| 48 |
|
| 49 |
selectedLanguage = languageName.lower() if len(languageName) > 0 else None
|
| 50 |
selectedModel = modelName if modelName is not None else "base"
|
|
@@ -78,7 +82,20 @@ class UI:
|
|
| 78 |
download.append(createFile(vtt, downloadDirectory, filePrefix + "-subs.vtt"));
|
| 79 |
download.append(createFile(text, downloadDirectory, filePrefix + "-transcript.txt"));
|
| 80 |
|
| 81 |
-
return text, vtt
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 82 |
|
| 83 |
def createFile(text: str, directory: str, fileName: str) -> str:
|
| 84 |
# Write the text to a file
|
|
@@ -99,6 +116,7 @@ def getSubs(segments: Iterator[dict], format: str) -> str:
|
|
| 99 |
|
| 100 |
segmentStream.seek(0)
|
| 101 |
return segmentStream.read()
|
|
|
|
| 102 |
|
| 103 |
def createUi(inputAudioMaxDuration, share=False):
|
| 104 |
ui = UI(inputAudioMaxDuration)
|
|
@@ -113,13 +131,14 @@ def createUi(inputAudioMaxDuration, share=False):
|
|
| 113 |
demo = gr.Interface(fn=ui.transcribeFile, description=ui_description, inputs=[
|
| 114 |
gr.Dropdown(choices=["tiny", "base", "small", "medium", "large"], value="medium", label="Model"),
|
| 115 |
gr.Dropdown(choices=sorted(LANGUAGES), label="Language"),
|
|
|
|
| 116 |
gr.Audio(source="upload", type="filepath", label="Upload Audio"),
|
| 117 |
gr.Audio(source="microphone", type="filepath", label="Microphone Input"),
|
| 118 |
gr.Dropdown(choices=["transcribe", "translate"], label="Task"),
|
| 119 |
], outputs=[
|
|
|
|
| 120 |
gr.Text(label="Transcription"),
|
| 121 |
-
gr.Text(label="Segments")
|
| 122 |
-
gr.File(label="Download")
|
| 123 |
])
|
| 124 |
|
| 125 |
demo.launch(share=share)
|
|
|
|
| 1 |
+
from typing import Iterator
|
| 2 |
+
|
| 3 |
from io import StringIO
|
| 4 |
import os
|
| 5 |
+
import pathlib
|
| 6 |
import tempfile
|
| 7 |
|
| 8 |
+
# External programs
|
| 9 |
+
import whisper
|
| 10 |
+
import ffmpeg
|
| 11 |
+
|
| 12 |
+
# UI
|
| 13 |
import gradio as gr
|
| 14 |
+
from download import downloadUrl
|
| 15 |
|
| 16 |
from utils import slugify, write_srt, write_vtt
|
|
|
|
|
|
|
|
|
|
| 17 |
|
| 18 |
#import os
|
| 19 |
#os.system("pip install git+https://github.com/openai/whisper.git")
|
|
|
|
| 47 |
def __init__(self, inputAudioMaxDuration):
|
| 48 |
self.inputAudioMaxDuration = inputAudioMaxDuration
|
| 49 |
|
| 50 |
+
def transcribeFile(self, modelName, languageName, urlData, uploadFile, microphoneData, task):
|
| 51 |
+
source, sourceName = getSource(urlData, uploadFile, microphoneData)
|
|
|
|
| 52 |
|
| 53 |
selectedLanguage = languageName.lower() if len(languageName) > 0 else None
|
| 54 |
selectedModel = modelName if modelName is not None else "base"
|
|
|
|
| 82 |
download.append(createFile(vtt, downloadDirectory, filePrefix + "-subs.vtt"));
|
| 83 |
download.append(createFile(text, downloadDirectory, filePrefix + "-transcript.txt"));
|
| 84 |
|
| 85 |
+
return download, text, vtt
|
| 86 |
+
|
| 87 |
+
def getSource(urlData, uploadFile, microphoneData):
    """Resolve the audio source to transcribe and a short display name for it.

    A non-empty ``urlData`` takes precedence and is fetched with
    ``downloadUrl``. Otherwise ``uploadFile`` is used unless it is ``None``,
    in which case ``microphoneData`` is used.

    Args:
        urlData: URL to download, or a falsy value to use a local file.
        uploadFile: Path of the uploaded audio file, or ``None``.
        microphoneData: Path of the microphone recording, or ``None``.

    Returns:
        A ``(source, sourceName)`` tuple: ``source`` is the path that was
        selected, ``sourceName`` is its file name with the stem truncated to
        18 characters and the original suffix kept.

    Raises:
        ValueError: If none of the inputs yields a usable path.
    """
    if urlData:
        # Download from YouTube
        source = downloadUrl(urlData)
    else:
        # File input
        source = uploadFile if uploadFile is not None else microphoneData

    # Fail early with a readable message instead of letting pathlib.Path(None)
    # raise an opaque TypeError (or an empty path produce an empty name).
    if not source:
        raise ValueError(
            "No audio source provided: enter a URL, upload a file "
            "or record from the microphone"
        )

    file_path = pathlib.Path(source)
    # Keep the name short: at most 18 characters of the stem plus the suffix.
    sourceName = file_path.stem[:18] + file_path.suffix

    return source, sourceName
|
| 99 |
|
| 100 |
def createFile(text: str, directory: str, fileName: str) -> str:
|
| 101 |
# Write the text to a file
|
|
|
|
| 116 |
|
| 117 |
segmentStream.seek(0)
|
| 118 |
return segmentStream.read()
|
| 119 |
+
|
| 120 |
|
| 121 |
def createUi(inputAudioMaxDuration, share=False):
|
| 122 |
ui = UI(inputAudioMaxDuration)
|
|
|
|
| 131 |
demo = gr.Interface(fn=ui.transcribeFile, description=ui_description, inputs=[
|
| 132 |
gr.Dropdown(choices=["tiny", "base", "small", "medium", "large"], value="medium", label="Model"),
|
| 133 |
gr.Dropdown(choices=sorted(LANGUAGES), label="Language"),
|
| 134 |
+
gr.Text(label="URL (YouTube, etc.)"),
|
| 135 |
gr.Audio(source="upload", type="filepath", label="Upload Audio"),
|
| 136 |
gr.Audio(source="microphone", type="filepath", label="Microphone Input"),
|
| 137 |
gr.Dropdown(choices=["transcribe", "translate"], label="Task"),
|
| 138 |
], outputs=[
|
| 139 |
+
gr.File(label="Download"),
|
| 140 |
gr.Text(label="Transcription"),
|
| 141 |
+
gr.Text(label="Segments")
|
|
|
|
| 142 |
])
|
| 143 |
|
| 144 |
demo.launch(share=share)
|
download.py
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
|
| 3 |
+
from tempfile import mkdtemp
|
| 4 |
+
from yt_dlp import YoutubeDL
|
| 5 |
+
from yt_dlp.postprocessor import PostProcessor
|
| 6 |
+
|
| 7 |
+
class FilenameCollectorPP(PostProcessor):
    """Post-processor that records the path of every file passed to it.

    The collected paths are available in ``filenames`` in the order in
    which ``run`` was called.
    """

    def __init__(self):
        # No downloader is supplied at construction time (None is passed on).
        super().__init__(None)
        self.filenames = []

    def run(self, information):
        """Remember ``information["filepath"]`` and pass the info through.

        Returns an empty list together with the unchanged ``information``
        dict (presumably yt-dlp's "files to delete, info" convention —
        confirm against the yt-dlp PostProcessor documentation).
        """
        path = information["filepath"]
        self.filenames.append(path)
        return [], information
|
| 15 |
+
|
| 16 |
+
def downloadUrl(url: str):
    """Download the audio behind ``url`` with yt-dlp and return its file path.

    The file is stored in a freshly created temporary directory, which this
    function does not remove.

    Args:
        url: Address of the media to download.

    Returns:
        Path of the first file reported by the filename collector.

    Raises:
        Exception: If the download finished without reporting any file.
    """
    # Each call gets its own directory, used as yt-dlp's "home" path.
    targetDir = mkdtemp()
    options = {
        "format": "bestaudio/best",
        # Restrict the download to playlist item 1.
        'playlist_items': '1',
        'paths': {'home': targetDir},
    }

    collector = FilenameCollectorPP()
    with YoutubeDL(options) as downloader:
        # The collector records the path of each file it is handed.
        downloader.add_post_processor(collector)
        downloader.download([url])

    collected = collector.filenames
    if not collected:
        raise Exception("Cannot download " + url)

    result = collected[0]
    print("Downloaded " + result)
    return result
|
requirements.txt
CHANGED
|
@@ -1,4 +1,5 @@
|
|
| 1 |
git+https://github.com/openai/whisper.git
|
| 2 |
transformers
|
| 3 |
ffmpeg-python==0.2.0
|
| 4 |
-
gradio
|
|
|
|
|
|
| 1 |
git+https://github.com/openai/whisper.git
|
| 2 |
transformers
|
| 3 |
ffmpeg-python==0.2.0
|
| 4 |
+
gradio
|
| 5 |
+
yt-dlp
|