Spaces:
Runtime error
Runtime error
File size: 4,299 Bytes
19befe8 5c66990 fdad218 47d9326 9de2290 47d9326 7a97be1 47d9326 fdad218 47d9326 fdad218 47d9326 7a97be1 47d9326 fdad218 47d9326 fdad218 47d9326 fdad218 47d9326 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 |
import os
os.system("pip install git+https://github.com/openai/whisper.git")
from pytube import YouTube
import gradio as gr
from subprocess import call
import whisper
import logging
# from transformers.pipelines.audio_utils import ffmpeg_read
# Record layout shared by every log line: "timestamp;LEVEL;message".
formatter = logging.Formatter(
    fmt="%(asctime)s;%(levelname)s;%(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
)

# Stream handler that emits INFO and above using the layout defined above.
ch = logging.StreamHandler()
ch.setLevel(logging.INFO)
ch.setFormatter(formatter)

# Application logger; INFO and above are forwarded to the stream handler.
logger = logging.getLogger("whisper-jax-app")
logger.setLevel(logging.INFO)
logger.addHandler(ch)

# Tuning knobs and limits.
# NOTE(review): in the visible part of this file only FILE_LIMIT_MB is read
# (by the upload size check); the remaining values look like leftovers from
# an earlier pipeline -- confirm before removing them.
BATCH_SIZE = 16
CHUNK_LENGTH_S = 30
NUM_PROC = 8
FILE_LIMIT_MB = 1000  # maximum accepted upload size, in megabytes
YT_ATTEMPT_LIMIT = 3
def run_cmd(command):
    """Echo *command* and run it, blocking until the child process exits.

    Args:
        command: The program and its arguments, in any form accepted by
            ``subprocess.call`` (the caller in this file passes a list).

    Raises:
        SystemExit: With status 1 when a ``KeyboardInterrupt`` arrives while
            the command is being echoed or executed.
    """
    # The file's import block does not bring in ``sys``; without this local
    # import the interrupt handler below fails with NameError instead of
    # exiting cleanly.
    import sys

    try:
        print(command)
        call(command)
    except KeyboardInterrupt:
        print("Process interrupted")
        sys.exit(1)
def inference(text):
    """Run the ``tts`` command-line tool on *text*.

    Invokes ``tts --text <text>`` through ``run_cmd`` and returns the fixed
    file name ``'tts_output.wav'``.

    NOTE(review): presumably that is the tool's default output path --
    confirm, since no output path is passed on the command line.
    """
    run_cmd(['tts', '--text', text])
    return 'tts_output.wav'
# Whisper "base" checkpoint, loaded once at import time and shared by the
# transcription callbacks in this file.
model = whisper.load_model("base")

# Components used by the audio-file interface: the audio is handed to the
# callback as a path on disk, and the result is shown in a plain text box.
inputs = gr.components.Audio(type="filepath", label="Add audio file")
outputs = gr.components.Textbox()

# Page text shown by every interface.
title = "Transcribe multi-lingual audio clips"
# This used to advertise a text-to-speech demo, although every interface
# that displays it performs speech-to-text transcription.
description = (
    "Transcribe speech from an uploaded audio file, a microphone recording, "
    "or a YouTube video using OpenAI Whisper."
)
article = ""

# Only referenced by the commented-out legacy launch code at the end of the
# file; kept so that re-enabling it still works.
examples = [
    [""]
]
def transcribe(inputs):
    """Transcribe the audio file at path *inputs* with the module-level model.

    Args:
        inputs: Filesystem path of the audio file, or ``None`` when nothing
            was submitted.

    Returns:
        The transcribed text, or a human-readable message when no file was
        submitted or the file is larger than ``FILE_LIMIT_MB`` megabytes.
    """
    print('Inputs: ', inputs)

    # Guard: nothing was uploaded or recorded.
    if inputs is None:
        logger.warning("No audio file")
        return "No audio file submitted! Please upload an audio file before submitting your request."

    # Guard: reject oversized files before handing them to the model.
    bytes_per_mb = 1024 * 1024
    file_size_mb = os.path.getsize(inputs) / bytes_per_mb
    if file_size_mb > FILE_LIMIT_MB:
        logger.warning("Max file size exceeded")
        return f"File size exceeds file size limit. Got file of size {file_size_mb:.2f}MB for a limit of {FILE_LIMIT_MB}MB."

    # The decoding language is pinned to English and word-level timestamps
    # are disabled; verbose=True is passed through to the model call.
    transcription = model.transcribe(
        audio=inputs,
        language='english',
        word_timestamps=False,
        verbose=True,
    )
    text = transcription["text"]
    print(text)
    return text
# Transcribe youtube video
# define function for transcription
def youtube_transcript(url):
    """Download a YouTube video and return its transcription.

    Args:
        url: Address of the YouTube video.  ``None``, an empty string or a
            whitespace-only string is treated as "nothing submitted".

    Returns:
        The transcribed text on success.  Otherwise a human-readable
        message: a prompt to enter a URL when none was given, or a string
        prefixed with ``'Error: '`` when the download or the transcription
        fails.
    """
    # Guard: an empty submission used to fall through and return None,
    # leaving the output box blank with no explanation.
    if not url or (isinstance(url, str) and not url.strip()):
        print('Error: ', 'no URL submitted')
        return "No URL submitted! Please enter a YouTube URL before submitting your request."

    # This function is a UI callback, so any failure is reported as text
    # instead of being raised.
    try:
        yt = YouTube(url, use_oauth=True)
        # Highest-resolution progressive MP4 stream.
        stream = (
            yt.streams.filter(progressive=True, file_extension='mp4')
            .order_by('resolution')
            .desc()
            .first()
        )
        if stream is None:
            # Without this check the failure would surface as an opaque
            # "'NoneType' object has no attribute 'download'".
            print('Error: ', 'no progressive MP4 stream available')
            return 'Error: no progressive MP4 stream is available for this video.'
        source = stream.download('output/youtube')
        transcript = model.transcribe(source)
        return transcript["text"]
    except Exception as e:
        print('Error: ', e)
        return 'Error: ' + str(e)
# Interface that runs `transcribe` on an uploaded audio file, using the
# module-level `inputs` / `outputs` components and the shared page text.
audio_chunked = gr.Interface(
    fn=transcribe,
    inputs=inputs,
    outputs=outputs,
    title=title,
    description=description,
    article=article,
    allow_flagging="never",
)
# Interface that runs `transcribe` on a microphone recording.
# Components come from `gr.components`, like the ones built for the
# audio-file interface, rather than the deprecated `gr.inputs` /
# `gr.outputs` namespaces.  The deprecated `optional=` argument is dropped.
# NOTE(review): `source=` and `.style()` are Gradio 3.x APIs -- confirm the
# pinned Gradio version before upgrading.
microphone_chunked = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.components.Audio(source="microphone", type="filepath"),
    ],
    outputs=[
        gr.components.Textbox(label="Transcription").style(
            show_copy_button=True),
    ],
    allow_flagging="never",
    title=title,
    description=description,
    article=article,
)
# Interface that runs `youtube_transcript` on a pasted video URL.
# Components come from `gr.components`, like the ones built for the
# audio-file interface, rather than the deprecated `gr.inputs` /
# `gr.outputs` namespaces.
# NOTE(review): `.style()` is a Gradio 3.x API -- confirm the pinned Gradio
# version before upgrading.
youtube_chunked = gr.Interface(
    fn=youtube_transcript,
    inputs=[
        gr.components.Textbox(label="Youtube URL", type="text"),
    ],
    outputs=[
        gr.components.Textbox(label="Transcription").style(
            show_copy_button=True),
    ],
    allow_flagging="never",
    title=title,
    description=description,
    article=article,
    # One single-element row per example, matching the single URL input.
    examples=[
        ["https://www.youtube.com/watch?v=nlMuHtV82q8&ab_channel=NothingforSale24"],
        ["https://www.youtube.com/watch?v=JzPfMbG1vrE&ab_channel=ExplainerVideosByLauren"],
        ["https://www.youtube.com/watch?v=S68vvV0kod8&ab_channel=Pearl-CohnTelevision"],
    ],
)
# Assemble the three interfaces into one tabbed page; the tab labels are
# listed in the same order as the interfaces they belong to.
with gr.Blocks() as demo:
    gr.TabbedInterface(
        [youtube_chunked, audio_chunked, microphone_chunked],
        ["Youtube", "Audio File", "Microphone"],
    )

# Enable the request queue with the configured concurrency and size limits,
# then start the app with show_api=False.
demo.queue(concurrency_count=1, max_size=5)
demo.launch(show_api=False)
# gr.Interface(
# inference,
# inputs,
# outputs,
# verbose=True,
# title=title,
# description=description,
# article=article,
# examples=examples,
# enable_queue=True,
# ).launch(share=True, debug=True)
|