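"""Gradio app: paste a YouTube URL, download the video's audio track with pytube,
and transcribe it with OpenAI's Whisper "base" model. A summarization helper
(get_summary) is also defined, although it is not connected to the UI below."""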
import whisper
from pytube import YouTube
import gradio as gr
import os
import re
import logging
logging.basicConfig(level=logging.INFO)
model = whisper.load_model("base")
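# NOTE: get_summary() below calls `summarizer`, which is not defined anywhere in
# this snippet. The definition here is an assumption: a Hugging Face transformers
# summarization pipeline matches the call signature and the [{'summary_text': ...}]
# return shape used in get_summary(). The Space's actual model choice is not shown.
from transformers import pipeline
summarizer = pipeline("summarization")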
def get_text(url):
    """Download the audio track of a YouTube video and transcribe it with Whisper."""
    if url != '':
        yt = YouTube(url)
        video = yt.streams.filter(only_audio=True).first()
        out_file = video.download(output_path=".")
        file_stats = os.stat(out_file)
        logging.info(f'Size of audio file in Bytes: {file_stats.st_size}')
        # Cap downloads at roughly 30 MB (about 1.5 hours of audio) to keep the Space usable.
        if file_stats.st_size <= 30_000_000:
            base, ext = os.path.splitext(out_file)
            new_file = base + '.mp3'
            os.rename(out_file, new_file)
            result = model.transcribe(new_file)
            return result['text'].strip()
        else:
            message = ('Videos for transcription on this space are limited to about 1.5 hours. '
                       'Sorry about this limit, but some joker thought they could stop this tool from working '
                       'by transcribing many extremely long videos. '
                       'Please visit https://steve.digital to contact me about this space.')
            # Log the problem and also return the message so the user sees why nothing was transcribed.
            logging.error(message)
            return message
def get_summary(article):
    """Summarize the first five sentences of the transcript (not wired into the UI below)."""
    first_sentences = ' '.join(re.split(r'(?<=[.:;])\s', article)[:5])
    summary = summarizer(first_sentences, min_length=20, max_length=120, do_sample=False)
    return summary[0]['summary_text'].replace(' .', '.').strip()
with gr.Blocks() as demo:
    gr.Markdown('<h1><center>YouTube URL Video-to-Text using <a href="https://openai.com/blog/whisper/" target="_blank">GPTube</a> Model</center></h1>')
    input_text_url = gr.Textbox(placeholder='YouTube video URL', label='YouTube URL')
    result_button_transcribe = gr.Button('Transcribe')
    output_text_transcribe = gr.Textbox(placeholder='Transcript of the YouTube video.', label='Transcript')

    result_button_transcribe.click(get_text, inputs=input_text_url, outputs=output_text_transcribe)

demo.queue(default_enabled=True).launch(show_api=True)