Spaces:

ginipick
/

tube

Sleeping

File size: 1,884 Bytes

bc736f8
97fd8dc
bc736f8
 
b640e62
49d7053
bc736f8
49d7053
a63bdf7
bc736f8
 
9ec122f
 
 
 
 
8d42280
9e07c40
54e9c4b
 
 
4a71c2c
e3b3ac8
8d42280
e3b3ac8
 
9e07c40
e3b3ac8
 
 
 
bc736f8
 
896fad4
8d42280
896fad4
 
5188c19
bc736f8
e15181e
 
4a71c2c
 
5188c19
 
8d42280
5188c19
8d42280

import logging
import os
import re
import tempfile

import gradio as gr
import whisper
from pytube import YouTube

# Configure the root logger so INFO-level (and more severe) records are emitted.
logging.basicConfig(level=logging.INFO)
# Load the Whisper "base" model once at import time; this module-level
# instance is what the transcription code calls `transcribe` on.
model = whisper.load_model("base")

# Largest downloaded audio file (in bytes) that will be transcribed.
_MAX_AUDIO_BYTES = 30_000_000

# Shown to the user (and logged) when a download exceeds _MAX_AUDIO_BYTES.
_SIZE_LIMIT_MESSAGE = 'Videos for transcription on this space are limited to about 1.5 hours. Sorry about this limit but some joker thought they could stop this tool from working by transcribing many extremely long videos. Please visit https://steve.digital to contact me about this space.'


def get_text(url):
    """Download a YouTube video's audio track and transcribe it with Whisper.

    Args:
        url: The YouTube video URL. ``None``, an empty string, or a
            whitespace-only string is treated as "no input".

    Returns:
        The transcript with surrounding whitespace stripped. Returns an empty
        string when no URL was supplied, and a human-readable message when
        the video has no audio-only stream or the downloaded audio is larger
        than ``_MAX_AUDIO_BYTES``.

    Raises:
        Exceptions raised by pytube (e.g. for an invalid or unavailable
        video) or by Whisper are not caught here and propagate to the caller.
    """
    # Guard clause: nothing to download, so skip the network and the model.
    if not url or not url.strip():
        return ''

    # Work inside a per-call temporary directory so the audio file is always
    # deleted afterwards and simultaneous requests cannot clobber each other.
    with tempfile.TemporaryDirectory() as workdir:
        yt = YouTube(url.strip())
        stream = yt.streams.filter(only_audio=True).first()
        if stream is None:
            # first() yields None when the filter matched no streams.
            logging.error('No audio-only stream found for %s', url)
            return 'No audio stream is available for this video.'

        out_file = stream.download(output_path=workdir)

        size = os.stat(out_file).st_size
        logging.info('Size of audio file in Bytes: %s', size)

        if size > _MAX_AUDIO_BYTES:
            logging.error(_SIZE_LIMIT_MESSAGE)
            # Return the message so the user sees why nothing was transcribed.
            return _SIZE_LIMIT_MESSAGE

        # Give the download an .mp3 suffix before handing it to Whisper, as
        # the original code did; os.replace overwrites any existing target.
        base, _ext = os.path.splitext(out_file)
        audio_path = base + '.mp3'
        os.replace(out_file, audio_path)

        result = model.transcribe(audio_path)
        return result['text'].strip()

def get_summary(article):
    """Summarize the opening of *article* and return the cleaned summary text.

    The text is split after each '.', ':' or ';' that is followed by a
    whitespace character; the first five resulting pieces are re-joined with
    single spaces and passed to ``summarizer``. In the first result's
    ``'summary_text'``, every ' .' is replaced by '.' and the outcome is
    stripped of surrounding whitespace.

    Args:
        article: The text to summarize.

    Returns:
        The tidied summary string.
    """
    pieces = re.split(r'(?<=[.:;])\s', article)
    lead = ' '.join(pieces[:5])

    # NOTE(review): `summarizer` is not defined or imported in the visible
    # part of this file, so calling this function as-is raises NameError —
    # confirm where it is supposed to come from.
    outputs = summarizer(lead, min_length=20, max_length=120, do_sample=False)

    summary_text = outputs[0]['summary_text']
    return summary_text.replace(' .', '.').strip()

# Build the UI: a heading, a URL textbox, a button, and a transcript textbox.
# Components are created in the order they should appear inside the Blocks
# context, so keep the statement order intact.
with gr.Blocks() as demo:
    # Page heading, written as raw HTML inside a Markdown component.
    gr.Markdown("<h1><center>YouTube URL Video-to-Text using <a href=https://openai.com/blog/whisper/ target=_blank>GPTube</a> Model</center></h1>")
   
    input_text_url = gr.Textbox(placeholder='Youtube video URL', label='YouTube URL')
    result_button_transcribe = gr.Button('Transcribe')
    output_text_transcribe = gr.Textbox(placeholder='Transcript of the YouTube video.', label='Transcript')
    
    # Clicking the button calls get_text with the URL textbox's value and
    # writes its return value into the transcript textbox.
    result_button_transcribe.click(get_text, inputs=input_text_url, outputs=output_text_transcribe)

# Enable the request queue, then start the app.
# NOTE(review): `default_enabled` looks like a Gradio 3.x-era queue() argument
# — confirm the Gradio version this Space pins still accepts it.
demo.queue(default_enabled=True).launch(show_api=True)