Spaces:

mirari
/

Whisper-Youtube

Running

File size: 2,429 Bytes

40b945b
 
f488dc2
 
5068470
0755058
d6490f3
5ade7c5
d584e8b
40b945b
dbd9748
f0e5a8c
0e860f6
51d296b
0e860f6
 
 
8033e07
0e860f6
a68778f
40b945b
 
 
 
 
 
 
 
 
 
97ef95f
 
40b945b
 
 
 
 
 
 
 
 
 
 
 
 
 
cb5becc
5ade7c5
40b945b
 
 
8deb6ea
00abedf
a4e52b0
f215841
5a555d3
40b945b
 
 
 
 
0c7f668
adf4b56
b815ecd
40b945b
 
 
97ef95f
40b945b
 
98f72f4
40b945b
2593683
639f239
98f72f4

import gradio as gr
import whisper
#from pytube import YouTube
from pytubefix import YouTube
from pytubefix.cli import on_progress

# Name of the Whisper checkpoint that is active at startup. It is kept next to
# the model object so the currently loaded size can be compared against later.
current_size = 'medium'
# Load the default checkpoint once, at import time.
loaded_model = whisper.load_model(current_size)

def inference(link):
  """Download the audio of a YouTube video and transcribe it with Whisper.

  Args:
    link: URL of the YouTube video to transcribe.

  Returns:
    The transcription text, i.e. the ``'text'`` entry of the result returned
    by ``loaded_model.transcribe``.

  Raises:
    gr.Error: If ``link`` is empty/blank, or if the video has no audio-only
      MP4 stream to download.
  """
  if not link or not link.strip():
    raise gr.Error("Please provide a YouTube link.")

  yt = YouTube(link, on_progress_callback=on_progress, client="WEB")

  # first() returns None when no stream matches the filter; fail with a
  # readable message instead of an AttributeError on None.download().
  audio_stream = yt.streams.filter(only_audio=True, file_extension='mp4').first()
  if audio_stream is None:
    raise gr.Error("No audio-only MP4 stream is available for this video.")

  # download() returns the path of the file it wrote.
  path = audio_stream.download()

  # No decoding options are passed, so transcribe() runs with its defaults.
  results = loaded_model.transcribe(path)
  return results['text']

def change_model(size):
  """Replace the module-level Whisper model with one of the requested size.

  Does nothing when ``size`` is already the active size. Otherwise loads the
  new checkpoint and rebinds the module-level ``loaded_model`` and
  ``current_size``.

  Args:
    size: Name of the Whisper checkpoint to load (e.g. ``'tiny'``, ``'base'``).

  Returns:
    None.
  """
  # Both names are assigned below. Without this declaration Python treats them
  # as function locals, so reading current_size raised UnboundLocalError and
  # the module-level model was never actually replaced.
  global loaded_model, current_size
  if size == current_size:
    return
  # Load first, record the size second: if loading fails, current_size still
  # describes the model that is really in memory.
  loaded_model = whisper.load_model(size)
  current_size = size

def populate_metadata(link):
  """Look up the thumbnail URL and title of a YouTube video.

  Args:
    link: URL of the YouTube video. May be empty or ``None``.

  Returns:
    A ``(thumbnail_url, title)`` tuple. For an empty or whitespace-only
    ``link`` the tuple is ``(None, None)`` and no lookup is attempted.
  """
  # This is wired to a textbox change event, so it is also invoked when the
  # field is empty or has just been cleared; skip the lookup in that case
  # instead of letting the YouTube constructor fail on an empty URL.
  if not link or not link.strip():
    return None, None
  yt = YouTube(link)
  return yt.thumbnail_url, yt.title

# NOTE(review): `title` is rebound to a gr.Label further down before it is
# ever read, and `description` is not read anywhere in this file; both look
# like leftovers. Confirm nothing external relies on them before removing.
title=""
description=""

# Blocks-based layout: model-size dropdown, link input, video metadata
# (title + thumbnail), transcription output and a "Transcribe" button.
block = gr.Blocks()
with block:
    # Header placeholder: a centered container with no content.
    gr.HTML(
        """
            <div style="text-align: center; max-width: 500px; margin: 0 auto;">
              <div>
              </div>
            </div>
        """
    )
    with gr.Group():
        with gr.Group():
          # Whisper checkpoint selector; defaults to 'medium'.
          sz = gr.Dropdown(label="Model Size", choices=['tiny', 'base','small', 'medium', 'large'], value='medium')
          
          # URL of the video to transcribe.
          link = gr.Textbox(label="YouTube Link")
          
          # Example link shown to the user ("Ejemplo" is Spanish for "Example").
          gr.Markdown("Ejemplo:  https://www.youtube.com/watch?v=bnvgcQB01mQ")
        
          
          # Video metadata, filled in by populate_metadata.
          with gr.Row():
            title = gr.Label(label="Video Title")
            img = gr.Image(label="Thumbnail")
          # Output box for the transcription returned by inference.
          text = gr.Textbox(
              label="Transcription", 
              placeholder="Transcription Output",
              lines=5)
          with gr.Row(): 
              btn = gr.Button("Transcribe")   
               
          
          # Events
          # Button click: transcribe the linked video into the output box.
          btn.click(inference, inputs=[link], outputs=[text])
          # Link text changed: refresh thumbnail and title.
          link.change(populate_metadata, inputs=[link], outputs=[img, title])
          # Dropdown changed: switch the loaded Whisper model (no outputs).
          sz.change(change_model, inputs=[sz], outputs=[])

# NOTE(review): the Blocks layout above is built but never launched from this
# file (the launch call below is commented out); only `demo` is launched.
#block.launch()

# `demo` wraps inference in a gr.Interface that reuses the `link` and `text`
# components created inside the Blocks above.
# NOTE(review): the css selector here is `foot`, while the commented-out
# variant uses `footer`; if the intent is to hide the page footer this looks
# like a typo. Confirm before changing, since it alters the rendered page.
#demo = gr.Interface(css="footer {visibility: hidden}", examples='https://www.youtube.com/watch?v=bnvgcQB01mQ')
demo = gr.Interface(css="foot {visibility: hidden}", fn=inference, inputs=[link], outputs=[text])

# Start the Interface app.
demo.launch()