File size: 1,449 Bytes
aa9cce2
07d3e9f
 
cd864c2
aa9cce2
cd864c2
 
 
07d3e9f
cd864c2
 
aa9cce2
07d3e9f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cd864c2
 
 
 
 
 
 
 
 
07d3e9f
 
 
 
 
 
 
 
 
 
cd864c2
07d3e9f
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
import gradio as gr
import yt_dlp
import os
from openai import OpenAI

# Module-wide OpenAI client; generate_text() uses it for audio transcription.
# The API key is taken from the OPENAI_API_KEY environment variable. Indexing
# os.environ raises KeyError at import time if that variable is not set.
client = OpenAI(api_key=os.environ['OPENAI_API_KEY'])

# Disabled code kept for reference: loads a faster-whisper model on CPU with
# its weights stored under ./models. Nothing below is executed.
# from faster_whisper import WhisperModel
# Model names: tiny, tiny.en, base, base.en, small, small.en, medium, medium.en, large-v1, large-v2, large-v3, or large
# model_name = 'base'
# model = WhisperModel(model_name, device="cpu", download_root="./models")

# Options passed to yt_dlp.YoutubeDL by download_audio().
# - outtmpl: fixed output file name, so every download lands in demo.m4a.
# - format: format selector that tries m4a first, then the best audio-only
#   stream, then the best stream overall.
# - postprocessors: run ffmpeg's audio extraction, targeting the m4a codec.
ydl_opts = dict(
    outtmpl='demo.m4a',
    format='m4a/bestaudio/best',
    postprocessors=[
        dict(key='FFmpegExtractAudio', preferredcodec='m4a'),
    ],
    # Disabled: route traffic through a SOCKS5 proxy.
    # proxy='socks5://192.168.2.18:20170',
)

def download_audio(url):
    """Download the audio track of *url* into ``demo.m4a``.

    Any previous ``demo.m4a`` is removed first so a stale file from an
    earlier request can never be mistaken for the result of this one.

    Args:
        url: Address of the video to fetch; passed to yt-dlp unchanged.

    Raises:
        ValueError: If *url* is empty or only whitespace (e.g. the user
            submitted an empty textbox).
        RuntimeError: If yt-dlp finishes with a non-zero return code.
    """
    # Reject blank input before touching the filesystem or the network.
    if not url or not str(url).strip():
        raise ValueError("A video URL is required")

    # EAFP: avoids the check-then-remove race of os.path.exists + os.remove.
    try:
        os.remove('demo.m4a')
    except FileNotFoundError:
        pass

    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        code = ydl.download([url])

    # Explicit raise instead of ``assert``: asserts are stripped under
    # ``python -O``, which would silently ignore a failed download.
    if code != 0:
        raise RuntimeError("Failed to download audio")

def generate_text(url):
    """Download the audio for *url* and return its transcription as text.

    The audio is fetched into ``demo.m4a`` by ``download_audio`` and then
    sent to the OpenAI ``whisper-1`` transcription endpoint.

    Args:
        url: Address of the video whose audio should be transcribed.

    Returns:
        The transcribed text as a ``str``.

    Raises:
        Whatever ``download_audio`` or the OpenAI client raises; nothing is
        caught here.
    """
    download_audio(url)
    with open("demo.m4a", "rb") as f:
        transcription = client.audio.transcriptions.create(
            model="whisper-1",
            file=f,
            response_format="text",
        )
    # With response_format="text" the SDK hands back a plain string, so
    # reading ``.text`` on it raises AttributeError. Accept both shapes so
    # the function also works if an object response is ever returned.
    if isinstance(transcription, str):
        return transcription
    return transcription.text

# Gradio front end: one textbox for the video URL, a button that triggers
# generate_text, and a text area that shows the returned transcription.
with gr.Blocks() as demo:
    # Input column: URL field and the trigger button.
    with gr.Column():
        url_box = gr.Textbox(label="Enter your youtube url")
        run_button = gr.Button("Download")

    # Output column: where the transcription is displayed.
    with gr.Column():
        transcript_box = gr.TextArea(label="Output")

    # Clicking the button feeds the textbox value to generate_text and
    # writes its return value into the output area.
    run_button.click(
        fn=generate_text,
        inputs=[url_box],
        outputs=[transcript_box],
    )

# Start the web server as soon as the script runs.
demo.launch()