File size: 6,067 Bytes
0bb0d8e
 
 
16bc3e4
0bb0d8e
da9138c
 
 
0bb0d8e
16bc3e4
 
 
a96aeb1
16bc3e4
 
 
 
 
 
da9138c
16bc3e4
da9138c
 
a96aeb1
 
da9138c
a96aeb1
da9138c
a96aeb1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
da9138c
a96aeb1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16bc3e4
 
 
 
 
0021652
16bc3e4
 
 
0021652
 
da9138c
 
 
 
 
a96aeb1
16bc3e4
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
import gradio as gr
import os
import time
import sys
import subprocess
import tempfile
import requests
from urllib.parse import urlparse

# Start-up dependency bootstrap: fetch faster-whisper from GitHub, install it in
# editable mode, and install the download helpers used further down.
_FASTER_WHISPER_DIR = "./faster-whisper"

# `git clone` exits non-zero when the target directory already exists, which
# would abort every restart after the first run, so only clone when missing.
if not os.path.isdir(_FASTER_WHISPER_DIR):
    subprocess.run(
        ["git", "clone", "https://github.com/SYSTRAN/faster-whisper.git", _FASTER_WHISPER_DIR],
        check=True,
    )

# Invoke pip through the running interpreter so packages are installed into the
# same environment that performs the imports below.
subprocess.run([sys.executable, "-m", "pip", "install", "-e", _FASTER_WHISPER_DIR], check=True)

# Each requirement must be its own argv entry: without a shell, a single
# "yt-dlp pytube ffmpeg-python" string is handed to pip as one (invalid) name.
subprocess.run(
    [sys.executable, "-m", "pip", "install", "yt-dlp", "pytube", "ffmpeg-python"],
    check=True,
)

# Add the faster-whisper directory to the Python path
sys.path.append(_FASTER_WHISPER_DIR)

from faster_whisper import WhisperModel
from faster_whisper.transcribe import BatchedInferencePipeline
import yt_dlp

def download_audio(url):
    """Download the audio behind *url* and return the path of the local file.

    YouTube links are routed to ``download_youtube_audio``; every other URL is
    treated as a direct link to an audio file and handed to
    ``download_direct_audio``.

    Args:
        url: An http(s) URL pointing at a YouTube video or an audio file.

    Returns:
        Whatever the selected downloader returns (a local file path).

    Raises:
        Exception: Propagated from the selected downloader.
    """
    # `hostname` is lower-cased and excludes any port or credentials, so
    # "WWW.YouTube.com:443" is recognised as well; `netloc` would not match.
    host = urlparse(url).hostname or ""
    youtube_hosts = {
        'www.youtube.com',
        'youtube.com',
        'm.youtube.com',
        'music.youtube.com',
        'youtu.be',
    }
    if host in youtube_hosts:
        return download_youtube_audio(url)
    return download_direct_audio(url)

def download_youtube_audio(url):
    """Try each YouTube download strategy in turn and return the first result.

    A strategy that raises is reported on stdout and the next one is tried.

    Args:
        url: The YouTube video URL.

    Returns:
        The value returned by the first strategy that succeeds.

    Raises:
        Exception: If every strategy raised.
    """
    strategies = (
        youtube_dl_method,
        pytube_method,
        youtube_dl_alternative_method,
        ffmpeg_method,
    )

    for strategy in strategies:
        try:
            audio_file = strategy(url)
        except Exception as error:
            print(f"Method {strategy.__name__} failed: {str(error)}")
            continue
        return audio_file

    raise Exception("All download methods failed. Please try a different video or a direct audio URL.")

def youtube_dl_method(url):
    """Download a video's audio with yt-dlp and return the MP3 file name.

    The options request the 'bestaudio/best' format, an 'FFmpegExtractAudio'
    post-processor targeting mp3 at quality '192', and an output template of
    '%(id)s.%(ext)s' (a relative path).

    Args:
        url: The video URL passed to ``extract_info``.

    Returns:
        ``"<id>.mp3"``, where ``<id>`` is the 'id' field of the info dict
        returned by yt-dlp.
    """
    mp3_postprocessor = {
        'key': 'FFmpegExtractAudio',
        'preferredcodec': 'mp3',
        'preferredquality': '192',
    }
    options = {
        'format': 'bestaudio/best',
        'postprocessors': [mp3_postprocessor],
        'outtmpl': '%(id)s.%(ext)s',
    }
    with yt_dlp.YoutubeDL(options) as downloader:
        video_info = downloader.extract_info(url, download=True)
        video_id = video_info['id']
        return "{}.mp3".format(video_id)

def pytube_method(url):
    """Download the first audio-only stream with pytube and return its path.

    The downloaded file is only renamed to a ``.mp3`` extension; it is not
    transcoded, so the container stays whatever pytube delivered.

    Args:
        url: The YouTube video URL.

    Returns:
        Path of the renamed file, ending in ``.mp3``.

    Raises:
        RuntimeError: If the video exposes no audio-only stream.
    """
    # Imported lazily so that a missing pytube install only breaks this method.
    from pytube import YouTube

    audio_stream = YouTube(url).streams.filter(only_audio=True).first()
    if audio_stream is None:
        # `.first()` yields None for an empty query; fail with a clear message
        # instead of an AttributeError on `None.download`.
        raise RuntimeError(f"No audio-only stream available for {url}")

    out_file = audio_stream.download()
    new_file = os.path.splitext(out_file)[0] + '.mp3'
    # os.replace overwrites a leftover file from a previous run on every
    # platform, whereas os.rename fails on Windows if the target exists.
    os.replace(out_file, new_file)
    return new_file

def youtube_dl_alternative_method(url):
    """Retry the yt-dlp download with quieter, more permissive options.

    Uses the same format, post-processor and output template as
    ``youtube_dl_method`` and additionally sets the 'no_warnings', 'quiet',
    'no_check_certificate', 'prefer_insecure' and 'nocheckcertificate' flags.

    NOTE(review): the certificate-related flags look like they relax TLS
    verification, and 'no_check_certificate' looks like an alternate spelling
    of 'nocheckcertificate' -- confirm both against the yt-dlp option list.

    Args:
        url: The video URL passed to ``extract_info``.

    Returns:
        ``"<id>.mp3"``, where ``<id>`` is the 'id' field of the info dict
        returned by yt-dlp.
    """
    mp3_postprocessor = {
        'key': 'FFmpegExtractAudio',
        'preferredcodec': 'mp3',
        'preferredquality': '192',
    }
    options = {
        'format': 'bestaudio/best',
        'postprocessors': [mp3_postprocessor],
        'outtmpl': '%(id)s.%(ext)s',
    }
    # Extra switches that distinguish this attempt from the default one.
    options.update({
        'no_warnings': True,
        'quiet': True,
        'no_check_certificate': True,
        'prefer_insecure': True,
        'nocheckcertificate': True,
    })
    with yt_dlp.YoutubeDL(options) as downloader:
        video_info = downloader.extract_info(url, download=True)
        video_id = video_info['id']
        return "{}.mp3".format(video_id)

def ffmpeg_method(url):
    """Let the ffmpeg CLI read *url* and encode its audio track to a temp MP3.

    Args:
        url: Any input ffmpeg can open (passed verbatim after ``-i``).

    Returns:
        Path of the newly written ``.mp3`` file in the temp directory. The
        caller is responsible for deleting it.

    Raises:
        FileNotFoundError: If the ``ffmpeg`` executable is not on PATH.
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
    """
    # mkstemp creates the file atomically; tempfile.mktemp only returns a name
    # and is deprecated because another process can claim it first.
    fd, output_file = tempfile.mkstemp(suffix='.mp3')
    os.close(fd)

    # '-y' is required now that the output file already exists; without it
    # ffmpeg would ask for confirmation before overwriting.
    command = ['ffmpeg', '-y', '-i', url, '-vn', '-acodec', 'libmp3lame', '-q:a', '2', output_file]
    try:
        subprocess.run(command, check=True, capture_output=True)
    except Exception:
        # Do not leave an empty or partial file behind when the conversion fails.
        try:
            os.remove(output_file)
        except OSError:
            pass
        raise
    return output_file

def download_direct_audio(url):
    """Download the file at *url* into a temporary ``.mp3`` and return its path.

    The response body is streamed to disk in chunks rather than held in memory.

    Args:
        url: Direct http(s) link to an audio file.

    Returns:
        Path of the temporary file. The caller is responsible for deleting it.

    Raises:
        Exception: If the server answers with anything other than HTTP 200.
        requests.exceptions.RequestException: On connection errors or timeouts.
    """
    temp_path = None
    try:
        # (connect, read) timeouts: without them an unresponsive server would
        # block the request indefinitely.
        with requests.get(url, stream=True, timeout=(10, 60)) as response:
            if response.status_code != 200:
                raise Exception(f"Failed to download audio from {url}")
            with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_file:
                temp_path = temp_file.name
                for chunk in response.iter_content(chunk_size=64 * 1024):
                    temp_file.write(chunk)
        return temp_path
    except Exception:
        # Remove a partially written file so failed downloads do not pile up.
        if temp_path is not None:
            try:
                os.remove(temp_path)
            except OSError:
                pass
        raise

def transcribe_audio(input_source, batch_size):
    """Transcribe a local file or remote URL and return a text report.

    Args:
        input_source: Local audio path, or an http(s) URL (direct audio link or
            YouTube link) that is downloaded first.
        batch_size: Batch size forwarded to the batched inference pipeline.

    Returns:
        A string holding the timestamped transcription followed by language,
        duration, timing and file-size metrics. On any failure the string
        ``"An error occurred: <message>"`` is returned instead of raising.
    """
    source = input_source.strip() if isinstance(input_source, str) else input_source
    if not source:
        # Reject empty input before paying for the model load.
        return "An error occurred: No audio source was provided."

    is_remote = isinstance(source, str) and source.startswith(('http://', 'https://'))
    # Bound up front so the cleanup in `finally` is safe even when the model
    # load or the download fails before a path is known.
    audio_path = None

    try:
        # Initialize the model
        model = WhisperModel("cstr/whisper-large-v3-turbo-int8_float32", device="auto", compute_type="int8")
        batched_model = BatchedInferencePipeline(model=model)

        # Remote sources are downloaded first; anything else is used as a path.
        audio_path = download_audio(source) if is_remote else source

        # Benchmark transcription time. faster-whisper documents `segments` as
        # a lazy generator, so decoding happens while it is iterated; it must
        # be consumed before the clock is stopped.
        start_time = time.time()
        segments, info = batched_model.transcribe(audio_path, batch_size=int(batch_size))
        segments = list(segments)
        transcription_time = time.time() - start_time

        # Generate transcription
        transcription = "".join(
            f"[{segment.start:.2f}s -> {segment.end:.2f}s] {segment.text}\n"
            for segment in segments
        )

        # Calculate metrics (guard against a zero reading from a coarse clock)
        if transcription_time > 0:
            real_time_factor = info.duration / transcription_time
        else:
            real_time_factor = float("inf")
        audio_file_size = os.path.getsize(audio_path) / (1024 * 1024)  # Size in MB

        # Prepare output
        return "".join([
            f"Transcription:\n\n{transcription}\n",
            f"\nLanguage: {info.language}, Probability: {info.language_probability:.2f}\n",
            f"Duration: {info.duration:.2f}s, Duration after VAD: {info.duration_after_vad:.2f}s\n",
            f"Transcription time: {transcription_time:.2f} seconds\n",
            f"Real-time factor: {real_time_factor:.2f}x\n",
            f"Audio file size: {audio_file_size:.2f} MB",
        ])

    except Exception as e:
        return f"An error occurred: {str(e)}"

    finally:
        # Clean up the downloaded file; local user files are never deleted.
        if is_remote and audio_path is not None:
            try:
                os.remove(audio_path)
            except OSError:
                # Best-effort cleanup: the file may already be gone.
                pass

# Gradio interface: one text box for the audio source, one slider for the batch
# size, and a text box that shows the report returned by transcribe_audio.
_source_input = gr.Textbox(label="Audio Source (Upload, MP3 URL, or YouTube URL)")
_batch_size_input = gr.Slider(minimum=1, maximum=32, step=1, value=16, label="Batch Size")
_report_output = gr.Textbox(label="Transcription and Metrics")

# Each example row is [audio source, batch size], in the same order as the inputs.
_example_rows = [
    ["https://www.youtube.com/watch?v=dQw4w9WgXcQ", 16],
    ["https://example.com/path/to/audio.mp3", 16],
    ["path/to/local/audio.mp3", 16],
]

iface = gr.Interface(
    fn=transcribe_audio,
    inputs=[_source_input, _batch_size_input],
    outputs=_report_output,
    title="Faster Whisper Multi-Input Transcription",
    description="Enter an audio file path, MP3 URL, or YouTube URL to transcribe using Faster Whisper (GitHub version). Adjust the batch size for performance tuning.",
    examples=_example_rows,
    cache_examples=False,  # Prevents automatic processing of examples
)

iface.launch()