File size: 3,062 Bytes
bc736f8
cd2dfd4
 
bc736f8
cd2dfd4
49d7053
bc736f8
cd2dfd4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e3b3ac8
cd2dfd4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
import gradio as gr
import yt_dlp as yt
import whisper
import os
import torch
import logging

# Route log records (INFO and above) to a file in the working directory,
# each line stamped with time and severity.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    filename='transcription_logs.txt',
)

# Staging folder for downloaded media, created under the current working
# directory; an already-existing folder is reused as-is.
temp_download_dir = os.path.join(os.getcwd(), "temp_download")
os.makedirs(name=temp_download_dir, exist_ok=True)

# Function to download audio from the given URL
def download_audio(url):
    """Download the best available audio for *url* into the temp folder.

    yt-dlp saves the file under the video title; the file is then renamed
    so that its name contains no spaces. If the space-free name is already
    taken by another file, a numeric suffix (``_1``, ``_2``, ...) is added.
    A file whose name already has no spaces is returned without renaming.

    Args:
        url: Address of the video page to fetch.

    Returns:
        Path of the audio file inside ``temp_download_dir``.

    Raises:
        FileNotFoundError: If the file yt-dlp reported does not exist.
        OSError: If the rename fails.
        Errors raised by yt-dlp itself propagate unchanged.
    """
    ydl_opts = {
        'format': 'bestaudio/best',
        'outtmpl': os.path.join(temp_download_dir, '%(title)s.%(ext)s'),
        # For a URL that names both a video and a playlist, fetch only the
        # video; a playlist result would not map to a single file name.
        'noplaylist': True,
    }
    with yt.YoutubeDL(ydl_opts) as ydl:
        info_dict = ydl.extract_info(url, download=True)
        downloaded_file = ydl.prepare_filename(info_dict)

    if not os.path.exists(downloaded_file):
        raise FileNotFoundError(
            f"Failed to download audio file: {downloaded_file}"
        )

    original_name = os.path.basename(downloaded_file)
    safe_name = original_name.replace(" ", "_")
    if safe_name == original_name:
        # Nothing to rename. Running the uniqueness loop here would collide
        # with the file itself and needlessly append "_1".
        return downloaded_file

    # Pick a free name so an earlier download is never overwritten.
    new_filename = os.path.join(temp_download_dir, safe_name)
    base, extension = os.path.splitext(new_filename)
    counter = 1
    while os.path.exists(new_filename):
        new_filename = f"{base}_{counter}{extension}"
        counter += 1

    # os.rename raises OSError on failure, so no existence re-check is needed.
    os.rename(downloaded_file, new_filename)
    return new_filename

def _format_srt_timestamp(seconds):
    """Render a time offset in seconds as an SRT timecode ``HH:MM:SS,mmm``."""
    # Work in whole milliseconds so rounding cannot produce e.g. "59,1000".
    total_ms = max(0, round(seconds * 1000))
    hours, remainder = divmod(total_ms, 3_600_000)
    minutes, remainder = divmod(remainder, 60_000)
    secs, millis = divmod(remainder, 1000)
    return f"{hours:02d}:{minutes:02d}:{secs:02d},{millis:03d}"


# Function to transcribe audio to SRT format
def transcribe_to_srt(file_path):
    """Transcribe an audio file with Whisper and return SRT subtitle text.

    Args:
        file_path: Path of the audio file to transcribe.

    Returns:
        A string of SRT cues, one per entry in the transcription result's
        ``"segments"`` list. Each cue is a 1-based index line, a
        ``HH:MM:SS,mmm --> HH:MM:SS,mmm`` timing line, the segment text and
        a blank line. Empty string when there are no segments.
    """
    # NOTE(review): the model is loaded on every call; consider caching it
    # if transcription latency matters.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model = whisper.load_model("medium", device=device)

    result = model.transcribe(file_path)

    cues = []
    for index, segment in enumerate(result["segments"], start=1):
        start = _format_srt_timestamp(segment["start"])
        end = _format_srt_timestamp(segment["end"])
        # Strip surrounding whitespace so it does not end up in the cue.
        text = segment["text"].strip()
        cues.append(f"{index}\n{start} --> {end}\n{text}\n\n")

    return "".join(cues)

def transcribe_video(url):
    """Download the audio behind *url* and return its transcription as SRT.

    This is the function the UI calls, so it never raises: failures are
    logged and reported back as a readable message.

    Args:
        url: Address of the video to transcribe. Surrounding whitespace is
            ignored.

    Returns:
        The SRT text on success, a prompt when no URL was supplied, or
        ``"An error occurred: ..."`` when download or transcription fails.
    """
    # The UI may submit an empty textbox; do not start a download for it.
    if not url or not url.strip():
        return "Please enter a video URL."
    url = url.strip()

    try:
        logging.info(f"Transcribing video from URL: {url}")
        audio_file = download_audio(url)
        logging.info(f"Downloaded audio file: {audio_file}")
        srt_content = transcribe_to_srt(audio_file)
        logging.info("Transcription completed successfully!")
        # The downloaded audio file is intentionally left in place.
        return srt_content
    except Exception as e:
        # Top-level boundary for the UI: record the traceback, show a message.
        logging.exception(f"An error occurred: {e}")
        return f"An error occurred: {e}"

class _UILogHandler(logging.Handler):
    """Logging handler that keeps the latest formatted records in memory.

    A Gradio component is not a writable stream, so it cannot be handed to
    ``logging.StreamHandler``; records are buffered here instead and
    returned to the UI as plain text.
    """

    def __init__(self, max_lines=200):
        """Create the handler; *max_lines* (positive) bounds the buffer."""
        super().__init__(level=logging.INFO)
        self.max_lines = max_lines
        self.lines = []

    def emit(self, record):
        """Append the formatted record and drop lines beyond the limit."""
        self.lines.append(self.format(record))
        del self.lines[:-self.max_lines]

    def text(self):
        """Return the buffered log lines joined by newlines."""
        return "\n".join(list(self.lines))


# Display the logs in the interface
log_handler = _UILogHandler()
log_handler.setFormatter(
    logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
)
logging.getLogger().addHandler(log_handler)


def _transcribe_with_logs(url):
    """Run the transcription and pair its output with the current log text."""
    # NOTE: the buffer is process-wide, so it shows records from all requests.
    return transcribe_video(url), log_handler.text()


log_viewer = gr.Textbox(label="Logs", lines=10, interactive=False)

# Not a live interface: each run downloads and transcribes a whole video,
# so it should start on submit rather than on every change to the input.
iface = gr.Interface(
    fn=_transcribe_with_logs,
    inputs=gr.Textbox(label="Video URL"),
    outputs=[gr.Textbox(label="SRT transcription"), log_viewer],
    title="YouTube/TikTok Video to SRT Transcription",
)

iface.launch()