Spaces:
Running
Running
File size: 3,248 Bytes
4f48868 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 |
import os
import subprocess
import tempfile
import yt_dlp
import torch
from transformers import pipeline
from logging_config import logger, log_buffer
# Device string handed to the ASR pipeline: use the GPU when CUDA is
# available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
def convert_audio_to_wav(input_file: str, output_file: str) -> str:
    """Re-encode an audio file as 16 kHz mono by shelling out to ffmpeg.

    Args:
        input_file: Path passed to ffmpeg as the ``-i`` input.
        output_file: Path ffmpeg writes to. ``-y`` is passed, so an existing
            file at this path is overwritten without prompting.

    Returns:
        ``output_file``, unchanged, so the call can be used inline.

    Raises:
        subprocess.CalledProcessError: ffmpeg exited with a non-zero status
            (``check=True``).
    """
    logger.info(f"Converting {input_file} to WAV: {output_file}")

    resample_args = ["-ar", "16000"]  # sample rate
    downmix_args = ["-ac", "1"]  # mono
    ffmpeg_command = [
        "ffmpeg",
        "-y",
        "-i", input_file,
        *resample_args,
        *downmix_args,
        output_file,
    ]

    # Argument list (no shell) keeps paths with spaces or metacharacters safe.
    subprocess.run(ffmpeg_command, check=True)
    return output_file
def fallback_whisper_transcription(youtube_url: str):
    """Download a video's audio and transcribe it with Whisper, streaming logs.

    This is a generator, not a plain function: it yields after every stage so
    a caller can refresh a progress/log display while the work is running.

    Stages: create a temporary directory, download the best audio stream with
    yt-dlp, convert it with ``convert_audio_to_wav``, then run the
    ``openai/whisper-small`` ASR pipeline on the converted file.

    Args:
        youtube_url: URL handed to ``yt_dlp.YoutubeDL.download``.

    Yields:
        ``(transcript, logs)`` tuples. ``transcript`` is ``""`` for progress
        updates, the recognised text on the final successful yield, or an
        error message starting with ``"Error in fallback transcription: "``
        if any stage raised. ``logs`` is ``log_buffer.getvalue()`` at the
        time of the yield.
    """
    try:
        # The directory (and everything downloaded into it) is removed when
        # the ``with`` block exits, including on error.
        with tempfile.TemporaryDirectory() as tmpdir:
            # Create temp dir
            logger.info("")
            logger.info(f"Created temporary directory: {tmpdir}")
            logger.info("")
            yield "", log_buffer.getvalue()

            # Download best audio
            logger.info("Downloading best audio via yt-dlp...")
            logger.info("")
            yield "", log_buffer.getvalue()

            # yt-dlp substitutes %(ext)s with the extension of the chosen format.
            download_path = os.path.join(tmpdir, "audio.%(ext)s")
            ydl_opts = {
                'format': 'bestaudio/best',
                'outtmpl': download_path,
                'quiet': True,
                'postprocessors': [],
            }
            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                ydl.download([youtube_url])
            logger.info("Audio downloaded. Locating the audio file in the temp folder...")
            logger.info("")
            yield "", log_buffer.getvalue()

            # confirm audio file
            downloaded_files = os.listdir(tmpdir)
            if not downloaded_files:
                raise RuntimeError("No audio file was downloaded via yt-dlp.")
            audio_file_path = os.path.join(tmpdir, downloaded_files[0])
            logger.info(f"Found audio file: {audio_file_path}")
            logger.info("Video has downloaded!")
            logger.info("")
            yield "", log_buffer.getvalue()

            # Convert to wav
            # The output name must differ from every possible download name
            # ("audio.<ext>"): if the source itself is "audio.wav", ffmpeg
            # would be asked to read and overwrite the same file and fail.
            wav_file_path = os.path.join(tmpdir, "converted.wav")
            convert_audio_to_wav(audio_file_path, wav_file_path)
            logger.info("Audio converted to WAV successfully.")
            logger.info("")
            yield "", log_buffer.getvalue()

            # Run whisper
            logger.info("Running Whisper ASR pipeline on the WAV file...")
            logger.info("")
            yield "", log_buffer.getvalue()

            asr_pipeline = pipeline(
                "automatic-speech-recognition",
                model="openai/whisper-small",
                return_timestamps=True,
                device=device,
                generate_kwargs={"task": "transcribe", "language": "<|en|>"},
            )
            result = asr_pipeline(inputs=wav_file_path)
            transcription = result["text"]
            logger.info("Whisper transcription completed successfully.")
            logger.info("")
            yield transcription, log_buffer.getvalue()
    except Exception as e:
        # Boundary handler: report the failure to the consumer as a final
        # yield instead of letting the exception escape the generator.
        err_msg = f"Error in fallback transcription: {str(e)}"
        logger.error(err_msg)
        yield err_msg, log_buffer.getvalue()
|