ContentAgent / tools /transcribe_youtube
yetessam's picture
Rename tools/transcribe_youtube_audio to tools/transcribe_youtube
5e834c7 verified
raw
history blame
973 Bytes
import io
import tempfile

import whisper
from pytube import YouTube
def transcribe_youtube_audio(youtube_url: str) -> str:
    """Download a YouTube video's audio track and transcribe it with Whisper.

    Args:
        youtube_url: URL of the YouTube video to transcribe.

    Returns:
        The transcribed text. On any failure (download, model loading or
        transcription) no exception is raised; a string of the form
        ``"An error occurred: <details>"`` is returned instead.
    """
    try:
        # Step 1: Pick an audio-only stream from the video.
        yt = YouTube(youtube_url)
        audio_stream = yt.streams.filter(only_audio=True).first()
        if audio_stream is None:
            # .first() yields None when the filter matches nothing; report
            # that directly rather than failing later with an AttributeError.
            return "An error occurred: no audio-only stream found for this video"

        # Step 2: Load Whisper model
        model = whisper.load_model("base")  # Use "small", "medium", or "large" for better accuracy

        # Step 3: Transcribe from a temporary file. Whisper's transcribe()
        # takes a file path (or an already-decoded array/tensor), not a
        # file-like object, so the audio is written to disk first. The
        # directory and the downloaded file are removed when the block exits.
        with tempfile.TemporaryDirectory() as tmp_dir:
            audio_path = audio_stream.download(output_path=tmp_dir)
            result = model.transcribe(audio_path)
        return result["text"]
    except Exception as e:
        # Deliberate best-effort boundary: callers get the failure as text.
        return f"An error occurred: {str(e)}"
# Example usage. Guarded so that importing this module does not start a
# download and transcription; the example only runs when the file is
# executed directly as a script.
if __name__ == "__main__":
    youtube_url = "https://www.youtube.com/watch?v=example"
    lyrics = transcribe_youtube_audio(youtube_url)
    print("Lyrics:", lyrics)