ContentAgent

Sleeping

ContentAgent / tools /transcribe_youtube

Rename tools/transcribe_youtube_audio to tools/transcribe_youtube

5e834c7 verified about 2 months ago

973 Bytes

	from pytube import YouTube
	import whisper
	import io


	def transcribe_youtube_audio(youtube_url: str) -> str:
	    """Download the audio of a YouTube video and transcribe it with Whisper.

	    Args:
	        youtube_url: URL of the YouTube video whose audio should be
	            transcribed. Must be a non-empty string.

	    Returns:
	        The transcribed text on success. On any failure (invalid input,
	        download error, no audio stream, transcription error) the function
	        does not raise; it returns a string of the form
	        ``"An error occurred: <details>"``.
	    """
	    # Function-scope imports keep the extra stdlib dependencies local to
	    # this block, so the file's top-level import list is unaffected.
	    import os
	    import tempfile

	    try:
	        if not isinstance(youtube_url, str) or not youtube_url.strip():
	            raise ValueError("youtube_url must be a non-empty string")

	        # Step 1: Pick an audio-only stream from the video
	        yt = YouTube(youtube_url)
	        audio_stream = yt.streams.filter(only_audio=True).first()
	        if audio_stream is None:
	            # .first() yields None when the filter matches nothing; fail
	            # with a clear message instead of an AttributeError.
	            raise ValueError("no audio-only stream is available for this video")

	        # Whisper's transcribe() expects a file path (or a waveform
	        # array/tensor), not a file-like object, so the audio is written
	        # to a temporary file that is removed when the block exits.
	        with tempfile.TemporaryDirectory() as tmp_dir:
	            audio_path = os.path.join(tmp_dir, "audio")
	            with open(audio_path, "wb") as audio_file:
	                audio_stream.stream_to_buffer(audio_file)

	            # Step 2: Load Whisper model
	            # Use "small", "medium", or "large" for better accuracy
	            model = whisper.load_model("base")

	            # Step 3: Transcribe the downloaded audio file
	            result = model.transcribe(audio_path)

	        return result["text"]
	    except Exception as e:
	        # Errors are reported through the return value by design.
	        return f"An error occurred: {str(e)}"

	# Example usage: guarded so that importing this module does not trigger a
	# download and transcription; it runs only when executed as a script.
	if __name__ == "__main__":
	    youtube_url = "https://www.youtube.com/watch?v=example"
	    lyrics = transcribe_youtube_audio(youtube_url)
	    print("Lyrics:", lyrics)