# ContentAgent / tools / transcribe_youtube
# yetessam's picture
# New transcribe object
# b321e9e verified
# raw
# history blame
# 2.26 kB
import io
from typing import Any, Optional

import whisper
from pytube import YouTube
from smolagents.tools import Tool
class TranscribeYouTubeTool(Tool):
    """Agent tool that transcribes the audio track of a YouTube video.

    ``forward`` receives a YouTube URL, downloads the first audio-only stream
    into memory with ``pytube`` and transcribes it with a Whisper model.
    Failures are raised as exceptions instead of being returned as text, so
    an error message can never be mistaken for a transcript.
    """

    name = "transcribe_youtube"
    description = "Returns a youtube transcript."
    inputs = {'query': {'type': 'string', 'description': 'A YouTube URL.'}}
    output_type = "string"

    def __init__(self, max_results=10, *, model_name="base", **kwargs):
        """Configure the tool; no video is contacted until ``forward`` runs.

        Args:
            max_results: Unused by transcription; accepted and stored only so
                existing callers that pass it keep working.
            model_name: Whisper checkpoint name handed to
                ``whisper.load_model`` (for example "base", "small",
                "medium" or "large").
            **kwargs: Extra keyword arguments forwarded to ``pytube.YouTube``
                for every video. A ``url`` entry is pulled out and used as a
                fallback when ``forward`` receives an empty query.

        Raises:
            ImportError: If ``pytube`` is not installed.
        """
        super().__init__()
        self.max_results = max_results
        try:
            from pytube import YouTube
        except ImportError as e:
            raise ImportError(
                "You must install package `pytube` to run this tool: for instance run `pip install pytube`."
            ) from e
        # Keep the class rather than an instance: a YouTube object needs the
        # video URL, which is only known once `forward` is called.
        self._youtube_cls = YouTube
        # `url` must not stay in kwargs, otherwise it would collide with the
        # positional URL passed in `forward`.
        self._default_url = kwargs.pop("url", None)
        self._youtube_kwargs = kwargs
        self.model_name = model_name
        self._model = None  # Whisper model, loaded on first use and reused.
        self.yt = None  # pytube handle of the most recently requested video.
        self.audio_buffer = None  # In-memory audio of that video.

    def forward(self, query: str) -> str:
        """Return the transcript of the video at the URL given in ``query``.

        Args:
            query: A YouTube URL. When empty, the ``url`` keyword given to
                the constructor (if any) is used instead.

        Returns:
            The transcribed text.

        Raises:
            ValueError: If no URL is available.
            RuntimeError: If downloading or transcribing the audio fails.
        """
        url = query.strip() if isinstance(query, str) else ""
        if not url:
            url = self._default_url or ""
        if not url:
            raise ValueError("A YouTube URL is required to produce a transcript.")

        self.yt = self._youtube_cls(url, **self._youtube_kwargs)
        self.get_audio()
        return self.get_text()

    def get_audio(self):
        """Download the current video's audio into memory.

        Returns:
            An ``io.BytesIO`` positioned at offset 0; the same object is also
            stored on ``self.audio_buffer``.

        Raises:
            RuntimeError: If no video has been selected yet, the video has no
                audio-only stream, or the download fails.
        """
        if self.yt is None:
            raise RuntimeError("No video selected: call `forward` with a YouTube URL first.")
        try:
            audio_stream = self.yt.streams.filter(only_audio=True).first()
            if audio_stream is None:
                raise LookupError("the video exposes no audio-only stream")
            # Use a BytesIO buffer to store the audio in memory
            audio_buffer = io.BytesIO()
            audio_stream.stream_to_buffer(audio_buffer)
        except Exception as e:
            raise RuntimeError(f"An error occurred: {str(e)}") from e
        audio_buffer.seek(0)  # Reset buffer position to the beginning
        self.audio_buffer = audio_buffer
        return audio_buffer

    def get_text(self):
        """Transcribe the audio held in ``self.audio_buffer``.

        Returns:
            The text produced by the Whisper model.

        Raises:
            RuntimeError: If no audio has been downloaded yet, or loading the
                model or transcribing fails.
        """
        import os
        import tempfile

        if self.audio_buffer is None:
            raise RuntimeError("No audio available: call `get_audio` first.")
        try:
            if self._model is None:
                # Loading a checkpoint is expensive; do it once per tool instance.
                self._model = whisper.load_model(self.model_name)
            # Whisper's `transcribe` takes a file path (or a waveform array),
            # not a file-like object, so spill the buffer to a temporary file.
            # The file is closed before Whisper opens it and removed afterwards.
            with tempfile.NamedTemporaryFile(delete=False) as tmp:
                tmp.write(self.audio_buffer.getvalue())
                audio_path = tmp.name
            try:
                result = self._model.transcribe(audio_path)
            finally:
                os.remove(audio_path)
        except Exception as e:
            raise RuntimeError(f"An error occurred: {str(e)}") from e
        return result["text"]
# Example usage
# tool = TranscribeYouTubeTool()
# youtube_url = "https://www.youtube.com/watch?v=example"
# transcript = tool.forward(youtube_url)
# print("Transcript:", transcript)