from typing import Any, Optional
import io
import os
import tempfile

from smolagents.tools import Tool
from pytube import YouTube
import whisper


class TranscribeYouTubeTool(Tool):
    """Tool that downloads a YouTube video's audio track and transcribes it.

    The audio is fetched with ``pytube`` into memory and transcribed with an
    OpenAI Whisper model that is loaded on first use and then reused.
    """

    name = "transcribe_youtube"
    description = "Returns a youtube transcript."
    inputs = {'query': {'type': 'string', 'description': 'A YouTube URL.'}}
    output_type = "string"

    def __init__(self, max_results=10, model_name: str = "base", **kwargs: Any):
        """Configure the tool without touching the network.

        Args:
            max_results: Retained for backward compatibility with existing
                callers; it is stored but not used by the transcription.
            model_name: Whisper model to load, e.g. "base", "small",
                "medium" or "large" (larger models trade speed for accuracy).
            **kwargs: Extra keyword arguments forwarded to ``pytube.YouTube``
                for every URL that is transcribed.
        """
        super().__init__()
        self.max_results = max_results
        self.model_name = model_name
        # The URL is only known in forward(), so the YouTube object is built
        # per call; only its optional keyword arguments are kept here.
        self.youtube_kwargs = kwargs
        # Loading Whisper weights is expensive: defer until first use.
        self._model: Optional[Any] = None

    def forward(self, query: str) -> str:
        """Return the transcript of the YouTube video at ``query``.

        Args:
            query: A YouTube URL.

        Returns:
            The transcribed text.

        Raises:
            ValueError: If ``query`` is empty or only whitespace.
            RuntimeError: If the video exposes no audio-only stream.
        """
        url = query.strip() if query else ""
        if not url:
            raise ValueError("A YouTube URL is required.")
        return self.get_text(self.get_audio(url))

    def get_audio(self, url: str) -> io.BytesIO:
        """Download the first audio-only stream of ``url`` into memory.

        Args:
            url: A YouTube URL.

        Returns:
            A ``BytesIO`` positioned at offset 0 holding the encoded audio.

        Raises:
            RuntimeError: If the video exposes no audio-only stream.
        """
        video = YouTube(url, **self.youtube_kwargs)
        audio_stream = video.streams.filter(only_audio=True).first()
        if audio_stream is None:
            raise RuntimeError(f"No audio stream found for {url!r}.")

        # Use a BytesIO buffer to store the audio in memory.
        audio_buffer = io.BytesIO()
        audio_stream.stream_to_buffer(audio_buffer)
        audio_buffer.seek(0)  # Reset buffer position to the beginning.
        return audio_buffer

    def get_text(self, audio_buffer: io.BytesIO) -> str:
        """Transcribe encoded audio held in ``audio_buffer`` with Whisper.

        Args:
            audio_buffer: In-memory encoded audio, as returned by
                ``get_audio``.

        Returns:
            The transcribed text.
        """
        if self._model is None:
            self._model = whisper.load_model(self.model_name)

        # Whisper's transcribe() takes a file path or a decoded waveform, not
        # a file-like object, so spool the bytes to a temporary file. The
        # file is closed before transcription and removed manually so the
        # path can be reopened on platforms that forbid a second open handle.
        tmp = tempfile.NamedTemporaryFile(delete=False)
        try:
            with tmp:
                tmp.write(audio_buffer.getvalue())
            result = self._model.transcribe(tmp.name)
        finally:
            os.remove(tmp.name)
        return result["text"]


# Example usage
# tool = TranscribeYouTubeTool()
# transcript = tool.forward("https://www.youtube.com/watch?v=example")
# print("Transcript:", transcript)