Spaces:
Sleeping
Sleeping
| from typing import Any, Optional | |
| from smolagents.tools import Tool | |
| from pytube import youtube | |
| import whisper | |
| import io | |
class TranscribeYouTubeTool(Tool):
    """Agent tool that transcribes the audio track of a YouTube video.

    ``forward`` receives a YouTube URL, downloads the first audio-only stream
    into memory with ``pytube`` and transcribes it with a Whisper model.

    The last selected video and its downloaded audio are kept on the instance
    (``self.yt`` / ``self.audio_buffer``) so ``get_audio`` and ``get_text``
    can also be called individually after ``forward`` has selected a video.
    """

    name = "transcribe_youtube"
    description = "Returns a youtube transcript."
    inputs = {'query': {'type': 'string', 'description': 'A YouTube URL.'}}
    output_type = "string"

    def __init__(self, max_results=10, model_name="base", **kwargs):
        """Create the tool.

        Args:
            max_results: Unused by transcription; accepted and stored only so
                existing callers that pass it keep working.
            model_name: Whisper model to load on first use. Use "small",
                "medium", or "large" for better accuracy.
            **kwargs: Extra keyword arguments forwarded to ``pytube.YouTube``
                each time a video is opened.

        Raises:
            ImportError: If ``pytube`` is not installed.
        """
        super().__init__()
        self.max_results = max_results
        self.model_name = model_name
        try:
            from pytube import YouTube
        except ImportError as e:
            raise ImportError(
                "You must install package `pytube` to run this tool: for instance run `pip install pytube`."
            ) from e
        # A YouTube object needs a URL, which is only known in forward(), so
        # keep the class and its options here instead of instantiating it.
        self._youtube_cls = YouTube
        self.youtube_kwargs = kwargs
        self.yt: Optional[Any] = None
        self.audio_buffer: Optional[io.BytesIO] = None
        # Loaded lazily in get_text() so constructing the tool stays cheap.
        self._model: Optional[Any] = None

    def forward(self, query: str) -> str:
        """Return the transcript of the YouTube video at ``query``.

        Args:
            query: A YouTube URL.

        Returns:
            The transcribed text.

        Raises:
            ValueError: If ``query`` is empty or not a string.
            RuntimeError: If the video exposes no audio-only stream.
        """
        url = query.strip() if isinstance(query, str) else ""
        if not url:
            raise ValueError("A non-empty YouTube URL is required.")
        self.yt = self._youtube_cls(url, **self.youtube_kwargs)
        self.audio_buffer = self.get_audio()
        return self.get_text()

    def get_audio(self) -> io.BytesIO:
        """Download the current video's first audio-only stream into memory.

        Returns:
            A ``BytesIO`` holding the audio, positioned at the beginning.

        Raises:
            RuntimeError: If no video has been selected yet, or the video has
                no audio-only stream.
        """
        if self.yt is None:
            raise RuntimeError("No video selected; call forward() with a YouTube URL first.")
        audio_stream = self.yt.streams.filter(only_audio=True).first()
        if audio_stream is None:
            raise RuntimeError("No audio-only stream is available for this video.")
        # Use a BytesIO buffer to store the audio in memory
        audio_buffer = io.BytesIO()
        audio_stream.stream_to_buffer(audio_buffer)
        audio_buffer.seek(0)  # Reset buffer position to the beginning
        return audio_buffer

    def get_text(self) -> str:
        """Transcribe the audio held in ``self.audio_buffer`` with Whisper.

        Returns:
            The transcribed text.

        Raises:
            RuntimeError: If no audio has been downloaded yet.
        """
        import os
        import tempfile

        if self.audio_buffer is None:
            raise RuntimeError("No audio downloaded; call forward() or get_audio() first.")
        if self._model is None:
            # Load once and reuse; loading a model on every call is slow.
            self._model = whisper.load_model(self.model_name)
        # Whisper's transcribe() takes a file path or an audio array, not a
        # file-like object, so spill the in-memory audio to a temporary file.
        # delete=False plus an explicit remove lets the file be reopened by
        # name on platforms that forbid opening an already-open temp file.
        tmp = tempfile.NamedTemporaryFile(delete=False)
        try:
            with tmp:
                tmp.write(self.audio_buffer.getvalue())
            result = self._model.transcribe(tmp.name)
        finally:
            os.remove(tmp.name)
        return result["text"]
# Example usage
# tool = TranscribeYouTubeTool()
# transcript = tool.forward("https://www.youtube.com/watch?v=example")
# print("Transcript:", transcript)