ContentAgent

Sleeping

App Files Files Community

ContentAgent / tools /transcribe_youtube

yetessam

New transcribe object

b321e9e verified 8 months ago

raw

history blame

2.26 kB

	import io
	import os
	import tempfile
	from typing import Any, Optional

	import whisper
	from pytube import YouTube
	from smolagents.tools import Tool


	class TranscribeYouTubeTool(Tool):
	    """Tool that downloads a YouTube video's audio track and transcribes it with Whisper.

	    ``forward`` is the entry point: it takes a YouTube URL, fetches the first
	    audio-only stream into memory via pytube, and returns the text produced
	    by a Whisper model.
	    """

	    name = "transcribe_youtube"
	    description = "Returns a youtube transcript."
	    inputs = {'query': {'type': 'string', 'description': 'A YouTube URL.'}}
	    output_type = "string"

	    def __init__(self, max_results=10, model_name="base", **kwargs):
	        """Set up the tool without downloading anything yet.

	        Args:
	            max_results: Not used by the transcription flow; kept so callers
	                that already pass it keep working.
	            model_name: Whisper model to load, e.g. "base", "small",
	                "medium" or "large" (larger models are more accurate).
	            **kwargs: Extra keyword arguments forwarded to ``pytube.YouTube``
	                together with the URL on every ``forward`` call.

	        Raises:
	            ImportError: If the ``pytube`` package is not installed.
	        """
	        super().__init__()
	        self.max_results = max_results
	        try:
	            from pytube import YouTube
	        except ImportError as e:
	            raise ImportError(
	                "You must install package `pytube` to run this tool: for instance run `pip install pytube`."
	            ) from e

	        # The URL is only known when forward() runs, so keep the class and
	        # its extra arguments around instead of instantiating YouTube here.
	        self._youtube_cls = YouTube
	        self._youtube_kwargs = kwargs
	        self.model_name = model_name
	        self._model = None  # Whisper model, loaded lazily on first use
	        self.yt = None  # pytube.YouTube object for the most recent URL
	        self.audio_buffer: Optional[io.BytesIO] = None  # most recent audio download

	    def forward(self, query: str) -> str:
	        """Transcribe the audio of the YouTube video at ``query``.

	        Args:
	            query: A YouTube URL.

	        Returns:
	            The transcript text, or a message starting with
	            "An error occurred:" if the download or transcription failed.

	        Raises:
	            ValueError: If ``query`` is empty or only whitespace.
	        """
	        url = (query or "").strip()
	        if not url:
	            raise ValueError("A YouTube URL is required.")

	        # Tool boundary: report download problems in the same string format
	        # get_text() uses, so the caller always receives a string.
	        try:
	            self.yt = self._youtube_cls(url, **self._youtube_kwargs)
	            self.audio_buffer = self.get_audio()
	        except Exception as e:
	            return f"An error occurred: {str(e)}"

	        return self.get_text()

	    def get_audio(self) -> io.BytesIO:
	        """Download the first audio-only stream of ``self.yt`` into memory.

	        Returns:
	            A ``BytesIO`` holding the audio, positioned at the start.

	        Raises:
	            RuntimeError: If no video has been selected yet, or the video
	                exposes no audio-only stream.
	        """
	        if self.yt is None:
	            raise RuntimeError("No YouTube video has been selected; call forward() with a URL first.")

	        audio_stream = self.yt.streams.filter(only_audio=True).first()
	        if audio_stream is None:
	            raise RuntimeError("No audio-only stream is available for this video.")

	        # Use a BytesIO buffer to store the audio in memory
	        audio_buffer = io.BytesIO()
	        audio_stream.stream_to_buffer(audio_buffer)
	        audio_buffer.seek(0)  # Reset buffer position to the beginning
	        return audio_buffer

	    def get_text(self) -> str:
	        """Transcribe ``self.audio_buffer`` with Whisper.

	        Returns:
	            The transcript text, or a message starting with
	            "An error occurred:" if transcription failed.
	        """
	        try:
	            if self.audio_buffer is None:
	                raise ValueError("No audio has been downloaded yet.")

	            # Loading a Whisper model is slow, so do it once and reuse it.
	            if self._model is None:
	                self._model = whisper.load_model(self.model_name)

	            # Whisper's transcribe() expects a file path or an in-memory
	            # array rather than a file-like object, so spill the buffer to a
	            # temporary file. delete=False lets another process reopen the
	            # file by name; it is removed explicitly below.
	            with tempfile.NamedTemporaryFile(delete=False) as tmp:
	                tmp.write(self.audio_buffer.getvalue())
	                tmp_path = tmp.name
	            try:
	                result = self._model.transcribe(tmp_path)
	            finally:
	                os.remove(tmp_path)
	            return result["text"]
	        except Exception as e:
	            return f"An error occurred: {str(e)}"


	# Example usage:
	# youtube_url = "https://www.youtube.com/watch?v=example"
	# transcript = TranscribeYouTubeTool().forward(youtube_url)
	# print("Transcript:", transcript)