YoutubeTranscriptTool / youtube_utils.py
maguid28's picture
Implemented transcription fallback using Whisper
4f48868
raw
history blame
869 Bytes
import re
from youtube_transcript_api import YouTubeTranscriptApi
from logging_config import logger
def get_video_id(youtube_url: str) -> str:
    """Extract the video ID from a YouTube URL.

    Recognised URL shapes are a ``v=`` query parameter, the ``/shorts/``,
    ``/embed/``, ``/live/`` and ``/v/`` path forms, and ``.be/`` short links.

    Args:
        youtube_url: The URL to inspect.

    Returns:
        The video ID found in the URL.

    Raises:
        ValueError: If none of the recognised URL shapes is present.
    """
    # ``v=`` must directly follow ``?`` or ``&`` so that parameters whose name
    # merely ends in "v" (e.g. ``rev=1``) are not mistaken for the video ID.
    # The ID stops at the next query/fragment delimiter, slash or whitespace,
    # so a trailing ``/`` or newline never ends up in the result.
    pattern = r"(?:[?&]v=|/(?:shorts|embed|live|v)/|\.be/)([^&\n?#/\s]+)"
    match = re.search(pattern, youtube_url)
    if match is None:
        raise ValueError("Could not extract video ID from the provided URL.")
    return match.group(1)
def fetch_youtube_transcript(youtube_url: str) -> str:
    """Return the transcript text for a YouTube video as a single string.

    The video ID is extracted from ``youtube_url``, the transcript entries
    are requested through ``YouTubeTranscriptApi.get_transcript`` and their
    ``"text"`` fields are joined with single spaces.

    Args:
        youtube_url: URL of the video whose transcript is wanted.

    Returns:
        The joined transcript text. If anything fails along the way, the
        error is logged and a message starting with
        ``"Error fetching transcript: "`` is returned instead of raising.
    """
    try:
        vid = get_video_id(youtube_url)
        logger.info(f"Fetching official YouTube transcript for video ID: {vid}")
        segments = YouTubeTranscriptApi.get_transcript(vid)
        # Joining stays inside the try so malformed entries are reported the
        # same way as a failed fetch.
        return " ".join(segment["text"] for segment in segments)
    except Exception as exc:
        failure = f"Error fetching transcript: {str(exc)}"
        logger.error(failure)
        return failure