import re

from youtube_transcript_api import YouTubeTranscriptApi

from logging_config import logger

# Matches the marker that precedes a video ID and captures the ID itself.
#   * ``v=`` must start the string or follow ``?``/``&`` so that parameters
#     whose names merely end in "v" (e.g. ``rev=2``) are not mistaken for it.
#   * ``/shorts/``, ``/embed/`` and ``/live/`` are path-style URLs.
#   * ``.be/`` covers the short-link form.
# The ID stops at the first query/fragment delimiter, slash, or whitespace.
_VIDEO_ID_PATTERN = re.compile(
    r"(?:(?:^|[?&])v=|/(?:shorts|embed|live)/|\.be/)([^&?#/\s]+)"
)


def get_video_id(youtube_url: str) -> str:
    """Extract the video ID from a YouTube URL.

    Recognised forms are ``...?v=<id>`` / ``...&v=<id>``, ``/shorts/<id>``,
    ``/embed/<id>``, ``/live/<id>`` and ``.be/<id>``.

    Args:
        youtube_url: The URL (or URL fragment) to inspect.

    Returns:
        The captured video ID, without any trailing query string, fragment,
        slash, or whitespace.

    Raises:
        ValueError: If no recognised video-ID marker is found in the URL.
    """
    match = _VIDEO_ID_PATTERN.search(youtube_url)
    if match is None:
        raise ValueError("Could not extract video ID from the provided URL.")
    return match.group(1)


def fetch_youtube_transcript(youtube_url: str) -> str:
    """Fetch a video's transcript and return it as one space-joined string.

    Args:
        youtube_url: URL of the video; passed to ``get_video_id``.

    Returns:
        The ``"text"`` fields of the transcript entries joined with single
        spaces. On any failure (including an unparseable URL) the error is
        logged and a message starting with ``"Error fetching transcript: "``
        is returned instead of raising, so callers must check for that
        prefix if they need to distinguish failure from success.
    """
    # The broad ``except`` is deliberate: this function's contract is to
    # report every failure as a returned message rather than an exception.
    try:
        video_id = get_video_id(youtube_url)
        logger.info(f"Fetching official YouTube transcript for video ID: {video_id}")
        transcript_data = YouTubeTranscriptApi.get_transcript(video_id)
        return " ".join(entry["text"] for entry in transcript_data)
    except Exception as e:
        err_msg = f"Error fetching transcript: {e}"
        logger.error(err_msg)
        return err_msg