File size: 869 Bytes
4f48868
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
import re
from youtube_transcript_api import YouTubeTranscriptApi
from logging_config import logger


def get_video_id(youtube_url: str) -> str:
    """Extract the video ID from a YouTube URL.

    Recognized URL shapes:

    * ``.../watch?v=<id>`` (``v`` may appear anywhere in the query string)
    * ``youtu.be/<id>``
    * ``.../shorts/<id>``, ``.../embed/<id>``, ``.../live/<id>``, ``.../v/<id>``

    Args:
        youtube_url: The URL to parse.

    Returns:
        The video ID, without any trailing query string, fragment, path
        separator or whitespace.

    Raises:
        ValueError: If none of the recognized URL shapes is found.
    """
    pattern = (
        # ``[?&]`` anchors ``v=`` to a real query parameter so that names
        # merely ending in "v" (e.g. ``rev=``) are not mistaken for it.
        r"(?:[?&]v=|youtu\.be/|/(?:shorts|embed|live|v)/)"
        # The ID stops at the next query/fragment delimiter, slash or
        # whitespace, so ``/shorts/<id>/`` and padded input yield a clean ID.
        r"([^&?#/\s]+)"
    )
    match = re.search(pattern, youtube_url)
    if not match:
        raise ValueError("Could not extract video ID from the provided URL.")
    return match.group(1)


def fetch_youtube_transcript(youtube_url: str) -> str:
    """Fetch a YouTube video's transcript and return it as one string.

    The video ID is extracted from ``youtube_url`` with ``get_video_id``,
    the transcript is requested through ``YouTubeTranscriptApi``, and the
    ``text`` of every entry is joined with single spaces.

    Args:
        youtube_url: URL of the video whose transcript is wanted.

    Returns:
        The joined transcript text. On any failure (unparseable URL, error
        raised by the transcript library, ...) this function does NOT raise:
        it logs the problem and returns a string of the form
        ``"Error fetching transcript: <details>"`` instead.
    """
    try:
        video_id = get_video_id(youtube_url)
        logger.info(f"Fetching official YouTube transcript for video ID: {video_id}")
        if hasattr(YouTubeTranscriptApi, "get_transcript"):
            # Legacy static API; kept first so behavior on older
            # youtube-transcript-api releases is unchanged.
            transcript_data = YouTubeTranscriptApi.get_transcript(video_id)
        else:
            # Newer releases dropped the static helper in favor of the
            # instance method ``fetch``; ``to_raw_data()`` yields the same
            # list-of-dicts shape the legacy call returned.
            transcript_data = YouTubeTranscriptApi().fetch(video_id).to_raw_data()
        return " ".join(entry["text"] for entry in transcript_data)
    except Exception as e:
        # Deliberate best-effort boundary: callers receive the error text
        # as the return value rather than an exception.
        err_msg = f"Error fetching transcript: {str(e)}"
        logger.error(err_msg)
        return err_msg