File size: 1,032 Bytes
14562e6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
from youtube_transcript_api import YouTubeTranscriptApi
from pytube import Playlist
import re
from typing import Optional, List

# Prefixes that immediately precede a video ID in the supported URL shapes.
# Built from adjacent raw-string pieces so each alternative can be annotated.
_VIDEO_ID_PATTERN = re.compile(
    r"(?:"
    # Watch page: accept `v=` anywhere in the query string, not only first.
    r"youtube\.com/watch\?(?:[^#\s]*?&)?v="
    # Short share links.
    r"|youtu\.be/"
    # Path-style URLs (embed player, legacy /v/, Shorts, live streams).
    r"|youtube(?:-nocookie)?\.com/(?:embed|v|shorts|live)/"
    r")"
    r"([A-Za-z0-9_-]+)"
)


def extract_video_id(url: str) -> Optional[str]:
    """Extract the video ID from a YouTube URL.

    Recognised forms (scheme and subdomain are not required to match):

    * ``youtube.com/watch?v=ID`` -- ``v`` may appear after other query
      parameters, e.g. ``watch?feature=share&v=ID``
    * ``youtu.be/ID``
    * ``youtube.com/embed/ID`` and ``youtube-nocookie.com/embed/ID``
    * ``youtube.com/v/ID``, ``youtube.com/shorts/ID``, ``youtube.com/live/ID``

    Args:
        url: The URL (or any text containing one) to inspect.

    Returns:
        The run of ``[A-Za-z0-9_-]`` characters following the recognised
        prefix, or ``None`` when ``url`` is empty or contains no
        recognised form.
    """
    # Guard against empty/None input so callers get None instead of a
    # TypeError from the regex engine.
    if not url:
        return None
    match = _VIDEO_ID_PATTERN.search(url)
    return match.group(1) if match else None

def get_transcript(video_id: str) -> str:
    """Return the English transcript of a video as one space-joined string.

    Looks up the transcript list for ``video_id``, selects the ``"en"``
    transcript, fetches it, and joins the ``"text"`` field of every entry
    with single spaces.

    Args:
        video_id: The YouTube video ID to fetch the transcript for.

    Returns:
        The joined transcript text. If any step raises, the error is
        printed to stdout and an empty string is returned instead.
    """
    try:
        available = YouTubeTranscriptApi.list_transcripts(video_id)
        english = available.find_transcript(["en"])
        # Joining stays inside the try so a malformed entry is handled by
        # the same best-effort fallback as a failed lookup.
        texts = [segment["text"] for segment in english.fetch()]
        return " ".join(texts)
    except Exception as e:
        print(f"Error fetching transcript for {video_id}: {str(e)}")
        return ""

def get_playlist_video_ids(playlist_url: str) -> List[str]:
    """Collect the video ID of every entry in a YouTube playlist.

    Args:
        playlist_url: URL of the playlist to enumerate.

    Returns:
        One ID per URL yielded by the playlist's ``video_urls``, in the
        order they are yielded.

    Raises:
        IndexError: If a yielded URL does not contain ``"watch?v="``.
    """
    video_ids: List[str] = []
    for watch_url in Playlist(playlist_url).video_urls:
        # The ID is whatever follows the first "watch?v=" marker.
        pieces = watch_url.split("watch?v=")
        video_ids.append(pieces[1])
    return video_ids