import re
from typing import Optional, List

from pytube import Playlist
from youtube_transcript_api import YouTubeTranscriptApi
# Matches the URL prefix that precedes a video ID and captures the ID itself.
# The optional query-prefix group is lazy (`??`) so that the first `v=`
# parameter wins when it directly follows `?`, while still allowing `v=` to
# appear after other parameters (e.g. `watch?list=...&v=ID`).
_VIDEO_ID_PATTERN = re.compile(
    r"(?:"
    r"youtube\.com/watch\?(?:[^#\s]*?&)??v="
    r"|youtu\.be/"
    r"|youtube(?:-nocookie)?\.com/(?:embed|v|shorts|live)/"
    r")"
    r"([A-Za-z0-9_-]+)"
)


def extract_video_id(url: str) -> Optional[str]:
    """Extract the video ID from various YouTube URL formats.

    Recognised forms (searched anywhere in ``url``):

    * ``youtube.com/watch?v=ID`` -- ``v`` may be the first query parameter
      or follow others (``watch?list=...&v=ID``)
    * ``youtu.be/ID``
    * ``youtube.com/embed/ID`` and ``youtube-nocookie.com/embed/ID``
    * ``youtube.com/v/ID``, ``youtube.com/shorts/ID``, ``youtube.com/live/ID``

    Args:
        url: The URL (or text containing a URL) to inspect.

    Returns:
        The run of ``[A-Za-z0-9_-]`` characters following the matched prefix,
        or ``None`` if ``url`` is empty or no supported form is found.
    """
    if not url:
        return None
    match = _VIDEO_ID_PATTERN.search(url)
    return match.group(1) if match else None
def get_transcript(video_id: str) -> str:
    """Fetch the ``"en"`` transcript for a video and flatten it to one string.

    The transcript entries' ``"text"`` fields are joined with single spaces.
    This is a best-effort call: if anything in the lookup, fetch or join
    raises, the error is printed to stdout and an empty string is returned.

    Args:
        video_id: The YouTube video ID to look up.

    Returns:
        The space-joined transcript text, or ``""`` on any failure.
    """
    try:
        listing = YouTubeTranscriptApi.list_transcripts(video_id)
        english = listing.find_transcript(["en"])
        pieces = [snippet["text"] for snippet in english.fetch()]
        # Joined inside the try so a non-string entry is reported like any
        # other failure instead of propagating to the caller.
        return " ".join(pieces)
    except Exception as err:
        print(f"Error fetching transcript for {video_id}: {str(err)}")
        return ""
def get_playlist_video_ids(playlist_url: str) -> List[str]:
    """Get all video IDs from a YouTube playlist.

    Each URL yielded by ``Playlist.video_urls`` is parsed with
    ``extract_video_id`` rather than split on the literal ``"watch?v="``.
    A URL carrying extra query parameters therefore still yields a clean ID,
    and a URL in an unexpected shape is skipped instead of raising
    ``IndexError``.

    Args:
        playlist_url: URL of the playlist, passed unchanged to
            ``pytube.Playlist``.

    Returns:
        The extracted video IDs, in the order the playlist yields its URLs.
        URLs from which no ID can be extracted are omitted.
    """
    playlist = Playlist(playlist_url)
    video_ids: List[str] = []
    for video_url in playlist.video_urls:
        video_id = extract_video_id(video_url)
        if video_id is not None:
            video_ids.append(video_id)
    return video_ids