producer / utils /youtube_utils.py
dwarkesh's picture
all title thumbnails at once
14562e6
raw
history blame
1.03 kB
from youtube_transcript_api import YouTubeTranscriptApi
from pytube import Playlist
import re
from typing import Optional, List
# Recognised URL shapes, each immediately followed by the video ID:
#   youtube.com/watch?...v=<id>   (v may come after other query parameters)
#   youtu.be/<id>
#   youtube.com/embed/<id>, /v/<id>, /shorts/<id>, /live/<id>
# The optional "other parameters" group is lazy (`??`) so that a URL whose
# query string starts with v= still yields that first v value.
_VIDEO_ID_PATTERN = re.compile(
    r"(?:youtube\.com/watch\?(?:[^#\s]*?&)??v=|youtu\.be/"
    r"|youtube\.com/(?:embed|v|shorts|live)/)([A-Za-z0-9_-]+)"
)


def extract_video_id(url: str) -> Optional[str]:
    """Extract the video ID from various YouTube URL formats.

    Supported forms are ``watch?v=`` (with ``v`` anywhere in the query
    string), ``youtu.be/``, ``/embed/``, ``/v/``, ``/shorts/`` and ``/live/``.

    Args:
        url: The URL (or any text containing one) to search.

    Returns:
        The run of ID characters (letters, digits, ``_`` and ``-``) that
        follows the recognised prefix, or ``None`` when ``url`` is empty or
        contains no recognised YouTube URL.
    """
    if not url:
        return None
    match = _VIDEO_ID_PATTERN.search(url)
    return match.group(1) if match else None
def get_transcript(video_id: str) -> str:
    """Return the "en" transcript of a YouTube video as one string.

    Lists the transcripts for ``video_id``, picks the one found for the
    language code ``"en"``, fetches it, and joins the ``"text"`` field of
    every fetched entry with single spaces.

    Any exception raised along the way is reported with ``print`` and an
    empty string is returned instead, so callers never see the error.
    """
    try:
        available = YouTubeTranscriptApi.list_transcripts(video_id)
        english = available.find_transcript(["en"])
        pieces = [segment["text"] for segment in english.fetch()]
        return " ".join(pieces)
    except Exception as err:
        # Best-effort: log the failure and fall back to an empty transcript.
        print(f"Error fetching transcript for {video_id}: {str(err)}")
        return ""
def get_playlist_video_ids(playlist_url: str) -> List[str]:
    """Return the video IDs of every entry in a YouTube playlist.

    Builds a ``Playlist`` from ``playlist_url`` and, for each of its
    ``video_urls``, keeps the text that follows the first ``"watch?v="``
    (up to the next occurrence of that marker, if any).

    Raises:
        IndexError: If a video URL does not contain ``"watch?v="``.
    """
    marker = "watch?v="
    video_ids = []
    for video_url in Playlist(playlist_url).video_urls:
        # Element [1] of the split is the part right after the marker.
        video_ids.append(video_url.split(marker)[1])
    return video_ids