Spaces:
Running
Running
import re | |
from youtube_transcript_api import YouTubeTranscriptApi | |
from logging_config import logger | |
def get_video_id(youtube_url: str) -> str:
    """Extract the video ID from a YouTube URL.

    Recognised URL shapes are ``watch?v=<id>`` (``v`` anywhere in the query
    string), ``youtu.be/<id>``, ``/shorts/<id>``, ``/embed/<id>`` and
    ``/live/<id>``.

    Args:
        youtube_url: The URL to inspect.

    Returns:
        The video ID portion of the URL.

    Raises:
        ValueError: If no video ID can be located in ``youtube_url``.
    """
    # `(?<!\w)v=` only accepts a stand-alone `v` query key, so keys that merely
    # end in "v" (e.g. `prev=`) are not mistaken for the video parameter.
    # The capture stops at query/fragment delimiters, at `/` (trailing slash
    # or extra path segment) and at whitespace, none of which belong to an ID.
    pattern = r"(?:(?<!\w)v=|/shorts/|/embed/|/live/|\.be/)([^&\n?#/\s]+)"
    match = re.search(pattern, youtube_url)
    if match is None:
        raise ValueError("Could not extract video ID from the provided URL.")
    return match.group(1)
def fetch_youtube_transcript(youtube_url: str) -> str:
    """Fetch the transcript of a YouTube video as a single string.

    The video ID is extracted with ``get_video_id`` and the transcript
    entries are joined with single spaces.

    Args:
        youtube_url: URL of the video whose transcript is wanted.

    Returns:
        The transcript text on success. On any failure (unparseable URL,
        transcript unavailable, network error, ...) the error is logged and a
        message starting with ``"Error fetching transcript: "`` is returned
        instead of raising.
    """
    try:
        video_id = get_video_id(youtube_url)
        logger.info(f"Fetching official YouTube transcript for video ID: {video_id}")

        # Older youtube-transcript-api releases expose a static
        # `get_transcript` returning a list of dicts; newer releases replaced
        # it with an instance-level `fetch` whose snippets carry a `.text`
        # attribute. Support both so a library upgrade does not turn every
        # call into an error string.
        legacy_get_transcript = getattr(YouTubeTranscriptApi, "get_transcript", None)
        if legacy_get_transcript is not None:
            texts = [entry["text"] for entry in legacy_get_transcript(video_id)]
        else:
            texts = [snippet.text for snippet in YouTubeTranscriptApi().fetch(video_id)]

        return " ".join(texts)
    except Exception as e:
        # Deliberate catch-all: callers expect a string back, never an exception.
        err_msg = f"Error fetching transcript: {str(e)}"
        logger.error(err_msg)
        return err_msg