"""Streamlit app that extracts YouTube transcripts for a list of video URLs.

The user pastes one URL per line; each URL is resolved to a video ID, its
transcript is fetched, and the results are shown in a table that can be
downloaded as CSV.
"""

import re
from io import StringIO  # NOTE(review): unused in the visible code; kept in case it is relied on elsewhere
from urllib.parse import parse_qs, urlparse

import pandas as pd
import streamlit as st
from youtube_transcript_api import YouTubeTranscriptApi

# A video ID is 11 characters from [0-9A-Za-z_-]. The negative lookahead
# rejects longer tokens whose first 11 characters merely look like an ID,
# while still tolerating trailing punctuation picked up by copy/paste.
_VIDEO_ID_RE = re.compile(r'([0-9A-Za-z_-]{11})(?![0-9A-Za-z_-])')
_SCHEME_RE = re.compile(r'[A-Za-z][A-Za-z0-9+.-]*://')

_SHORT_LINK_DOMAIN = 'youtu.be'
_YOUTUBE_DOMAINS = ('youtube.com', 'youtube-nocookie.com')
# First path segments that are directly followed by the video ID.
_ID_PATH_PREFIXES = frozenset({'embed', 'shorts', 'live', 'v', 'e'})


def _host_matches(host, domain):
    """Return True if *host* is *domain* itself or one of its subdomains."""
    return host == domain or host.endswith('.' + domain)


def _match_video_id(token):
    """Return the 11-character video ID at the start of *token*, or None."""
    found = _VIDEO_ID_RE.match(token)
    return found.group(1) if found else None


def extract_video_id(url):
    """Extract the YouTube video ID from a URL.

    Supported forms (with or without a scheme):
    ``youtube.com/watch?v=<id>``, ``youtu.be/<id>`` and
    ``youtube.com/{embed,shorts,live,v,e}/<id>``, including subdomains such
    as ``www.`` or ``m.`` and the ``youtube-nocookie.com`` domain.

    Args:
        url: The text entered by the user.

    Returns:
        The 11-character video ID, or ``None`` when *url* is empty, is not a
        YouTube URL, or carries no recognizable video ID.
    """
    if not url:
        return None

    candidate = url.strip()
    if not _SCHEME_RE.match(candidate):
        # urlparse only recognizes the host when a scheme is present.
        candidate = 'https://' + candidate.lstrip('/')

    try:
        parts = urlparse(candidate)
        host = (parts.hostname or '').lower()
    except ValueError:
        # urlparse rejects some malformed inputs (e.g. broken IPv6 hosts).
        return None

    segments = [segment for segment in parts.path.split('/') if segment]

    if _host_matches(host, _SHORT_LINK_DOMAIN):
        return _match_video_id(segments[0]) if segments else None

    if not any(_host_matches(host, domain) for domain in _YOUTUBE_DOMAINS):
        # Matching on the host prevents arbitrary 11-character path segments
        # of unrelated sites from being mistaken for a video ID.
        return None

    v_values = parse_qs(parts.query).get('v')
    if v_values:
        video_id = _match_video_id(v_values[0])
        if video_id:
            return video_id

    if len(segments) >= 2 and segments[0] in _ID_PATH_PREFIXES:
        return _match_video_id(segments[1])

    return None


def get_transcript(video_id):
    """Fetch the transcript for a single video.

    Args:
        video_id: The YouTube video ID.

    Returns:
        On success, a list of transcript entries (each entry is indexed with
        ``'text'`` by the caller). On failure, the error message as a
        string; callers tell the two cases apart with ``isinstance(..., list)``.
    """
    try:
        if hasattr(YouTubeTranscriptApi, 'get_transcript'):
            return YouTubeTranscriptApi.get_transcript(video_id)
        # NOTE(review): newer youtube-transcript-api releases appear to drop
        # the static get_transcript() in favour of an instance fetch() whose
        # result exposes to_raw_data(); confirm against the pinned version.
        return list(YouTubeTranscriptApi().fetch(video_id).to_raw_data())
    except Exception as e:
        # Best-effort boundary: any failure becomes an error row in the
        # results table instead of aborting the whole batch.
        return str(e) or type(e).__name__


def _process_url(url):
    """Build the result row (URL, Video ID, Status, Transcript) for one URL."""
    video_id = extract_video_id(url)
    if video_id is None:
        return {
            'URL': url,
            'Video ID': None,
            'Status': 'Error',
            'Transcript': 'Invalid YouTube URL',
        }

    transcript = get_transcript(video_id)
    if isinstance(transcript, list):
        # Successful extraction: flatten the entries into one text blob.
        return {
            'URL': url,
            'Video ID': video_id,
            'Status': 'Success',
            'Transcript': ' '.join(entry['text'] for entry in transcript),
        }

    # get_transcript returned an error message.
    return {
        'URL': url,
        'Video ID': video_id,
        'Status': 'Error',
        'Transcript': transcript,
    }


st.title('YouTube Transcript Extractor')
st.write('Enter YouTube video URLs (one per line) to extract their transcripts.')

# Text area for input
urls = st.text_area('YouTube URLs', height=150, help='Enter one YouTube URL per line')

if st.button('Extract Transcripts'):
    # One URL per line; blank and whitespace-only lines are ignored.
    url_list = [line.strip() for line in (urls or '').splitlines() if line.strip()]

    if not urls:
        st.warning('Please enter at least one YouTube URL')
    elif not url_list:
        st.error('Please enter valid YouTube URLs')
    else:
        results = []
        progress_bar = st.progress(0)

        for i, url in enumerate(url_list):
            results.append(_process_url(url))
            progress_bar.progress((i + 1) / len(url_list))

        df = pd.DataFrame(results)

        # The table omits the 'Video ID' column; the CSV export keeps it.
        st.subheader('Results')
        st.dataframe(df[['URL', 'Status', 'Transcript']])

        # url_list is non-empty here, so there is always at least one row.
        st.download_button(
            label="Download transcripts as CSV",
            data=df.to_csv(index=False),
            file_name="youtube_transcripts.csv",
            mime="text/csv",
        )