# yt-transcript / app.py
# Page header captured along with the file (not part of the program):
#   noumanjavaid's picture - "Create app.py" - 26b0eae verified
#   raw - history blame - 3.42 kB
import streamlit as st
import pandas as pd
from youtube_transcript_api import YouTubeTranscriptApi
import re
from io import StringIO
def extract_video_id(url):
    """Extract the 11-character YouTube video ID from a URL.

    Recognised URL shapes:

    * a ``v=<id>`` parameter at the start of the text or after ``?``, ``&``
      or ``#`` (e.g. ``https://www.youtube.com/watch?v=<id>``)
    * ``youtu.be/<id>``
    * ``/embed/<id>``, ``/shorts/<id>``, ``/live/<id>``, ``/watch/<id>``,
      ``/v/<id>``, ``/vi/<id>`` and ``/e/<id>``

    Args:
        url: The URL text to inspect.

    Returns:
        The video ID as a string, or ``None`` when ``url`` is not a string
        or contains no recognisable video ID.
    """
    if not isinstance(url, str):
        return None

    # An ID is exactly 11 characters from this alphabet. The negative
    # look-ahead rejects longer tokens instead of silently truncating them
    # to their first 11 characters.
    video_id = r'([0-9A-Za-z_-]{11})(?![0-9A-Za-z_-])'

    # The ID must follow a known marker. Accepting "any 11 characters after
    # a slash" would turn ordinary path segments (e.g. "/feed/subscriptions")
    # into bogus IDs.
    patterns = (
        r'(?:^|[?&#])v=' + video_id,
        r'youtu\.be/' + video_id,
        r'/(?:embed|shorts|live|watch|v|vi|e)/' + video_id,
    )

    for pattern in patterns:
        match = re.search(pattern, url)
        if match:
            return match.group(1)
    return None
def get_transcript(video_id):
    """Fetch the transcript for one video, reporting failures as text.

    Args:
        video_id: Video identifier passed straight through to
            ``YouTubeTranscriptApi.get_transcript``.

    Returns:
        Whatever ``YouTubeTranscriptApi.get_transcript`` returns on success.
        If the call raises any ``Exception``, the exception's message
        (``str(error)``) is returned instead of propagating, so callers must
        tell the two outcomes apart by type.
    """
    # NOTE(review): this relies on the static ``get_transcript`` entry point;
    # confirm the installed youtube-transcript-api release still provides it.
    try:
        fetched = YouTubeTranscriptApi.get_transcript(video_id)
    except Exception as error:
        return str(error)
    return fetched
# ---- Page header and input -------------------------------------------------
st.title('YouTube Transcript Extractor')
st.write('Enter YouTube video URLs (one per line) to extract their transcripts.')

# Free-form input box; every non-blank line is treated as one URL.
urls = st.text_area(
    'YouTube URLs',
    height=150,
    help='Enter one YouTube URL per line',
)


def _result_row(url):
    """Build the result record for a single URL.

    The record always has the keys ``URL``, ``Video ID``, ``Status`` and
    ``Transcript`` (in that order). ``Status`` is ``'Success'`` only when
    ``get_transcript`` returned a list; otherwise ``Transcript`` carries the
    error text.
    """
    video_id = extract_video_id(url)
    if not video_id:
        return {
            'URL': url,
            'Video ID': None,
            'Status': 'Error',
            'Transcript': 'Invalid YouTube URL',
        }

    transcript = get_transcript(video_id)
    if isinstance(transcript, list):
        # Success: flatten the transcript entries into one text blob.
        status = 'Success'
        text = ' '.join(entry['text'] for entry in transcript)
    else:
        # Failure: get_transcript handed back the error message.
        status = 'Error'
        text = transcript

    return {
        'URL': url,
        'Video ID': video_id,
        'Status': status,
        'Transcript': text,
    }


# ---- Extraction, triggered by the button -----------------------------------
if st.button('Extract Transcripts'):
    # One entry per non-blank line, with surrounding whitespace removed.
    if urls:
        url_list = [line.strip() for line in urls.split('\n') if line.strip()]
    else:
        url_list = []

    if not urls:
        st.warning('Please enter at least one YouTube URL')
    elif not url_list:
        # Something was typed, but it was only whitespace / blank lines.
        st.error('Please enter valid YouTube URLs')
    else:
        results = []
        total = len(url_list)
        progress_bar = st.progress(0)

        for done, url in enumerate(url_list, start=1):
            results.append(_result_row(url))
            # Advance the bar to the fraction of URLs processed so far.
            progress_bar.progress(done / total)

        df = pd.DataFrame(results)

        # The on-screen table omits the 'Video ID' column; the CSV keeps it.
        st.subheader('Results')
        st.dataframe(df[['URL', 'Status', 'Transcript']])

        if not df.empty:
            st.download_button(
                label="Download transcripts as CSV",
                data=df.to_csv(index=False),
                file_name="youtube_transcripts.csv",
                mime="text/csv",
            )