noumanjavaid committed on
Commit
26b0eae
·
verified ·
1 Parent(s): 27a4025

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +100 -0
app.py ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ from youtube_transcript_api import YouTubeTranscriptApi
4
+ import re
5
+ from io import StringIO
6
+
7
def extract_video_id(url):
    """Extract the 11-character YouTube video ID from a URL.

    Patterns are tried from most to least specific:

    1. ``?v=<id>`` / ``&v=<id>`` query parameter (watch URLs)
    2. ``youtu.be/<id>`` short links
    3. ``/embed/<id>``, ``/shorts/<id>``, ``/live/<id>``, ``/v/<id>`` paths
    4. generic fallback: ``v=<id>`` or ``/<id>`` anywhere in the string

    Every pattern requires that the 11 ID characters are NOT followed by
    another ID character. Without that boundary, the first 11 characters of
    a longer path segment (a channel ID, a user name, ...) would be returned
    as if they were a video ID.

    Args:
        url: The URL (or URL-like string) to inspect.

    Returns:
        The video ID as a string, or None when no ID is found or ``url`` is
        not a string.
    """
    if not isinstance(url, str):
        return None

    # Exactly 11 ID characters, not followed by a 12th.
    video_id = r'([0-9A-Za-z_-]{11})(?![0-9A-Za-z_-])'
    patterns = (
        r'[?&]v=' + video_id,
        r'youtu\.be/' + video_id,
        r'/(?:embed|shorts|live|v)/' + video_id,
        # Fallback keeps the permissive matching of the original first
        # pattern, now with the end boundary enforced.
        r'(?:v=|/)' + video_id,
    )

    for pattern in patterns:
        match = re.search(pattern, url)
        if match:
            return match.group(1)
    return None
20
+
21
def get_transcript(video_id):
    """Fetch the transcript for a single video.

    Args:
        video_id: The YouTube video ID to look up.

    Returns:
        On success, a list of transcript entries (the calling script reads
        each entry's ``'text'`` field). On failure, the error message as a
        string; callers tell the two apart with ``isinstance(result, list)``.
    """
    try:
        # Older youtube-transcript-api releases expose a static
        # ``get_transcript``; newer ones use an instance with ``fetch`` and
        # ``to_raw_data``. Support both so an upgrade does not turn every
        # request into an AttributeError string.
        # NOTE(review): confirm against the installed library version.
        legacy_fetch = getattr(YouTubeTranscriptApi, 'get_transcript', None)
        if callable(legacy_fetch):
            return legacy_fetch(video_id)
        return list(YouTubeTranscriptApi().fetch(video_id).to_raw_data())
    except Exception as e:
        # Deliberate catch-all: the UI shows the message in the results table
        # instead of aborting the whole batch.
        return str(e)
28
+
29
# ---- Page header -----------------------------------------------------------
st.title('YouTube Transcript Extractor')
st.write('Enter YouTube video URLs (one per line) to extract their transcripts.')

# ---- Input: free-form box, one URL per line --------------------------------
urls = st.text_area(
    'YouTube URLs',
    height=150,
    help='Enter one YouTube URL per line',
)

# ---- Run the extraction when the button is pressed -------------------------
if st.button('Extract Transcripts'):
    if not urls:
        st.warning('Please enter at least one YouTube URL')
    else:
        # Trim every line and discard the ones that end up empty.
        stripped_lines = (line.strip() for line in urls.split('\n'))
        url_list = [line for line in stripped_lines if line]

        if not url_list:
            st.error('Please enter valid YouTube URLs')
        else:
            results = []
            progress_bar = st.progress(0)

            for done, url in enumerate(url_list, start=1):
                video_id = extract_video_id(url)

                if not video_id:
                    # No ID could be pulled out of this line.
                    row_id = None
                    status = 'Error'
                    text = 'Invalid YouTube URL'
                else:
                    row_id = video_id
                    transcript = get_transcript(video_id)
                    if isinstance(transcript, list):
                        # A list means success: flatten the entries into one string.
                        status = 'Success'
                        text = ' '.join(entry['text'] for entry in transcript)
                    else:
                        # Anything else is the error message from the fetch.
                        status = 'Error'
                        text = transcript

                results.append({
                    'URL': url,
                    'Video ID': row_id,
                    'Status': status,
                    'Transcript': text,
                })

                # Fraction of URLs processed so far.
                progress_bar.progress(done / len(url_list))

            df = pd.DataFrame(results)

            # The on-screen table omits the Video ID column; the CSV keeps it.
            st.subheader('Results')
            st.dataframe(df[['URL', 'Status', 'Transcript']])

            if not df.empty:
                csv = df.to_csv(index=False)
                st.download_button(
                    label="Download transcripts as CSV",
                    data=csv,
                    file_name="youtube_transcripts.csv",
                    mime="text/csv",
                )