jarif committed on
Commit
6c43c08
·
verified ·
1 Parent(s): b587870

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +31 -41
  2. requirements.txt +0 -0
app.py CHANGED
@@ -69,52 +69,42 @@ Focus on the main points and key insights. Write in a professional tone.
69
  prompt = PromptTemplate(template=prompt_template, input_variables=["text"])
70
 
71
  def get_youtube_content(url):
72
- """Get content from YouTube video using yt-dlp"""
73
  try:
74
- ydl_opts = {
75
- 'format': 'worst',
76
- 'extract_flat': True,
77
- 'quiet': True,
78
- 'no_warnings': True,
79
- 'cookiefile': 'youtube_cookies.txt', # Primary cookie method
80
- 'cookiesfrombrowser': ('chrome',), # Backup cookie method
81
- 'extractor_args': { # Additional arguments to help bypass restrictions
82
- 'youtube': {
83
- 'skip': ['dash', 'hls'],
84
- 'player_skip': ['js', 'configs', 'webpage']
85
- }
86
- },
87
- 'no_check_certificate': True,
88
- }
89
-
90
- with yt_dlp.YoutubeDL(ydl_opts) as ydl:
91
- try:
92
- info = ydl.extract_info(url, download=False)
93
- except Exception as yt_error:
94
- if "Sign in to confirm your age" in str(yt_error):
95
- ydl_opts['format'] = 'best[height<=480]' # Try a different format
96
- ydl = yt_dlp.YoutubeDL(ydl_opts)
97
- info = ydl.extract_info(url, download=False)
98
- else:
99
- raise yt_error
100
-
101
- title = info.get('title', '')
102
- description = info.get('description', '')
103
- views = info.get('view_count', 'Unknown')
104
- uploader = info.get('uploader', 'Unknown')
105
- upload_date = info.get('upload_date', 'Unknown')
106
-
107
- content = f"""
108
  Video Title: {title}
109
  Uploader: {uploader}
110
- Upload Date: {upload_date}
111
- Views: {views}
112
 
113
- Description:
114
- {description}
115
  """
116
- return [Document(page_content=content)]
117
-
118
  except Exception as e:
119
  st.error(f"Error getting YouTube content: {str(e)}")
120
  return None
 
69
  prompt = PromptTemplate(template=prompt_template, input_variables=["text"])
70
 
71
  def get_youtube_content(url):
72
+ """Get content from YouTube video using youtube-transcript-api"""
73
  try:
74
+ from youtube_transcript_api import YouTubeTranscriptApi
75
+ from urllib.parse import urlparse, parse_qs
76
+
77
+ # Extract video ID from URL
78
+ if 'youtube.com' in url:
79
+ video_id = parse_qs(urlparse(url).query)['v'][0]
80
+ elif 'youtu.be' in url:
81
+ video_id = urlparse(url).path[1:]
82
+ else:
83
+ raise ValueError("Not a valid YouTube URL")
84
+
85
+ # Get the transcript
86
+ transcript_list = YouTubeTranscriptApi.get_transcript(video_id)
87
+ transcript_text = ' '.join([entry['text'] for entry in transcript_list])
88
+
89
+ # Get video info using requests
90
+ response = requests.get(f"https://www.youtube.com/oembed?url=https://www.youtube.com/watch?v={video_id}&format=json")
91
+ if response.status_code == 200:
92
+ video_info = response.json()
93
+ title = video_info.get('title', '')
94
+ uploader = video_info.get('author_name', '')
95
+ else:
96
+ title = "Unknown Title"
97
+ uploader = "Unknown Uploader"
98
+
99
+ content = f"""
 
 
 
 
 
 
 
 
100
  Video Title: {title}
101
  Uploader: {uploader}
 
 
102
 
103
+ Content:
104
+ {transcript_text}
105
  """
106
+ return [Document(page_content=content)]
107
+
108
  except Exception as e:
109
  st.error(f"Error getting YouTube content: {str(e)}")
110
  return None
requirements.txt CHANGED
Binary files a/requirements.txt and b/requirements.txt differ