Spaces:

jarif
/

Simplify-YouTube-Videos-Web-Articles

Sleeping

App Files Community

jarif committed on Feb 1

Commit

6c43c08

verified ·

1 Parent(s): b587870

Upload 2 files

Browse files

Files changed (2) hide show

app.py +31 -41
requirements.txt +0 -0

app.py CHANGED Viewed

@@ -69,52 +69,42 @@ Focus on the main points and key insights. Write in a professional tone.
 prompt = PromptTemplate(template=prompt_template, input_variables=["text"])
 def get_youtube_content(url):
-    """Get content from YouTube video using yt-dlp"""
     try:
-        ydl_opts = {
-            'format': 'worst',
-            'extract_flat': True,
-            'quiet': True,
-            'no_warnings': True,
-            'cookiefile': 'youtube_cookies.txt',  # Primary cookie method
-            'cookiesfrombrowser': ('chrome',),  # Backup cookie method
-            'extractor_args': {  # Additional arguments to help bypass restrictions
-                'youtube': {
-                    'skip': ['dash', 'hls'],
-                    'player_skip': ['js', 'configs', 'webpage']
-                }
-            },
-            'no_check_certificate': True,
-        }
-        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
-            try:
-                info = ydl.extract_info(url, download=False)
-            except Exception as yt_error:
-                if "Sign in to confirm your age" in str(yt_error):
-                    ydl_opts['format'] = 'best[height<=480]'  # Try a different format
-                    ydl = yt_dlp.YoutubeDL(ydl_opts)
-                    info = ydl.extract_info(url, download=False)
-                else:
-                    raise yt_error
-            title = info.get('title', '')
-            description = info.get('description', '')
-            views = info.get('view_count', 'Unknown')
-            uploader = info.get('uploader', 'Unknown')
-            upload_date = info.get('upload_date', 'Unknown')
-            content = f"""
 Video Title: {title}
 Uploader: {uploader}
-Upload Date: {upload_date}
-Views: {views}
-Description:
-{description}
 """
-            return [Document(page_content=content)]
     except Exception as e:
         st.error(f"Error getting YouTube content: {str(e)}")
         return None

 prompt = PromptTemplate(template=prompt_template, input_variables=["text"])
 def get_youtube_content(url):
+    """Get content from YouTube video using youtube-transcript-api"""
     try:
+        from youtube_transcript_api import YouTubeTranscriptApi
+        from urllib.parse import urlparse, parse_qs
+        # Extract video ID from URL
+        if 'youtube.com' in url:
+            video_id = parse_qs(urlparse(url).query)['v'][0]
+        elif 'youtu.be' in url:
+            video_id = urlparse(url).path[1:]
+        else:
+            raise ValueError("Not a valid YouTube URL")
+        # Get the transcript
+        transcript_list = YouTubeTranscriptApi.get_transcript(video_id)
+        transcript_text = ' '.join([entry['text'] for entry in transcript_list])
+        # Get video info using requests
+        response = requests.get(f"https://www.youtube.com/oembed?url=https://www.youtube.com/watch?v={video_id}&format=json")
+        if response.status_code == 200:
+            video_info = response.json()
+            title = video_info.get('title', '')
+            uploader = video_info.get('author_name', '')
+        else:
+            title = "Unknown Title"
+            uploader = "Unknown Uploader"
+        content = f"""
 Video Title: {title}
 Uploader: {uploader}
+Content:
+{transcript_text}
 """
+        return [Document(page_content=content)]
     except Exception as e:
         st.error(f"Error getting YouTube content: {str(e)}")
         return None

requirements.txt CHANGED Viewed

Binary files a/requirements.txt and b/requirements.txt differ