webscarper / app.py
mobenta's picture
Update app.py
072569c verified
raw
history blame
6.65 kB
import logging
import os
import random
import re
import time

import gradio as gr
import yt_dlp
# Root logger setup: emit DEBUG and above, each record prefixed with its
# timestamp and level name.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.DEBUG,
)
# Pool of "host:port" proxy endpoints; the search code prefixes each with
# "http://" before handing it to yt-dlp.
PROXIES = [
    '43.134.229.98:3128',
    '35.220.254.137:8080',
    '31.129.253.30:40223',
    '41.204.53.17:80',
    '193.233.84.88:1080',
    '4.158.61.222:8080',
    '66.29.154.105:3128',
    '160.86.242.23:8080',
    '20.26.249.29:8080',
    '85.210.84.189:8080',
    # Add more as needed...
]
def get_random_proxy():
    """Return one entry of ``PROXIES`` picked uniformly at random."""
    chosen = random.choice(PROXIES)
    return chosen
def _entry_to_gallery_item(entry):
    """Convert one yt-dlp search entry into a gallery item dict.

    Returns ``None`` when the entry is empty or has no video ID, so the
    caller can simply skip it.
    """
    video_id = entry.get('id') if entry else None
    if not video_id:
        logging.debug(f"Missing video ID for entry: {entry}")
        return None

    # Fall back to YouTube's static thumbnail only once the ID is known to be
    # valid; `or` also covers keys that are present but set to None.
    thumbnail_url = entry.get('thumbnail') or f"https://img.youtube.com/vi/{video_id}/hqdefault.jpg"
    video_title = entry.get('title') or "Unknown Title"
    video_description = entry.get('description') or "No description available."

    logging.debug(f"Added video: ID={video_id}, Thumbnail={thumbnail_url}, Title={video_title}")
    return {
        "thumbnail": thumbnail_url,
        "video_id": video_id,
        "title": video_title,
        "description": video_description,
    }


def youtube_search(query, max_results=10):
    """Search YouTube through yt-dlp, trying each configured proxy at most once.

    Args:
        query: Free-text search string.
        max_results: Number of results requested from the ``ytsearch`` extractor.

    Returns:
        A ``(gallery_items, error_message)`` tuple. On success ``gallery_items``
        is a list of dicts with the keys ``thumbnail``, ``video_id``, ``title``
        and ``description`` and ``error_message`` is ``""``. On failure the
        list is empty and ``error_message`` describes the last problem seen.
    """
    # Reject blank queries up front instead of burning through every proxy.
    if not query or not query.strip():
        return [], "Please enter a search query."

    cookies_file = "cookies.txt"  # Optional cookies exported from YouTube
    use_cookies = os.path.exists(cookies_file)
    search_url = f"ytsearch{max_results}:{query}"
    # Reported only if the loop below never runs (empty proxy pool).
    error_message = "No proxies are configured."

    # random.sample yields a shuffled copy, so every proxy is tried once and
    # a dead proxy is never retried while working ones go unused.
    for proxy in random.sample(PROXIES, len(PROXIES)):
        logging.debug(f"Trying proxy: {proxy}")
        ydl_opts = {
            'quiet': False,  # Keep yt-dlp's detailed output
            'logger': logging.getLogger(),  # Route yt-dlp logs through logging
            'simulate': True,
            'noplaylist': True,  # Avoid extracting playlists
            'format': 'best',
            'proxy': f'http://{proxy}',
        }
        if use_cookies:
            ydl_opts['cookiefile'] = cookies_file
            logging.debug("Using cookies for YouTube authentication.")

        logging.debug(f"Starting YouTube search for query: {query}")
        try:
            # Introduce a random delay to avoid rate-limiting issues
            time.sleep(random.uniform(2, 5))
            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                result = ydl.extract_info(search_url, download=False)
        except Exception as e:
            # Retry boundary: any proxy/network/extractor failure moves on to
            # the next proxy rather than aborting the whole search.
            error_message = f"Error during YouTube yt-dlp request: {e}"
            logging.error(error_message)
            continue

        if not result or 'entries' not in result:
            # Record the failure so the caller does not mistake it for success.
            error_message = "No entries found in search result."
            logging.warning(error_message)
            continue

        entries = list(result['entries'] or [])
        logging.debug(f"Number of entries found: {len(entries)}")
        converted = (_entry_to_gallery_item(entry) for entry in entries)
        gallery_items = [item for item in converted if item is not None]
        return gallery_items, ""

    return [], error_message
def show_video(video_url):
    """Build an embeddable YouTube player for a video URL.

    Args:
        video_url: A YouTube link in ``watch?v=``, ``/embed/``, ``/v/`` or
            ``youtu.be/`` form. ``None`` and non-string values are treated
            as invalid.

    Returns:
        An HTML ``<iframe>`` snippet pointing at the video's embed URL, or a
        plain error message when no video ID can be extracted.
    """
    # Single backslashes: in a raw string `\.` matches a literal dot. The ID
    # is limited to URL-safe characters so it can be interpolated into the
    # iframe `src` attribute without allowing markup injection.
    patterns = (
        r"youtube\.com/watch\?v=([A-Za-z0-9_-]+)",
        r"youtube\.com/embed/([A-Za-z0-9_-]+)",
        r"youtube\.com/v/([A-Za-z0-9_-]+)",
        r"youtu\.be/([A-Za-z0-9_-]+)",
    )

    video_id = None
    if isinstance(video_url, str):
        for pattern in patterns:
            match = re.search(pattern, video_url)
            if match:
                video_id = match.group(1)
                logging.debug(f"Extracted video ID: {video_id}")
                break

    if not video_id:
        logging.error("Invalid YouTube URL. Please enter a valid YouTube video link.")
        return "Invalid YouTube URL. Please enter a valid YouTube video link."

    embed_url = f"https://www.youtube.com/embed/{video_id}"
    logging.debug(f"Embed URL generated: {embed_url}")
    html_code = (
        "\n"
        f'<iframe width="560" height="315" src="{embed_url}"\n'
        'frameborder="0" allow="accelerometer; autoplay; clipboard-write; '
        'encrypted-media; gyroscope; picture-in-picture"\n'
        'allowfullscreen></iframe>\n'
    )
    return html_code
# Gradio Interface Setup: search box and results gallery on the left,
# selected link and embedded player on the right.
with gr.Blocks() as demo:
    gr.Markdown("## YouTube Video Search, Selection, and Playback")

    # Video IDs of the current search results, index-aligned with the gallery.
    # The gallery only carries (image, caption) pairs, so IDs are kept here.
    video_ids_state = gr.State([])

    with gr.Row():
        with gr.Column(scale=3):
            search_query_input = gr.Textbox(label="Search YouTube", placeholder="Enter your search query here")
            search_button = gr.Button("Search")
            search_output = gr.Gallery(label="Search Results", columns=2, height="1000px", elem_id="gallery")
            error_output = gr.Textbox(label="Error Message", interactive=False, visible=False)
        with gr.Column(scale=2):
            selected_video_link = gr.Textbox(label="Selected Video Link", interactive=False)
            play_video_button = gr.Button("Play Video")
            video_output = gr.HTML(label="Video Player")

    def update_search_results(query):
        """Run the search and return (gallery items, error box update, video IDs)."""
        gallery_items, error_message = youtube_search(query)
        if error_message:
            return [], gr.update(value=error_message, visible=True), []
        # Gallery expects (image, caption) pairs; use a real newline in the caption.
        gallery_items_display = [
            (item["thumbnail"], f"{item['title']}\n{item['description']}")
            for item in gallery_items
        ]
        video_ids = [item["video_id"] for item in gallery_items]
        return gallery_items_display, gr.update(value="", visible=False), video_ids

    def on_video_select(video_ids, evt: gr.SelectData):
        """Map the clicked gallery position to its watch URL ("" if unknown)."""
        index = evt.index
        if not isinstance(index, int) or not 0 <= index < len(video_ids or []):
            logging.debug(f"Gallery selection index out of range: {index}")
            return ""
        video_url = f"https://www.youtube.com/watch?v={video_ids[index]}"
        logging.debug(f"Video selected: {video_url}")
        return video_url

    def play_video(video_url):
        """Render the embedded player for the currently selected link."""
        logging.debug(f"Playing video with URL: {video_url}")
        return show_video(video_url)

    search_button.click(
        update_search_results,
        inputs=search_query_input,
        outputs=[search_output, error_output, video_ids_state],
    )
    search_output.select(on_video_select, inputs=video_ids_state, outputs=selected_video_link)
    play_video_button.click(play_video, inputs=selected_video_link, outputs=video_output)

demo.launch()