webscarper / app.py
mobenta's picture
Update app.py
7e7541a verified
raw
history blame
4.42 kB
import subprocess
import sys
# Ensure compatible versions of httpx and httpcore are installed.
# pip is invoked through the current interpreter (sys.executable) every time
# this file starts, before the remaining imports run; check_call raises
# subprocess.CalledProcessError if the install command exits non-zero.
subprocess.check_call([sys.executable, "-m", "pip", "install", "httpx==0.18.2", "httpcore==0.13.6"])
import gradio as gr
import requests
from bs4 import BeautifulSoup
import re
# Function to search YouTube videos using web scraping
def _video_id_from_href(href):
    """Extract a video ID from a result link's ``href``.

    Prefers the ``v`` query parameter so that trailing parameters (for
    example ``&list=...``) are not mistaken for the ID; falls back to the
    text after the last ``=`` when no ``v`` parameter is present.
    """
    # Function-scope import keeps this block self-contained.
    from urllib.parse import parse_qs, urlsplit

    ids = parse_qs(urlsplit(href).query).get("v")
    return ids[0] if ids else href.split("=")[-1]


def youtube_search(query, max_results=50):
    """Scrape the YouTube results page for ``query`` and return gallery items.

    Args:
        query: Free-text search terms. The value is passed as a request
            parameter so that characters such as ``&``, ``#`` and ``+`` are
            URL-encoded. Blank or ``None`` input yields an empty list without
            issuing a request.
        max_results: Upper bound on the number of items returned; zero or a
            negative value produces an empty list.

    Returns:
        A list of ``(thumbnail_url, video_id)`` tuples in page order. An empty
        list is returned when the HTTP request fails (the error is printed).
    """
    search_terms = "" if query is None else str(query).strip()
    if not search_terms:
        return []

    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
    }

    try:
        # Let requests build the query string so the search terms are encoded,
        # and bound the wait so a stalled connection cannot hang the caller.
        response = requests.get(
            "https://www.youtube.com/results",
            params={"search_query": search_terms},
            headers=headers,
            timeout=10,
        )
        response.raise_for_status()  # Raise an error for bad responses (4xx or 5xx)
    except requests.exceptions.RequestException as e:
        # Print the error message to help debug issues
        print(f"Error during YouTube web scraping request: {e}")
        return []

    # Parse the HTML content
    soup = BeautifulSoup(response.text, "html.parser")
    # NOTE(review): this selector only matches anchors carrying the
    # "yt-uix-tile-link" class; confirm the served markup still uses it,
    # otherwise the result list will always be empty.
    video_elements = soup.find_all("a", href=True, class_="yt-uix-tile-link")

    # Each item is (thumbnail URL, video ID); the ID travels as the second
    # tuple element so callers can recover it from the selected entry.
    gallery_items = []
    for video in video_elements[: max(max_results, 0)]:
        video_id = _video_id_from_href(video["href"])
        thumbnail_url = f"https://img.youtube.com/vi/{video_id}/mqdefault.jpg"
        gallery_items.append((thumbnail_url, video_id))
    return gallery_items
# Function to display the video using the video URL
def show_video(video_url):
    """Build an embeddable ``<iframe>`` player for a YouTube video URL.

    Args:
        video_url: A YouTube link in ``watch?v=``, ``/embed/``, ``/v/`` or
            ``youtu.be/`` form. Surrounding whitespace is ignored.

    Returns:
        An HTML string containing the iframe, or a plain error message when
        the input is missing, not a string, or matches none of the supported
        URL forms.
    """
    # Function-scope import keeps this block self-contained.
    from html import escape

    invalid_message = "Invalid YouTube URL. Please enter a valid YouTube video link."

    # re.search raises TypeError on None / non-string input, so reject it up front.
    if not isinstance(video_url, str):
        return invalid_message
    video_url = video_url.strip()

    # Regular expressions to extract the YouTube video ID from the URL
    patterns = [
        r"youtube\.com/watch\?v=([^&?\/]+)",
        r"youtube\.com/embed/([^&?\/]+)",
        r"youtube\.com/v/([^&?\/]+)",
        r"youtu\.be/([^&?\/]+)"
    ]
    video_id = None
    for pattern in patterns:
        match = re.search(pattern, video_url)
        if match:
            video_id = match.group(1)
            break

    # If no video ID is found, return an error message
    if not video_id:
        return invalid_message

    # The patterns accept quotes and angle brackets, so escape the ID before
    # placing it inside the src attribute to prevent markup injection.
    embed_url = f"https://www.youtube.com/embed/{escape(video_id, quote=True)}"

    # Return an iframe with the video
    return (
        "\n"
        f'<iframe width="560" height="315" src="{embed_url}"\n'
        'frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture"\n'
        "allowfullscreen></iframe>\n"
    )
# Create the Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("## YouTube Video Search, Selection, and Playback")

    with gr.Row():
        # Wider column: query box, search trigger and the results gallery.
        with gr.Column(scale=3):
            search_query_input = gr.Textbox(
                label="Search YouTube",
                placeholder="Enter your search query here",
            )
            search_button = gr.Button("Search")
            search_output = gr.Gallery(
                label="Search Results",
                columns=5,
                height="1500px",
            )

        # Narrower column: read-only link of the chosen video and the player.
        with gr.Column(scale=2):
            selected_video_link = gr.Textbox(
                label="Selected Video Link",
                interactive=False,
            )
            play_video_button = gr.Button("Play Video")
            video_output = gr.HTML(label="Video Player")

    def update_search_results(query):
        """Run the YouTube search for ``query`` and return its gallery items."""
        return youtube_search(query)

    def on_video_select(evt: gr.SelectData):
        """Turn the clicked gallery entry into a watch URL.

        The selected item's ``"caption"`` entry is read from ``evt.value`` and
        used as the video ID.
        """
        caption = evt.value["caption"]
        return f"https://www.youtube.com/watch?v={caption}"

    def play_video(video_url):
        """Return the player markup produced by ``show_video`` for ``video_url``."""
        return show_video(video_url)

    # Search button fills the gallery.
    search_button.click(
        fn=update_search_results,
        inputs=search_query_input,
        outputs=search_output,
    )
    # Clicking a gallery item writes its watch URL into the link box.
    search_output.select(
        fn=on_video_select,
        inputs=None,
        outputs=selected_video_link,
    )
    # Play button renders the player for the link currently in the box.
    play_video_button.click(
        fn=play_video,
        inputs=selected_video_link,
        outputs=video_output,
    )
# Launch the Gradio interface. The call sits at module level with no
# `__main__` guard, so it runs whenever this file is executed or imported.
demo.launch()