Spaces:
Sleeping
Sleeping
import requests | |
import json | |
import gradio as gr | |
import logging | |
from bs4 import BeautifulSoup | |
# Emit timestamped DEBUG-and-above records from the root logger so the
# scraping steps below can be traced while debugging.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.DEBUG,
)
def _collect_video_items(data, max_results):
    """Return up to ``max_results`` video tuples found in ``ytInitialData``.

    Args:
        data: The decoded ``ytInitialData`` object from a search results page.
        max_results: Upper bound on the number of tuples returned.

    Returns:
        A list of ``(thumbnail_url, video_title, video_url)`` tuples.

    Raises:
        KeyError: If the expected search-results path is missing from ``data``.
    """
    sections = data['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents']
    video_items = []
    for section in sections:
        entries = section.get('itemSectionRenderer', {}).get('contents', [])
        for entry in entries:
            # Checked before appending and returned from here so the cap holds
            # across *all* sections, not only within the current one.
            if len(video_items) >= max_results:
                return video_items
            video_renderer = entry.get('videoRenderer')
            if not video_renderer:
                continue
            video_id = video_renderer.get('videoId', 'N/A')
            # ``or [{}]`` also covers a present-but-empty list, which would
            # otherwise raise IndexError on ``[0]``.
            title_runs = video_renderer.get('title', {}).get('runs') or [{}]
            video_title = title_runs[0].get('text', 'N/A')
            thumbnails = video_renderer.get('thumbnail', {}).get('thumbnails') or [{}]
            thumbnail_url = thumbnails[0].get('url', 'N/A')
            video_url = f"https://www.youtube.com/watch?v={video_id}"
            video_items.append((thumbnail_url, video_title, video_url))
    return video_items


# Function to search YouTube without using the API
def youtube_search(query, max_results=10):
    """Search YouTube by scraping the results page instead of calling the API.

    The results page embeds its data as a ``var ytInitialData = {...};``
    assignment inside a ``<script>`` tag; this function fetches the page,
    decodes that JSON object and extracts the video entries from it.

    Args:
        query: Free-text search string. It is URL-encoded by ``requests``.
        max_results: Maximum number of videos to return (default 10).

    Returns:
        A ``(video_items, error_message)`` tuple. ``video_items`` is a list of
        ``(thumbnail_url, video_title, video_url)`` tuples; ``error_message``
        is ``""`` on success and a human-readable description otherwise (in
        which case ``video_items`` is empty). No exception is propagated.
    """
    search_url = "https://www.youtube.com/results"
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.83 Safari/537.36"
    }
    marker = 'var ytInitialData = '
    logging.debug("Starting YouTube search for query: %s", query)
    try:
        # ``params`` percent-encodes the query (spaces become '+'), so
        # characters such as '&' or '#' no longer corrupt the URL. The timeout
        # keeps a stalled connection from blocking the caller forever.
        response = requests.get(
            search_url,
            params={"search_query": query},
            headers=headers,
            timeout=10,
        )
        response.raise_for_status()  # Raise an error for bad status codes

        # Parse the HTML response and look for the script holding the data.
        soup = BeautifulSoup(response.text, "html.parser")
        for script in soup.find_all("script"):
            script_text = script.text
            marker_pos = script_text.find(marker)
            if marker_pos == -1:
                continue

            # Decode exactly one JSON value starting after the marker.
            # raw_decode stops at the end of the object, so a "};" sequence
            # inside the payload cannot truncate it.
            json_start = marker_pos + len(marker)
            while json_start < len(script_text) and script_text[json_start].isspace():
                json_start += 1
            data, _ = json.JSONDecoder().raw_decode(script_text, json_start)

            video_items = _collect_video_items(data, max_results)
            if video_items:
                return video_items, ""  # Videos found, no error message
            logging.warning("No video entries found.")
            return [], "No video entries found."

        logging.warning("JSON data block not found in the page.")
        return [], "Unable to find video data."
    except requests.exceptions.RequestException as e:
        error_message = f"Request error occurred: {e}"
        logging.error(error_message)
        return [], error_message
    except json.JSONDecodeError as e:
        error_message = f"JSON decoding error occurred: {e}"
        logging.error(error_message)
        return [], error_message
    except Exception as e:
        # Boundary handler: the page layout is undocumented, so any structural
        # surprise (e.g. a missing key) is reported to the caller as a message.
        error_message = f"An unexpected error occurred: {e}"
        logging.error(error_message)
        return [], error_message
# Function to create a gallery for Gradio
def display_videos(query):
    """Run a YouTube search and format the results for the Gradio outputs.

    Args:
        query: Free-text search string forwarded to ``youtube_search``.

    Returns:
        A ``(gallery_value, error_message)`` pair. On failure the first
        element is a ``gr.update`` that clears the value and sets an error
        label, and the second is the error text. On success the first element
        is a list of ``(img_html, link_html)`` tuples and the second is ``""``.

    NOTE(review): the component that consumes these HTML strings is not
    visible in this part of the file - confirm that it renders HTML.
    """
    # Local import keeps this block self-contained; ``html`` is stdlib.
    import html

    gallery_items, error_message = youtube_search(query)
    if error_message:
        return gr.update(value=[], label="Error: Unable to fetch videos"), error_message

    # Titles and URLs come from scraped page data, so escape them before
    # interpolating into markup: a quote or '<' in a title would otherwise
    # break the attribute/tag or inject arbitrary HTML.
    formatted_gallery = []
    for thumbnail_url, video_title, video_url in gallery_items:
        image_html = f"<img src='{html.escape(thumbnail_url)}' width='250'/>"
        link_html = (
            f"<a href='{html.escape(video_url)}' target='_blank'>"
            f"{html.escape(video_title)}</a>"
        )
        formatted_gallery.append((image_html, link_html))
    return formatted_gallery, ""
# Gradio interface | |
with gr | |