webscarper / app.py
mobenta's picture
Update app.py
0ef74be verified
raw
history blame
6.09 kB
import gradio as gr
import requests
import os
import re
# Load the comma-separated API keys from the environment variable.
# Blank entries (e.g. from a trailing comma or a whitespace-only value) are
# dropped so that an empty string is never handed out as an API key.
YOUTUBE_API_KEYS = [
    key.strip()
    for key in (os.getenv("YOUTUBE_API_KEYS") or "").split(",")
    if key.strip()
]
if not YOUTUBE_API_KEYS:
    raise ValueError("API keys not found. Make sure the secret 'YOUTUBE_API_KEYS' is set.")

# Index of the API key that will be handed out next
key_index = 0
def get_api_key():
    """Return the next API key, cycling through YOUTUBE_API_KEYS in order.

    The module-level ``key_index`` is advanced on every call and wraps back
    to the first key after the last one has been handed out.
    """
    global key_index
    position = key_index
    selected_key = YOUTUBE_API_KEYS[position]
    # Advance the shared cursor so the following call gets the next key.
    key_index = (position + 1) % len(YOUTUBE_API_KEYS)
    return selected_key
# Function to search YouTube videos using the API, following pagination
# until max_results videos have been collected
def youtube_search(query, max_results=50):
    """Search YouTube for videos matching *query*.

    Args:
        query: Search text sent as the ``q`` parameter.
        max_results: Upper bound on the number of videos returned.

    Returns:
        A list of at most ``max_results`` dicts with the keys
        ``thumbnail_url``, ``video_id``, ``title`` and ``description``.
        The return value is always a list: if a request fails, or every API
        key is rejected, the results gathered so far (possibly none) are
        returned and the problem is printed.
    """
    if max_results <= 0:
        return []

    search_url = "https://www.googleapis.com/youtube/v3/search"
    page_size = 50  # The request below never asks for more than 50 per page
    params = {
        "part": "snippet",
        "q": query,
        "type": "video",
        "maxResults": min(page_size, max_results),
    }
    all_results = []
    rejected_in_a_row = 0

    try:
        while len(all_results) < max_results:
            params["key"] = get_api_key()  # Rotate to the next API key
            response = requests.get(search_url, params=params, timeout=10)

            # Quota exceeded / forbidden: retry with the next key, but give up
            # once every key has been rejected in a row so we never spin forever.
            if response.status_code in (403, 429):
                rejected_in_a_row += 1
                if rejected_in_a_row >= len(YOUTUBE_API_KEYS):
                    print("Quota exceeded or forbidden for every API key. Giving up.")
                    break
                print("Quota exceeded or forbidden for API key. Trying next key...")
                continue
            rejected_in_a_row = 0

            response.raise_for_status()  # Raise for any other 4xx/5xx response
            payload = response.json()  # Parse the body once and reuse it

            for result in payload.get("items", []):
                video_id = result.get("id", {}).get("videoId")
                if not video_id:
                    # Skip entries that carry no playable video ID.
                    continue
                snippet = result.get("snippet", {})
                thumbnails = snippet.get("thumbnails", {})
                thumbnail = thumbnails.get("medium") or thumbnails.get("default") or {}
                all_results.append({
                    'thumbnail_url': thumbnail.get("url", ""),
                    'video_id': video_id,
                    'title': snippet.get("title", ""),
                    'description': snippet.get("description", ""),
                })

            # Without a nextPageToken there are no further pages to fetch
            next_page_token = payload.get("nextPageToken")
            if not next_page_token:
                break
            params["pageToken"] = next_page_token
    except requests.exceptions.RequestException as e:
        # Print the error message to help debug issues; keep partial results
        print(f"Error during YouTube API request: {e}")

    # A full page may overshoot the requested amount, so trim the tail.
    return all_results[:max_results]
# Function to display the video using the video URL

# URL shapes that are recognised; each one captures the video ID. The ID is
# limited to letters, digits, "_" and "-" so that nothing able to break out
# of the iframe's src attribute can ever be captured.
_VIDEO_ID_PATTERNS = (
    # watch URLs, with "v" anywhere in the query string
    re.compile(r"youtube\.com/watch\?(?:[^#\s]*&)?v=([A-Za-z0-9_-]+)"),
    re.compile(r"youtube\.com/(?:embed|v|shorts)/([A-Za-z0-9_-]+)"),
    re.compile(r"youtu\.be/([A-Za-z0-9_-]+)"),
)


def show_video(video_url):
    """Return HTML that embeds the YouTube video referenced by *video_url*.

    Args:
        video_url: A YouTube link (watch, embed, /v/, shorts or youtu.be).

    Returns:
        An ``<iframe>`` snippet for the video, or a plain error message when
        no video ID can be extracted (including empty or non-string input).
    """
    invalid_message = "Invalid YouTube URL. Please enter a valid YouTube video link."
    if not isinstance(video_url, str) or not video_url.strip():
        return invalid_message

    video_id = None
    for pattern in _VIDEO_ID_PATTERNS:
        match = pattern.search(video_url)
        if match:
            video_id = match.group(1)
            break

    # If no video ID is found, return an error message
    if not video_id:
        return invalid_message

    # Create the embed URL
    embed_url = f"https://www.youtube.com/embed/{video_id}"

    # Return an iframe with the video
    html_code = f'''
<iframe width="560" height="315" src="{embed_url}"
frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture"
allowfullscreen></iframe>
'''
    return html_code
# Create the Gradio interface

# JavaScript executed when a search result is clicked. It is attached to each
# result as an inline onclick handler because <script> elements that arrive
# as inserted markup (innerHTML) are not executed by browsers. The clicked
# element's data-video-id attribute supplies the video ID.
_SELECT_VIDEO_JS = (
    "const app = document.getElementsByTagName('gradio-app')[0];"
    " const root = (app && app.shadowRoot) || document;"
    " const box = root.querySelector("
    "'#selected_video_link textarea, #selected_video_link input');"
    " if (box) {"
    " box.value = 'https://www.youtube.com/watch?v=' + this.dataset.videoId;"
    " box.dispatchEvent(new Event('input', { bubbles: true }));"
    " }"
)


def _safe_html(text):
    """Return *text* escaped for use in HTML text and attribute values."""
    from html import escape, unescape  # function-scope import; not needed elsewhere

    # Decode any entities already present before escaping, so text that is
    # already entity-encoded is not escaped twice.
    return escape(unescape(str(text or "")), quote=True)


def _render_search_item(item, onclick):
    """Build the HTML snippet for one search result dict.

    *onclick* must already be escaped for use inside a double-quoted attribute.
    """
    video_id = _safe_html(item['video_id'])
    thumbnail_url = _safe_html(item['thumbnail_url'])
    title = _safe_html(item['title'])
    description = _safe_html(item['description'])
    return f'''
<div class="search-item" style="display:flex; align-items:center; margin-bottom:10px; cursor:pointer;" data-video-id="{video_id}" onclick="{onclick}">
<img src="{thumbnail_url}" alt="{title}" style="width:160px; height:auto; margin-right:10px;">
<div>
<h4>{title}</h4>
<p>{description}</p>
</div>
</div>
'''


# Define search button behavior
def update_search_results(query):
    """Search YouTube for *query* and return the results as clickable HTML."""
    from html import escape  # function-scope import; not needed elsewhere

    search_results = youtube_search(query)
    error_message = None
    # youtube_search may signal a failure with a (results, message) tuple
    # instead of a plain list; unpack it so the loop below only sees items.
    if isinstance(search_results, tuple):
        search_results, error_message = search_results

    onclick = escape(_SELECT_VIDEO_JS, quote=True)
    parts = ['<div>']
    if error_message:
        parts.append(f'<p>{_safe_html(error_message)}</p>')
    elif not search_results:
        parts.append('<p>No results found.</p>')
    parts.extend(_render_search_item(item, onclick) for item in search_results)
    parts.append('</div>')
    return ''.join(parts)


# Play the video when the Play Video button is clicked
def play_video(video_url):
    """Return the embed HTML for the currently selected video link."""
    return show_video(video_url)


with gr.Blocks() as demo:
    gr.Markdown("## YouTube Video Search, Selection, and Playback")
    with gr.Row():
        with gr.Column(scale=3):
            search_query_input = gr.Textbox(label="Search YouTube", placeholder="Enter your search query here")
            search_button = gr.Button("Search")
            search_output = gr.HTML(label="Search Results")
        with gr.Column(scale=2):
            # elem_id is the hook the click handler uses to locate this textbox.
            selected_video_link = gr.Textbox(label="Selected Video Link", interactive=False, elem_id='selected_video_link')
            play_video_button = gr.Button("Play Video")
            video_output = gr.HTML(label="Video Player")

    # Event listeners are registered while the Blocks context is still open.
    search_button.click(update_search_results, inputs=search_query_input, outputs=search_output)
    play_video_button.click(play_video, inputs=selected_video_link, outputs=video_output)

# Launch the Gradio interface
demo.launch()