webscarper / app.py
mobenta's picture
Update app.py
4f264df verified
raw
history blame
5.79 kB
import gradio as gr
import requests
import os
import re
# Fetch the comma-separated keys from the environment variable and convert
# them into a list. Blank entries (for example from a trailing comma or stray
# whitespace) are dropped so an empty string is never handed out as an API key.
YOUTUBE_API_KEYS = [
    key.strip()
    for key in (os.getenv("YOUTUBE_API_KEYS") or "").split(",")
    if key.strip()
]
if not YOUTUBE_API_KEYS:
    # Fail fast at start-up: nothing below can work without at least one key.
    raise ValueError("API keys not found. Make sure the secret 'YOUTUBE_API_KEYS' is set.")
# Position in YOUTUBE_API_KEYS of the key that the next call will hand out.
key_index = 0


def get_api_key():
    """Return the next API key, cycling through YOUTUBE_API_KEYS in order.

    Each call hands out the key at the module-level ``key_index`` and then
    advances ``key_index`` by one, wrapping back to the first key after the
    last one has been used.
    """
    global key_index
    position = key_index
    selected_key = YOUTUBE_API_KEYS[position]
    # Advance the cursor only after the lookup succeeded.
    key_index = (position + 1) % len(YOUTUBE_API_KEYS)
    return selected_key
# Function to search YouTube videos using the API
def youtube_search(query, max_results=50):
    """Search YouTube for videos matching *query*.

    Pages through the YouTube Data API ``search`` endpoint, taking a fresh key
    from ``get_api_key()`` for every request, until ``max_results`` videos
    have been collected or the API reports no further page.

    Args:
        query: Free-text search string sent as the ``q`` parameter.
        max_results: Upper bound on the number of videos returned. Values
            less than or equal to zero yield an empty list without any
            request being made.

    Returns:
        A list of at most ``max_results`` dicts with the keys
        ``thumbnail_url``, ``video_id``, ``title`` and ``description``.
        The list is empty when the request fails; the failure is printed.
        (A list is returned on every path so callers can always iterate the
        result.)
    """
    search_url = "https://www.googleapis.com/youtube/v3/search"
    all_results = []
    params = {
        "part": "snippet",
        "q": query,
        "type": "video",
    }
    # Number of 403/429 answers received in a row; used to stop once every
    # configured key has been rejected instead of rotating forever.
    consecutive_rejections = 0

    try:
        while len(all_results) < max_results:
            # YouTube API allows a maximum of 50 per request
            params["maxResults"] = min(50, max_results - len(all_results))
            params["key"] = get_api_key()
            # The timeout keeps a stalled connection from hanging the caller.
            response = requests.get(search_url, params=params, timeout=10)

            if response.status_code in (403, 429):
                consecutive_rejections += 1
                if consecutive_rejections >= len(YOUTUBE_API_KEYS):
                    print("Quota exceeded or forbidden for every API key. Giving up.")
                    break
                print("Quota exceeded or forbidden for API key. Trying next key...")
                continue
            consecutive_rejections = 0

            response.raise_for_status()
            payload = response.json()  # parse the body once per page

            for result in payload.get("items", []):
                snippet = result.get("snippet", {})
                video_id = result.get("id", {}).get("videoId")
                thumbnails = snippet.get("thumbnails", {})
                # Prefer the high-resolution thumbnail, fall back to smaller ones.
                thumbnail = (
                    thumbnails.get("high")
                    or thumbnails.get("medium")
                    or thumbnails.get("default")
                    or {}
                )
                thumbnail_url = thumbnail.get("url")
                if not video_id or not thumbnail_url:
                    # Skip malformed entries rather than failing the whole search.
                    continue
                all_results.append({
                    'thumbnail_url': thumbnail_url,
                    'video_id': video_id,
                    'title': snippet.get("title", ""),
                    'description': snippet.get("description", ""),
                })

            next_page_token = payload.get("nextPageToken")
            if not next_page_token:
                break
            params['pageToken'] = next_page_token
    except requests.exceptions.RequestException as e:
        print(f"Error during YouTube API request: {e}")
        return []

    return all_results[:max_results]
# Function to display the video using the video URL
def show_video(video_url):
    """Return an HTML ``<iframe>`` snippet that embeds a YouTube video.

    The video ID is extracted from ``watch``, ``embed``, ``v`` and
    ``youtu.be`` style links. Only the characters ``A-Z a-z 0-9 _ -`` are
    accepted as part of the ID, so nothing taken from the URL can break out
    of the ``src`` attribute of the generated markup.

    Args:
        video_url: Link to a YouTube video. ``None``, non-string and empty
            values are treated as invalid.

    Returns:
        The iframe HTML as a string, or a plain error message when no video
        ID can be found in ``video_url``.
    """
    invalid_message = "Invalid YouTube URL. Please enter a valid YouTube video link."
    if not isinstance(video_url, str) or not video_url:
        return invalid_message

    id_group = r"([A-Za-z0-9_-]+)"
    patterns = (
        # ``v=`` may be preceded by other query parameters.
        r"youtube\.com/watch\?(?:[^#\s]*?&)?v=" + id_group,
        r"youtube\.com/embed/" + id_group,
        r"youtube\.com/v/" + id_group,
        r"youtu\.be/" + id_group,
    )
    matches = (re.search(pattern, video_url) for pattern in patterns)
    video_id = next((match.group(1) for match in matches if match), None)
    if not video_id:
        return invalid_message

    embed_url = f"https://www.youtube.com/embed/{video_id}"
    return (
        "\n"
        f'<iframe width="560" height="315" src="{embed_url}"\n'
        'frameborder="0" allow="accelerometer; autoplay; clipboard-write; '
        'encrypted-media; gyroscope; picture-in-picture"\n'
        "allowfullscreen></iframe>\n"
    )
# Stylesheet passed to gr.Blocks; every rule targets the results gallery,
# which is created with elem_id="search_output".
GALLERY_CSS = """
#search_output img {
width: 200px !important;
height: 200px !important;
margin-right: 20px;
}
#search_output .label {
font-size: 36px !important;
}
#search_output .gallery-item {
display: flex !important;
align-items: flex-start !important;
margin-bottom: 60px !important;
}
#search_output .gallery-caption {
text-align: left !important;
padding-left: 20px;
}
#search_output .gallery-item div {
display: flex;
flex-direction: column;
}
#search_output .gallery-item h3 {
font-size: 36px;
margin: 0 0 10px 0;
}
#search_output .gallery-item p {
font-size: 24px;
margin: 0;
}
"""


# Update the search results and store video IDs
def update_search_results(query):
    """Run a search and build the gallery items for it.

    Args:
        query: Text entered in the search box.

    Returns:
        A pair ``(gallery_items, video_ids)``: ``gallery_items`` is a list of
        ``(thumbnail_url, caption)`` tuples and ``video_ids`` holds the
        video ID of each item at the same position. Both are empty for a
        blank query, which is answered without calling the API.

    Raises:
        gr.Error: When ``youtube_search`` reports a failure message.
    """
    if not query or not query.strip():
        return [], []

    search_results = youtube_search(query)
    # youtube_search may report a failed request as a (results, message)
    # tuple instead of a plain list; surface the message to the user rather
    # than failing while iterating the tuple.
    if isinstance(search_results, tuple):
        raise gr.Error(str(search_results[-1]))

    gallery_items = []
    video_ids = []
    for item in search_results:
        title = item['title']
        description = item['description']
        caption = f"<div><h3>{title}</h3><p>{description}</p></div>"
        gallery_items.append((item['thumbnail_url'], caption))
        video_ids.append(item['video_id'])
    return gallery_items, video_ids


# When a video is selected
def on_video_select(evt: gr.SelectData, video_ids):
    """Return the watch URL of the gallery item that was clicked.

    Args:
        evt: Selection event; ``evt.index`` is used as the position of the
            clicked item.
        video_ids: Video IDs stored by the last search, in gallery order.

    Returns:
        The ``https://www.youtube.com/watch?v=...`` link for the selected
        item, or an empty string when no IDs are stored or the index does
        not refer to one of them.
    """
    index = evt.index
    if not video_ids or not 0 <= index < len(video_ids):
        return ""
    return f"https://www.youtube.com/watch?v={video_ids[index]}"


# Play the video
def play_video(video_url):
    """Return the embed HTML for ``video_url`` by delegating to ``show_video``."""
    return show_video(video_url)


# Create the Gradio interface
with gr.Blocks(css=GALLERY_CSS) as demo:
    gr.Markdown("## YouTube Video Search, Selection, and Playback")
    video_ids_state = gr.State()  # To store video IDs corresponding to the search results

    with gr.Row():
        # Left column: search box and result gallery.
        with gr.Column(scale=3):
            search_query_input = gr.Textbox(label="Search YouTube", placeholder="Enter your search query here")
            search_button = gr.Button("Search")
            search_output = gr.Gallery(label="Search Results", columns=1, height="800px", elem_id="search_output")
        # Right column: link of the selected video and the player.
        with gr.Column(scale=2):
            selected_video_link = gr.Textbox(label="Selected Video Link", interactive=False)
            play_video_button = gr.Button("Play Video")
            video_output = gr.HTML(label="Video Player")

    # Wire the handlers to the components.
    search_button.click(update_search_results, inputs=search_query_input, outputs=[search_output, video_ids_state])
    search_output.select(on_video_select, inputs=video_ids_state, outputs=selected_video_link)
    play_video_button.click(play_video, inputs=selected_video_link, outputs=video_output)

# Launch the Gradio interface
demo.launch()