webscarper / app.py
mobenta's picture
Update app.py
234f265 verified
raw
history blame
7.02 kB
import gradio as gr
import requests
import os
import re
# Load the comma-separated YouTube Data API keys from the environment.
# Blank entries (for example from a trailing or doubled comma) are dropped so
# the key rotation never hands out an empty key.
YOUTUBE_API_KEYS = os.getenv("YOUTUBE_API_KEYS")
if YOUTUBE_API_KEYS:
    YOUTUBE_API_KEYS = [
        key.strip() for key in YOUTUBE_API_KEYS.split(",") if key.strip()
    ]
if not YOUTUBE_API_KEYS:
    # Covers both "variable unset/empty" and "variable contained only commas/whitespace".
    raise ValueError("API keys not found. Make sure the secret 'YOUTUBE_API_KEYS' is set.")

# Position in YOUTUBE_API_KEYS of the key that will be handed out next.
key_index = 0
def get_api_key():
    """Return the current API key and advance the round-robin cursor.

    Reads the module-level ``YOUTUBE_API_KEYS`` list at ``key_index`` and then
    moves ``key_index`` to the following position, wrapping back to 0 after
    the last key.
    """
    global key_index
    position = key_index
    selected_key = YOUTUBE_API_KEYS[position]
    # Step forward; the modulo wraps the cursor around to the first key.
    key_index = (position + 1) % len(YOUTUBE_API_KEYS)
    return selected_key
def youtube_search(query, upload_date, video_type, duration, sort_by, max_results=50):
    """Search YouTube via the Data API v3 ``search.list`` endpoint.

    Args:
        query: Search terms, sent as the ``q`` parameter.
        upload_date: Value for ``publishedAfter``; the API expects an RFC 3339
            timestamp. A falsy value disables the filter.
        video_type: One of "All", "Video", "Channel", "Playlist" or "Movie"
            (case-insensitive). "Movie" is sent as ``videoType=movie`` on a
            video search; "Channel"/"Playlist" are sent as the ``type``
            parameter. A falsy value or "All" keeps the default video search.
        duration: Value for ``videoDuration`` ("short", "medium", "long").
            Ignored when falsy or when the search type is not ``video``,
            because the API only accepts it for video searches.
        sort_by: Value for the ``order`` parameter.
        max_results: Maximum number of entries to return.

    Returns:
        A list of dicts with the keys ``'thumbnail_url'``, ``'video_id'`` and
        ``'title'``, at most ``max_results`` long. Result items without a
        video ID (channels, playlists) are skipped. An empty list is returned
        when the request fails with a ``requests`` exception.
    """
    search_url = "https://www.googleapis.com/youtube/v3/search"
    page_size_limit = 50  # largest maxResults value accepted by search.list
    request_timeout = 10  # seconds; avoids hanging forever on a stalled connection

    if max_results <= 0:
        return []

    params = {
        "part": "snippet",
        "q": query,
        "type": "video",
        "maxResults": min(page_size_limit, max_results),
        "order": sort_by,
    }

    # Add filter parameters based on user selection
    if upload_date:
        params["publishedAfter"] = upload_date

    requested_type = (video_type or "").lower()
    if requested_type == "movie":
        # "movie" is not a valid `type`; it is a videoType of a video search.
        params["videoType"] = "movie"
    elif requested_type and requested_type != "all":
        params["type"] = requested_type

    if duration and params["type"] == "video":
        params["videoDuration"] = duration

    all_results = []
    rejected_in_a_row = 0  # consecutive 403/429 answers, one per rotated key

    try:
        while len(all_results) < max_results:
            params["key"] = get_api_key()
            response = requests.get(search_url, params=params, timeout=request_timeout)

            if response.status_code in (403, 429):
                rejected_in_a_row += 1
                if rejected_in_a_row >= len(YOUTUBE_API_KEYS):
                    # Every key has been refused for this page: stop instead
                    # of cycling through the same keys forever.
                    print("All API keys are exhausted or forbidden. Giving up.")
                    break
                print("Quota exceeded or forbidden for API key. Trying next key...")
                continue

            rejected_in_a_row = 0
            response.raise_for_status()
            payload = response.json()  # parse the body once per page

            added = 0
            for result in payload.get("items", []):
                video_id = result.get("id", {}).get("videoId")
                if not video_id:
                    # Channel/playlist hits carry no videoId and cannot be played.
                    continue
                snippet = result.get("snippet", {})
                thumbnails = snippet.get("thumbnails", {})
                thumbnail = (
                    thumbnails.get("high")
                    or thumbnails.get("medium")
                    or thumbnails.get("default")
                    or {}
                )
                thumbnail_url = thumbnail.get("url")
                if not thumbnail_url:
                    continue
                all_results.append({
                    'thumbnail_url': thumbnail_url,
                    'video_id': video_id,
                    'title': snippet.get("title", ""),
                })
                added += 1

            next_page_token = payload.get("nextPageToken")
            # Stop when there is no further page, or when a page contributed
            # nothing usable (paging on would only burn quota).
            if not next_page_token or added == 0:
                break
            params["pageToken"] = next_page_token

        return all_results[:max_results]
    except requests.exceptions.RequestException as e:
        print(f"Error during YouTube API request: {e}")
        return []
def show_video(video_url):
    """Return the HTML for an embedded YouTube player for ``video_url``.

    Recognises ``youtube.com/watch?v=``, ``youtube.com/embed/``,
    ``youtube.com/v/`` and ``youtu.be/`` links.

    Args:
        video_url: The YouTube link to embed.

    Returns:
        An ``<iframe>`` snippet pointing at the embed URL of the video, or an
        error message string when no video ID can be extracted (including
        when ``video_url`` is not a string).
    """
    invalid_message = "Invalid YouTube URL. Please enter a valid YouTube video link."
    if not isinstance(video_url, str):
        return invalid_message

    patterns = [
        r"youtube\.com/watch\?v=([^\&\?\/]+)",
        r"youtube\.com/embed/([^\&\?\/]+)",
        r"youtube\.com/v/([^\&\?\/]+)",
        r"youtu\.be/([^\&\?\/]+)"
    ]
    video_id = None
    for pattern in patterns:
        match = re.search(pattern, video_url)
        if match:
            video_id = match.group(1)
            break

    if video_id:
        # The captured text is interpolated into an HTML attribute below, so
        # keep only the leading run of letters, digits, "_" and "-". This
        # drops quotes, angle brackets, fragments and similar trailing junk.
        safe_prefix = re.match(r"[A-Za-z0-9_-]+", video_id)
        video_id = safe_prefix.group(0) if safe_prefix else None

    if not video_id:
        return invalid_message

    embed_url = f"https://www.youtube.com/embed/{video_id}"
    html_code = f'''
<iframe width="100%" height="562" src="{embed_url}"
frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture"
allowfullscreen></iframe>
'''
    return html_code
# Gradio UI: a player area on top, the selected link with a play button, and
# the search form with its result gallery below.
# NOTE(review): the nesting of the layout containers below is reconstructed
# (the form and gallery are assumed to live inside the column) - confirm
# against the deployed app.
with gr.Blocks(css="""
#search_output {
max-width: 1300px;
}
#search_output img {
width: 150px !important;
height: 150px !important;
margin-right: 10px;
}
#search_output .gallery-item {
display: flex !important;
align-items: center !important;
margin-bottom: 30px !important;
}
#search_output .gallery-caption {
text-align: left !important;
padding-left: 20px;
font-size: 24px !important;
}
#video_container {
display: flex;
justify-content: center;
align-items: center;
max-width: 1500px;
margin: 0 auto;
padding: 20px;
background-color: #f9f9f9;
border-radius: 8px;
}
""") as demo:
    gr.Markdown("## YouTube Video Search, Selection, and Playback")

    # Video IDs of the current search results, in the same order as the gallery.
    video_ids_state = gr.State()

    with gr.Row(elem_id="video_container"):
        video_output = gr.HTML(label="Video Player", elem_id="video_output")

    with gr.Row():
        selected_video_link = gr.Textbox(label="Selected Video Link", interactive=False)
        play_video_button = gr.Button("Play Video")

    with gr.Row():
        with gr.Column(scale=3):
            search_query_input = gr.Textbox(label="Search YouTube",
                                            placeholder="Enter your search query here",
                                            elem_id="search_query_input")
            upload_date_input = gr.Dropdown(label="Upload Date", choices=["", "Last Hour", "Today", "This Week", "This Month", "This Year"])
            video_type_input = gr.Dropdown(label="Type", choices=["All", "Video", "Channel", "Playlist", "Movie"])
            duration_input = gr.Dropdown(label="Duration", choices=["", "Short (<4 mins)", "Medium (4-20 mins)", "Long (>20 mins)"])
            sort_by_input = gr.Dropdown(label="Sort By", choices=["relevance", "date", "viewCount", "rating"], value="relevance")
            search_button = gr.Button("Search")
            search_output = gr.Gallery(label="Search Results", columns=1, height="800px", elem_id="search_output")

    def update_search_results(query, upload_date, video_type, duration, sort_by):
        """Run a search and return ``(gallery_items, video_ids)``.

        ``gallery_items`` is a list of ``(thumbnail_url, title)`` tuples for
        the gallery; ``video_ids`` holds the matching video IDs in the same
        order so a gallery selection can be mapped back to a video.
        """
        # Function-scope import: only this callback needs datetime arithmetic.
        from datetime import datetime, timedelta, timezone

        # How far back each "Upload Date" choice reaches.
        upload_date_windows = {
            "Last Hour": timedelta(hours=1),
            "Today": timedelta(days=1),
            "This Week": timedelta(days=7),
            "This Month": timedelta(days=30),
            "This Year": timedelta(days=365),
        }
        duration_mapping = {
            "Short (<4 mins)": "short",
            "Medium (4-20 mins)": "medium",
            "Long (>20 mins)": "long",
        }

        # publishedAfter must be an RFC 3339 timestamp, so convert the chosen
        # window into an absolute UTC cutoff; no selection means no filter.
        upload_date_param = ""
        window = upload_date_windows.get(upload_date)
        if window is not None:
            cutoff = datetime.now(timezone.utc) - window
            upload_date_param = cutoff.strftime("%Y-%m-%dT%H:%M:%SZ")
        duration_param = duration_mapping.get(duration, "")

        search_results = youtube_search(query, upload_date_param, video_type, duration_param, sort_by)

        gallery_items = [(item['thumbnail_url'], f"{item['title']}") for item in search_results]
        video_ids = [item['video_id'] for item in search_results]
        return gallery_items, video_ids

    def on_video_select(evt: gr.SelectData, video_ids):
        """Turn a gallery selection into the watch URL of the chosen video.

        Returns an empty string when there are no stored IDs or the selected
        index does not refer to one of them.
        """
        index = evt.index
        if not video_ids or not isinstance(index, int) or not 0 <= index < len(video_ids):
            return ""
        selected_video_id = video_ids[index]
        return f"https://www.youtube.com/watch?v={selected_video_id}"

    def play_video(video_url):
        """Render the embedded player HTML for the selected link."""
        return show_video(video_url)

    # Event wiring: search fills the gallery and the ID state, selecting a
    # gallery item fills the link box, and the play button renders the player.
    search_button.click(update_search_results, inputs=[search_query_input, upload_date_input, video_type_input, duration_input, sort_by_input], outputs=[search_output, video_ids_state])
    search_output.select(on_video_select, inputs=video_ids_state, outputs=selected_video_link)
    play_video_button.click(play_video, inputs=selected_video_link, outputs=video_output)

demo.launch()