Spaces:
Sleeping
Sleeping
import json | |
import os | |
import random | |
import re | |
import sys | |
import time | |
from datetime import datetime | |
from glob import glob | |
from pathlib import Path | |
from typing import List, Optional | |
from uuid import uuid4 | |
import gradio as gr | |
import numpy as np | |
import pandas as pd | |
import requests | |
from datasets import load_dataset | |
from huggingface_hub import ( | |
CommitScheduler, | |
HfApi, | |
InferenceClient, | |
login, | |
snapshot_download, | |
) | |
from PIL import Image | |
# Module-wide cache of the most recent r/GamePhysics listing as a pandas
# DataFrame with columns ["post_id", "title"]; None until the first
# successful fetch in get_latest_pots().
cached_latest_posts_df = None
# Timestamp (datetime) of the last Reddit request; used to rate-limit
# fetches to at most one every 10 minutes.
last_fetched = None
def get_latest_pots():
    """Fetch the latest r/GamePhysics posts as a DataFrame.

    Results are cached module-wide (``cached_latest_posts_df``) and
    refreshed at most once every 10 minutes.

    Returns:
        pandas.DataFrame with columns ["post_id", "title"].  On a failed
        request the previous cache is returned if available, otherwise an
        empty DataFrame with the same columns (the original returned a
        bare [] here, which had an inconsistent type).
    """
    global cached_latest_posts_df
    global last_fetched

    now_time = datetime.now()
    # Rate-limit: serve the cache if the last *successful* fetch was under
    # 10 minutes ago.  total_seconds() (not .seconds) so that gaps longer
    # than a day do not wrap around and look "recent" again.
    if (
        last_fetched is not None
        and (now_time - last_fetched).total_seconds() < 600
        and cached_latest_posts_df is not None
    ):
        print("Using cached data")
        return cached_latest_posts_df

    url = "https://www.reddit.com/r/GamePhysics/.json"
    headers = {"User-Agent": "Mozilla/5.0"}
    try:
        response = requests.get(url, headers=headers, timeout=10)
    except requests.RequestException:
        response = None

    if response is None or response.status_code != 200:
        # Best-effort: fall back to a stale cache rather than failing.
        if cached_latest_posts_df is not None:
            return cached_latest_posts_df
        return pd.DataFrame(columns=["post_id", "title"])

    # Stamp the fetch time only on success so a failed request does not
    # suppress retries for the next 10 minutes.
    last_fetched = now_time

    data = response.json()
    # Reddit listing shape: {"data": {"children": [{"data": {...}}, ...]}}
    posts = data["data"]["children"]
    for post in posts:
        # Debug trace of what was fetched.
        print(f"ID: {post['data']['id']}, Title: {post['data']['title']}")

    examples = pd.DataFrame(
        [[post["data"]["id"], post["data"]["title"]] for post in posts],
        columns=["post_id", "title"],
    )
    cached_latest_posts_df = examples
    return examples
def row_selected(evt: gr.SelectData):
    """Return the post_id of the dataframe row the user clicked on."""
    global cached_latest_posts_df
    # evt.index is (row, column); only the row matters for the lookup.
    clicked_row = evt.index[0]
    return cached_latest_posts_df.iloc[clicked_row]["post_id"]
def load_video(url):
    """Resolve a r/GamePhysics post URL or bare post ID to its video URL.

    Args:
        url: Either a full https://www.reddit.com/r/GamePhysics/comments/...
            URL or a bare alphanumeric post ID.

    Returns:
        Direct download URL of the post's .mp4 in the
        asgaardlab/GamePhysicsDailyDump dataset repo.

    Raises:
        gr.Error: if the video file does not exist in the repo.
    """
    # Group 1 captures the ID out of a full comments URL; group 2 matches
    # a bare alphanumeric ID.
    pattern = r"https://www\.reddit\.com/r/GamePhysics/comments/([0-9a-zA-Z]+).*|([0-9a-zA-Z]+)"
    match = re.match(pattern, url)
    if match:
        post_id = match.group(1) or match.group(2)
        print(f"Valid GamePhysics post ID: {post_id}")
    else:
        # Pattern did not match at all; pass the raw input through and let
        # the existence check below reject it.
        post_id = url

    video_url = f"https://huggingface.co/datasets/asgaardlab/GamePhysicsDailyDump/resolve/main/data/videos/{post_id}.mp4?download=true"

    # Verify the file exists without downloading it; timeout so a hung
    # connection cannot freeze the UI callback.  requests.head() does not
    # follow redirects by default, hence 302 is also acceptable.
    r = requests.head(video_url, timeout=10)
    if r.status_code not in (200, 302):
        raise gr.Error(
            f"Video is not in the repo, please try another post. - {r.status_code}"
        )
    return video_url
# Gradio UI: left column takes a post URL/ID and shows the video; right
# column lists the latest posts (click a row to fill the URL box).
with gr.Blocks() as demo:
    gr.Markdown("## Preview GamePhysics")

    # Hidden placeholder textbox; currently unused by any event wiring.
    dummt_title = gr.Textbox(visible=False)

    with gr.Row():
        with gr.Column():
            reddit_id = gr.Textbox(
                lines=1, placeholder="Post url or id here", label="URL"
            )
            load_btn = gr.Button("Load")
            video_player = gr.Video()
        with gr.Column():
            latest_post_dataframe = gr.Dataframe()
            get_latest_pots_btn = gr.Button("Refresh Latest Posts")
            gr.Markdown("## Latest Posts")

    # Event wiring.
    load_btn.click(load_video, inputs=[reddit_id], outputs=[video_player])
    get_latest_pots_btn.click(get_latest_pots, outputs=[latest_post_dataframe])
    # Populate the posts table once when the page loads.
    demo.load(get_latest_pots, outputs=[latest_post_dataframe])
    # Clicking a row copies that row's post_id into the URL textbox.
    latest_post_dataframe.select(fn=row_selected, outputs=[reddit_id])

demo.launch()