# Source: app.py from a Hugging Face Space (author: cdleong).
# Last commit: "Update app.py" (e3d7a97, verified). File size: 2.64 kB.
import gradio as gr
import pandas as pd
import requests
from pathlib import Path
from zipfile import ZipFile
# ------------------
# Data prep
# ------------------
def download_file(url: str, output_path: Path, timeout: float = 30.0) -> None:
    """Download ``url`` to ``output_path`` unless that file already exists.

    Args:
        url: Address fetched with an HTTP GET.
        output_path: Destination file. When it already exists nothing is
            downloaded and the existing file is left untouched.
        timeout: Seconds to wait on the server before giving up. Without a
            timeout ``requests`` waits indefinitely, so a stalled connection
            would hang the caller (here: app startup) forever.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    if output_path.exists():
        print(f"Skipping {output_path.name}, already exists.")
        return

    print(f"Downloading {url}")
    response = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of saving an error page as data.
    response.raise_for_status()
    output_path.write_bytes(response.content)
    print(f"Saved to {output_path}")
# Fetch the two Bible CSV tables into the working directory.
download_file(
    "https://raw.githubusercontent.com/BradyStephenson/bible-data/refs/heads/main/BibleData-Person.csv",
    Path("BibleData-Person.csv"),
)
download_file(
    "https://raw.githubusercontent.com/BradyStephenson/bible-data/refs/heads/main/BibleData-PersonLabel.csv",
    Path("BibleData-PersonLabel.csv"),
)

# `names.zip` is assumed to ship with the repo; unpack it only when the
# `names` directory is not there yet.
if Path("names").exists():
    print("SSA name files already extracted.")
else:
    print("Unzipping local names.zip...")
    with ZipFile("names.zip", mode="r") as zip_ref:
        zip_ref.extractall(path="names")

# Locate the extracted SSA name files (yob*.txt).
ssa_name_txt_files = [*Path("names").glob("yob*.txt")]
# ------------------
# Load Data
# ------------------
def load_ssa_names():
    """Read every SSA name file and build per-year and aggregated tables.

    Each path in the module-level ``ssa_name_txt_files`` is parsed as a
    header-less CSV with the columns ``name``, ``sex`` and ``count``. The
    year is taken from the file name (the part before the first ``.`` with
    ``yob`` removed) and stored in a ``year`` column.

    Returns:
        A ``(per_year, totals)`` tuple of DataFrames. ``per_year`` is all
        files concatenated; ``totals`` sums ``count`` per ``(name, sex)``
        pair and is sorted by descending ``count``.
    """
    columns = ["name", "sex", "count"]

    def _read_year(path):
        # e.g. "yob1880.txt" -> 1880
        prefix = path.name.split(".")[0]
        year = int(prefix.replace("yob", ""))
        return pd.read_csv(path, names=columns).assign(year=year)

    per_year = pd.concat([_read_year(path) for path in ssa_name_txt_files])

    totals = per_year.groupby(["name", "sex"], as_index=False)["count"].sum()
    totals = totals.sort_values(by="count", ascending=False)
    return per_year, totals
def load_bible_names():
    """Load the Bible person table from ``BibleData-Person.csv``.

    Returns:
        DataFrame holding the rows of ``BibleData-Person.csv`` as parsed by
        ``pandas.read_csv``.
    """
    # BibleData-PersonLabel.csv is deliberately not read here: this function
    # used to merge the person table's ``sex`` column into it, but the merged
    # frame was never returned or stored, so the read + merge was dead work.
    # NOTE(review): if the label table (with ``sex``) was meant to be the
    # result, returning it changes the return shape - update the caller too.
    return pd.read_csv("BibleData-Person.csv")
# Build the tables once, when the module is executed.
ssa_names_df, ssa_names_aggregated_df = load_ssa_names()
bible_names_df = load_bible_names()

# ------------------
# Gradio Interface
# ------------------
with gr.Blocks() as demo:
    gr.Markdown("## Bible Names & SSA Names Datasets")

    # One tab per dataset; each shows only the first 100 rows.
    with gr.Tab("Bible Names"):
        gr.Dataframe(
            value=bible_names_df.head(100),
            label="Bible Names (first 100 rows)",
        )

    with gr.Tab("SSA Names (Aggregated)"):
        gr.Dataframe(
            value=ssa_names_aggregated_df.head(100),
            label="Top SSA Names (first 100 rows)",
        )

demo.launch()