# Spaces: Sleeping
from pathlib import Path
from zipfile import ZipFile

import gradio as gr
import pandas as pd
import requests
# ------------------
# Data prep
# ------------------
def download_file(url: str, output_path: Path, timeout: float = 30.0) -> None:
    """Download ``url`` to ``output_path`` unless that file already exists.

    Args:
        url: Address fetched with an HTTP GET.
        output_path: Destination file. If it already exists it is left
            untouched and no request is made.
        timeout: Seconds handed to ``requests.get`` as its ``timeout``, so an
            unresponsive server cannot block startup indefinitely.

    Raises:
        requests.HTTPError: If the server answers with an error status
            (raised by ``raise_for_status``).
        requests.RequestException: If the request itself fails, including
            when the timeout expires.
    """
    if output_path.exists():
        print(f"Skipping {output_path.name}, already exists.")
        return

    print(f"Downloading {url}")
    # Without an explicit timeout, requests waits forever on a stalled server.
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    output_path.write_bytes(response.content)
    print(f"Saved to {output_path}")
# Download the Bible person datasets; files already on disk are skipped.
_BIBLE_DATA_BASE_URL = (
    "https://raw.githubusercontent.com/BradyStephenson/bible-data/refs/heads/main"
)
for _bible_csv_name in ("BibleData-Person.csv", "BibleData-PersonLabel.csv"):
    download_file(f"{_BIBLE_DATA_BASE_URL}/{_bible_csv_name}", Path(_bible_csv_name))

# Assume `names.zip` is already in the repo
_names_dir = Path("names")
if not _names_dir.exists():
    print("Unzipping local names.zip...")
    with ZipFile("names.zip", "r") as zip_ref:
        zip_ref.extractall(_names_dir)
else:
    print("SSA name files already extracted.")

# Define where to find SSA name files.
# Sorted because Path.glob yields entries in filesystem-dependent order, which
# would otherwise make the row order of the loaded data vary between machines.
ssa_name_txt_files = sorted(_names_dir.glob("yob*.txt"))
# ------------------
# Load Data
# ------------------
def load_ssa_names(name_files=None):
    """Load the per-year SSA name files and aggregate counts across years.

    Each file is read as a header-less CSV with the columns ``name``, ``sex``
    and ``count``. The year is parsed from the file name (the part before the
    first ``.``, with ``yob`` removed) and stored in a ``year`` column.

    Args:
        name_files: Iterable of ``Path`` objects to read. When ``None`` (the
            default) the module-level ``ssa_name_txt_files`` list is used.

    Returns:
        A ``(ssa_names_df, ssa_names_aggregated_df)`` tuple: the concatenated
        per-year rows, and the total ``count`` per ``(name, sex)`` pair sorted
        from most to least common.

    Raises:
        ValueError: If there are no files to load, or a file name does not
            contain a parseable year.
    """
    if name_files is None:
        name_files = ssa_name_txt_files

    ssa_dfs = []
    for names_file in name_files:
        yob = int(names_file.name.split(".")[0].replace("yob", ""))
        df = pd.read_csv(names_file, names=["name", "sex", "count"])
        df["year"] = yob
        ssa_dfs.append(df)

    # pd.concat([]) fails with an opaque "No objects to concatenate"; say what
    # is actually missing instead.
    if not ssa_dfs:
        raise ValueError("No SSA name files (yob*.txt) found to load.")

    ssa_names_df = pd.concat(ssa_dfs)
    ssa_names_aggregated_df = (
        ssa_names_df
        .groupby(["name", "sex"], as_index=False)["count"]
        .sum()
        .sort_values(by="count", ascending=False)
    )
    return ssa_names_df, ssa_names_aggregated_df
def load_bible_names(person_csv="BibleData-Person.csv"):
    """Load the Bible person table.

    The previous version also read ``BibleData-PersonLabel.csv`` and merged
    the ``sex`` column into it, but then discarded that merged frame and
    returned only the person table. That dead work has been removed; the
    return value is unchanged.

    Args:
        person_csv: Path of the person CSV to read. Defaults to
            ``BibleData-Person.csv`` in the working directory.

    Returns:
        The person CSV as a ``pandas.DataFrame``.
    """
    # NOTE(review): if the label/sex merge was meant to be the result, the
    # return contract (and the caller) must change together - confirm intent.
    bible_names_df = pd.read_csv(person_csv)
    return bible_names_df
# Load data on startup
ssa_names_df, ssa_names_aggregated_df = load_ssa_names()
bible_names_df = load_bible_names()

# ------------------
# Gradio Interface
# ------------------
# Two tabs, each showing a read-only preview (first 100 rows) of one dataset.
with gr.Blocks() as demo:
    gr.Markdown("## Bible Names & SSA Names Datasets")
    with gr.Tab("Bible Names"):
        gr.Dataframe(bible_names_df.head(100), label="Bible Names (first 100 rows)")
    with gr.Tab("SSA Names (Aggregated)"):
        gr.Dataframe(
            ssa_names_aggregated_df.head(100),
            label="Top SSA Names (first 100 rows)",
        )

# Launch only when run as a script, so importing this module (for example
# from a test or a notebook) does not start a web server.
if __name__ == "__main__":
    demo.launch()