import gradio as gr
import pandas as pd
import requests
from pathlib import Path
from zipfile import ZipFile
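# Third-party dependencies used below: gradio, pandas, requests
# (pathlib and zipfile are standard library).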

# ------------------
# Data prep
# ------------------
def download_file(url: str, output_path: Path):
    """Download a file to output_path, skipping the request if the file already exists."""
    if output_path.exists():
        print(f"Skipping {output_path.name}, already exists.")
        return
    print(f"Downloading {url}")
    response = requests.get(url, timeout=60)
    response.raise_for_status()
    output_path.write_bytes(response.content)
    print(f"Saved to {output_path}")


# Download files
download_file("https://raw.githubusercontent.com/BradyStephenson/bible-data/refs/heads/main/BibleData-Person.csv", Path("BibleData-Person.csv"))
download_file("https://raw.githubusercontent.com/BradyStephenson/bible-data/refs/heads/main/BibleData-PersonLabel.csv", Path("BibleData-PersonLabel.csv"))

# Assume `names.zip` is already in the repo
if not Path("names").exists():
    print("Unzipping local names.zip...")
    with ZipFile("names.zip", 'r') as zip_ref:
        zip_ref.extractall("names")
else:
    print("SSA name files already extracted.")

# Define where to find SSA name files
ssa_name_txt_files = list(Path("names").glob("yob*.txt"))
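# Each yob*.txt file covers one birth year and has header-less rows of name,sex,count
# (e.g. "Mary,F,7065"); the read_csv call below assigns those column names explicitly.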

# ------------------
# Load Data
# ------------------

def load_ssa_names():
    """Load every yob*.txt file and build per-name totals across all years."""
    ssa_dfs = []
    for names_file in ssa_name_txt_files:
        # The birth year is encoded in the file name, e.g. yob1990.txt -> 1990.
        yob = int(names_file.name.split(".")[0].replace("yob", ""))
        df = pd.read_csv(names_file, names=["name", "sex", "count"])
        df["year"] = yob
        ssa_dfs.append(df)
    ssa_names_df = pd.concat(ssa_dfs, ignore_index=True)
    # Aggregate counts per (name, sex) across all years, most common names first.
    ssa_names_aggregated_df = (
        ssa_names_df
        .groupby(["name", "sex"], as_index=False)["count"]
        .sum()
        .sort_values(by="count", ascending=False)
    )
    return ssa_names_df, ssa_names_aggregated_df

def load_bible_names():
    bible_names_df = pd.read_csv("BibleData-Person.csv")
    bible_names_personlabel_df = pd.read_csv("BibleData-PersonLabel.csv")
    # Attach each person's sex to their label rows so both tables can be used downstream.
    bible_names_personlabel_df = bible_names_personlabel_df.merge(
        bible_names_df[["person_id", "sex"]],
        on="person_id",
        how="left"
    )
    return bible_names_df, bible_names_personlabel_df

# Load data on startup
ssa_names_df, ssa_names_aggregated_df = load_ssa_names()
bible_names_df, bible_names_personlabel_df = load_bible_names()

# ------------------
# Gradio Interface
# ------------------

with gr.Blocks() as demo:
    gr.Markdown("## Bible Names & SSA Names Datasets")

    with gr.Tab("Bible Names"):
        gr.Dataframe(bible_names_df.head(100), label="Bible Names (first 100 rows)")

    with gr.Tab("SSA Names (Aggregated)"):
        gr.Dataframe(ssa_names_aggregated_df.head(100), label="Top SSA Names (first 100 rows)")
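
    # Tab for the merged person-label data; showing it here is an assumed extension
    # of the UI, since the merge in load_bible_names() is not displayed elsewhere.
    with gr.Tab("Bible Person Labels"):
        gr.Dataframe(bible_names_personlabel_df.head(100), label="Bible Person Labels (first 100 rows)")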

demo.launch()
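
# demo.launch() blocks while the app is being served; when running locally you could
# pass share=True for a temporary public link (not needed on a hosted deployment).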