cdleong committed on
Commit
a85053a
·
verified ·
1 Parent(s): 74efe75

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +78 -0
app.py ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pandas as pd
3
+ import requests
4
+ from pathlib import Path
5
+ from zipfile import ZipFile
6
+
7
+ # ------------------
8
+ # Download Resources
9
+ # ------------------
10
def download_file(url: str, output_path: Path, timeout: float = 60.0):
    """Download ``url`` to ``output_path`` unless that file already exists.

    Args:
        url: Address of the resource to fetch.
        output_path: Destination file. If it already exists, nothing is
            downloaded and the existing file is left untouched.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block forever on an unresponsive host.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: On connection problems or timeout.
    """
    if output_path.exists():
        print(f"Skipping {output_path.name}, already exists.")
        return
    print(f"Downloading {url}")
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    # Write to a sibling temp file and rename it into place. A crash during
    # the write must not leave a truncated file at ``output_path``, because
    # the existence check above would then skip the download forever.
    partial_path = output_path.with_name(output_path.name + ".part")
    partial_path.write_bytes(response.content)
    partial_path.replace(output_path)
    print(f"Saved to {output_path}")
19
+
20
# Fetch every remote resource into the working directory, in this order.
for resource_url, local_name in (
    (
        "https://raw.githubusercontent.com/BradyStephenson/bible-data/refs/heads/main/BibleData-Person.csv",
        "BibleData-Person.csv",
    ),
    (
        "https://www.ssa.gov/oact/babynames/names.zip",
        "names.zip",
    ),
    (
        "https://raw.githubusercontent.com/BradyStephenson/bible-data/refs/heads/main/BibleData-PersonLabel.csv",
        "BibleData-PersonLabel.csv",
    ),
):
    download_file(resource_url, Path(local_name))

# Extract the archive's members into the working directory.
with ZipFile("names.zip", mode="r") as zip_ref:
    zip_ref.extractall(path=".")
    print("Unzipped names.zip")
29
+
30
+ # ------------------
31
+ # Load Data
32
+ # ------------------
33
# Path.glob yields matches in arbitrary order; sorting the "yobYYYY.txt"
# paths gives a stable, chronological list.
ssa_name_txt_files = sorted(Path(".").glob("yob*.txt"))
34
+
35
def load_ssa_names(names_files=None):
    """Load the per-year SSA baby-name files and aggregate them.

    Args:
        names_files: Optional iterable of paths to ``yobYYYY.txt`` files.
            Defaults to the module-level ``ssa_name_txt_files`` list.

    Returns:
        A ``(ssa_names_df, ssa_names_aggregated_df)`` tuple. The first frame
        has one row per (name, sex, year) with columns ``name``, ``sex``,
        ``count`` and ``year``. The second sums ``count`` over all years per
        (name, sex), sorted by ``count`` in descending order.

    Raises:
        ValueError: If there are no input files to load.
    """
    if names_files is None:
        names_files = ssa_name_txt_files
    # Process the files in a deterministic (chronological) order.
    names_files = sorted(Path(p) for p in names_files)
    if not names_files:
        raise ValueError(
            "No SSA 'yob*.txt' files found; was names.zip downloaded and extracted?"
        )

    yearly_frames = []
    for names_file in names_files:
        yob = int(names_file.name.split(".")[0].replace("yob", ""))
        df = pd.read_csv(
            names_file,
            names=["name", "sex", "count"],
            dtype={"name": str, "sex": str},
            # Names are plain text: never let pandas read strings such as
            # "NA" or "None" as missing values, which groupby would then
            # silently drop from the aggregate.
            keep_default_na=False,
        )
        df["year"] = yob
        yearly_frames.append(df)

    # ignore_index avoids repeating each file's 0..n-1 labels in the result.
    ssa_names_df = pd.concat(yearly_frames, ignore_index=True)
    ssa_names_aggregated_df = (
        ssa_names_df
        .groupby(["name", "sex"], as_index=False)["count"]
        .sum()
        .sort_values(by="count", ascending=False)
    )
    return ssa_names_df, ssa_names_aggregated_df
50
+
51
def load_bible_names():
    """Load the Bible person table.

    Returns:
        A DataFrame read from ``BibleData-Person.csv`` in the working
        directory.
    """
    # Only the person table is returned, so only that file is read. Reading
    # BibleData-PersonLabel.csv and merging it here would be wasted I/O
    # because nothing of the merge would reach the caller.
    return pd.read_csv("BibleData-Person.csv")
60
+
61
# Read both datasets once, when the script starts.
ssa_names_df, ssa_names_aggregated_df = load_ssa_names()
bible_names_df = load_bible_names()

# ------------------
# Gradio Interface
# ------------------

with gr.Blocks() as demo:
    gr.Markdown("## Bible Names & SSA Names Datasets")

    # One tab per dataset, each previewing the first 100 rows.
    for tab_title, frame, caption in (
        ("Bible Names", bible_names_df, "Bible Names (first 100 rows)"),
        (
            "SSA Names (Aggregated)",
            ssa_names_aggregated_df,
            "Top SSA Names (first 100 rows)",
        ),
    ):
        with gr.Tab(tab_title):
            gr.Dataframe(frame.head(100), label=caption)

demo.launch()