cdleong committed on
Commit
18a8000
·
verified ·
1 Parent(s): a571a87

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +128 -134
app.py CHANGED
@@ -1,174 +1,168 @@
1
  import gradio as gr
2
  import pandas as pd
3
- import requests
4
  from pathlib import Path
5
  from zipfile import ZipFile
 
 
6
 
7
- # ------------------
8
- # Data prep
9
- # ------------------
10
def download_file(url: str, output_path: Path, timeout: float = 30.0):
    """Download ``url`` to ``output_path`` unless that file already exists.

    Args:
        url: Address fetched with an HTTP GET.
        output_path: Destination file; an existing file is left untouched.
        timeout: Seconds to wait on the server. Without a limit a stalled
            connection would block application startup indefinitely.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    if output_path.exists():
        print(f"Skipping {output_path.name}, already exists.")
        return
    print(f"Downloading {url}")
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    # The file is only written after the whole body arrived successfully,
    # so a failed request never leaves a partial file that would be
    # mistaken for a cached download on the next start.
    output_path.write_bytes(response.content)
    print(f"Saved to {output_path}")
19
-
20
-
21
# Fetch the two Bible CSVs into the working directory (skipped when cached).
download_file(
    "https://raw.githubusercontent.com/BradyStephenson/bible-data/refs/heads/main/BibleData-Person.csv",
    Path("BibleData-Person.csv"),
)
download_file(
    "https://raw.githubusercontent.com/BradyStephenson/bible-data/refs/heads/main/BibleData-PersonLabel.csv",
    Path("BibleData-PersonLabel.csv"),
)

# `names.zip` is expected to ship with the repo; unpack it only once.
if Path("names").exists():
    print("SSA name files already extracted.")
else:
    print("Unzipping local names.zip...")
    with ZipFile("names.zip", 'r') as zip_ref:
        zip_ref.extractall("names")

# Per-year SSA files (yob*.txt) inside the extraction directory.
ssa_name_txt_files = [*Path("names").glob("yob*.txt")]
35
-
36
- # ------------------
37
- # Load Data
38
- # ------------------
39
 
40
def load_ssa_names(name_files=None):
    """Read the per-year SSA files and return the raw and aggregated tables.

    Args:
        name_files: Iterable of ``yob<YEAR>.txt`` paths to read. When omitted,
            the module-level ``ssa_name_txt_files`` list is used.

    Returns:
        A tuple ``(ssa_names_df, ssa_names_aggregated_df)``. The first frame
        has one row per line of every file with columns ``name``, ``sex``,
        ``count`` and ``year``. The second sums ``count`` per
        ``(name, sex)`` pair and is sorted by ``count``, largest first.

    Raises:
        ValueError: If there are no files to read.
    """
    if name_files is None:
        name_files = ssa_name_txt_files
    name_files = [Path(p) for p in name_files]
    if not name_files:
        # pd.concat([]) would fail with "No objects to concatenate", which
        # says nothing about the real cause (no extracted yob*.txt files).
        raise ValueError("No SSA name files (yob*.txt) found to load.")

    ssa_dfs = [
        pd.read_csv(names_file, names=["name", "sex", "count"]).assign(
            # The year is encoded in the file name: "yob1999.txt" -> 1999.
            year=int(names_file.name.split(".")[0].replace("yob", ""))
        )
        for names_file in name_files
    ]
    ssa_names_df = pd.concat(ssa_dfs)
    ssa_names_aggregated_df = (
        ssa_names_df
        .groupby(["name", "sex"], as_index=False)["count"]
        .sum()
        .sort_values(by="count", ascending=False)
    )
    return ssa_names_df, ssa_names_aggregated_df
55
-
56
def load_bible_names():
    """Return the Bible person labels with each person's sex attached.

    Reads ``BibleData-Person.csv`` and ``BibleData-PersonLabel.csv`` from the
    working directory and left-joins the ``sex`` column of the person table
    onto the label table via ``person_id``, so every label row is kept even
    when no matching person exists.
    """
    people = pd.read_csv("BibleData-Person.csv")
    labels = pd.read_csv("BibleData-PersonLabel.csv")
    sex_by_person = people[["person_id", "sex"]]
    return labels.merge(sex_by_person, on="person_id", how="left")
65
 
66
# Both tables are read once at import time and then shared by every request.
ssa_names_df, ssa_names_aggregated_df = load_ssa_names()
bible_names_df = load_bible_names()
 
 
69
 
 
 
70
 
71
- # -----------------------
72
- # filtering and picking
73
- # -----------------------
74
 
75
def get_normal_and_bible(
    ssa_names_df,
    bible_names_df,
    min_length_ssa=1,
    max_length_ssa=None,
    min_length_bible=1,
    max_length_bible=None,
    ssa_popularity_percentile=None,
    sex=None,
    forbidden_names=None,
):
    """Pick one random SSA name and one random Bible name passing the filters.

    Args:
        ssa_names_df: Frame with ``name``, ``sex`` and ``count`` columns.
        bible_names_df: Frame with ``english_label`` and ``sex`` columns.
        min_length_ssa: Minimum SSA name length (inclusive).
        max_length_ssa: Maximum SSA name length (inclusive); ``None`` means
            no upper bound.
        min_length_bible: Minimum Bible label length (inclusive).
        max_length_bible: Maximum Bible label length (inclusive); ``None``
            means no upper bound.
        ssa_popularity_percentile: Optional ``(low, high)`` fractions. Names
            are ranked by their total count (ascending, summed over all rows
            of ``ssa_names_df``) and only ranks in
            ``[int(low * total), int(high * total))`` are kept.
        sex: When truthy, both tables are restricted to rows with this value
            in their ``sex`` column.
        forbidden_names: Optional collection of names removed from both pools.

    Returns:
        Tuple ``(ssa_name, bible_name)``.

    Raises:
        ValueError: If either pool is empty after filtering.
    """
    # `random` is not among this file's module-level imports, so the
    # random.choice calls below used to fail with NameError on every call.
    import random

    filtered_ssa_df = ssa_names_df
    if ssa_popularity_percentile is not None:
        low, high = ssa_popularity_percentile
        name_counts = (
            ssa_names_df.groupby("name", as_index=False)["count"]
            .sum()
            .sort_values(by="count", ascending=True)
        )
        total = len(name_counts)
        selected_names = name_counts.iloc[int(low * total):int(high * total)]["name"]
        filtered_ssa_df = ssa_names_df[ssa_names_df["name"].isin(selected_names)]

    if sex:
        filtered_ssa_df = filtered_ssa_df[filtered_ssa_df["sex"] == sex]
        bible_names_df = bible_names_df[bible_names_df["sex"] == sex]

    if forbidden_names:
        filtered_ssa_df = filtered_ssa_df[~filtered_ssa_df["name"].isin(forbidden_names)]
        bible_names_df = bible_names_df[~bible_names_df["english_label"].isin(forbidden_names)]

    normal_names = _labels_within_length(filtered_ssa_df["name"], min_length_ssa, max_length_ssa)
    bible_names = _labels_within_length(
        bible_names_df["english_label"], min_length_bible, max_length_bible
    )

    if not normal_names or not bible_names:
        raise ValueError("No names found with given constraints")

    return random.choice(normal_names), random.choice(bible_names)


def _labels_within_length(labels, min_length, max_length):
    """Return the unique values of ``labels`` whose length is within bounds."""
    lengths = labels.str.len()
    keep = lengths >= min_length
    if max_length is not None:
        keep &= lengths <= max_length
    return labels[keep].unique().tolist()
121
-
122
def generate_names(n, sex, min_len, max_len, min_bible_len, max_bible_len, pop_low, pop_high, last="Smith"):
    """Build ``n`` names of the form "<bible> <ssa> <last>", one per line.

    Args:
        n: How many names to generate; coerced to ``int``.
        sex: ``"M"`` or ``"F"`` to restrict both name pools; any other value
            (for example ``"Any"``) disables the restriction.
        min_len: Minimum SSA name length.
        max_len: Maximum SSA name length.
        min_bible_len: Minimum Bible name length.
        max_bible_len: Maximum Bible name length.
        pop_low: Lower bound of the SSA popularity percentile window.
        pop_high: Upper bound of the SSA popularity percentile window.
        last: Surname appended to every generated name.

    Returns:
        The generated names joined with newlines. A name that could not be
        generated is replaced by an ``[Error: ...]`` line so one failure does
        not discard the rest of the batch.
    """
    # range() sits outside the try block and rejects floats; the count slider
    # is declared without an integer step, so it may deliver a non-integer.
    count = int(n)

    names = []
    for _ in range(count):
        try:
            normal, bible = get_normal_and_bible(
                ssa_names_aggregated_df,
                bible_names_df,
                min_length_ssa=min_len,
                max_length_ssa=max_len,
                min_length_bible=min_bible_len,
                max_length_bible=max_bible_len,
                ssa_popularity_percentile=(pop_low, pop_high),
                sex=sex if sex in {"M", "F"} else None
            )
            names.append(f"{bible} {normal} {last}")
        except Exception as e:
            # UI boundary: show the problem in the output box instead of
            # failing the whole request.
            names.append(f"[Error: {e}]")
    return "\n".join(names)
140
-
141
- # ------------------
142
- # Gradio Interface
143
- # ------------------
144
 
145
# Gradio UI: one row per group of related controls, then the button and the
# output box. Count and length sliders use step=1 because their values are
# counts of names/characters; generate_names passes the count to range(),
# which only accepts integers.
with gr.Blocks() as demo:
    gr.Markdown("## 📜 Bible + SSA Name Generator")

    with gr.Row():
        n_slider = gr.Slider(1, 100, value=10, step=1, label="Number of names")
        sex_choice = gr.Radio(["M", "F", "Any"], label="Sex", value="Any")

    with gr.Row():
        pop_low_slider = gr.Slider(0.0, 1.0, value=0.0, step=0.01, label="Popularity Percentile Min (SSA)")
        pop_high_slider = gr.Slider(0.0, 1.0, value=1.0, step=0.01, label="Popularity Percentile Max (SSA)")

    with gr.Row():
        ssa_len = gr.Slider(1, 15, value=1, step=1, label="SSA Name Length (min)")
        ssa_max_len = gr.Slider(1, 15, value=10, step=1, label="SSA Name Length (max)")

    with gr.Row():
        bible_len = gr.Slider(1, 15, value=4, step=1, label="Bible Name Length (min)")
        bible_max_len = gr.Slider(1, 15, value=10, step=1, label="Bible Name Length (max)")

    generate_btn = gr.Button("Generate Names")
    output_box = gr.Textbox(label="Generated Names", lines=15)

    # The order of `inputs` must match the positional parameters of
    # generate_names.
    generate_btn.click(
        fn=generate_names,
        inputs=[n_slider, sex_choice, ssa_len, ssa_max_len, bible_len, bible_max_len, pop_low_slider, pop_high_slider],
        outputs=output_box
    )

demo.launch()
 
1
  import gradio as gr
2
  import pandas as pd
 
3
  from pathlib import Path
4
  from zipfile import ZipFile
5
+ import io
6
+ import contextlib
7
 
8
+ # --- File download & setup ---
9
def extract_names_zip(zip_path="names.zip", dest="."):
    """Extract the SSA names archive.

    Args:
        zip_path: Location of the archive; defaults to ``names.zip`` in the
            working directory.
        dest: Directory the archive members are extracted into; defaults to
            the working directory.

    Raises:
        FileNotFoundError: If the archive does not exist.
    """
    zip_path = Path(zip_path)
    if not zip_path.exists():
        raise FileNotFoundError(f"{zip_path} not found. Please upload it manually to the repo.")
    with ZipFile(zip_path, 'r') as zip_ref:
        zip_ref.extractall(dest)
    print(f"Unzipped {zip_path}")
16
+
17
# Unpack the bundled archive before anything looks for the per-year files.
extract_names_zip()

# --- Load datasets ---
# Per-year SSA files (yob*.txt) in the working directory, in sorted order.
ssa_name_txt_files = list(Path(".").glob("yob*.txt"))
ssa_name_txt_files.sort()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
 
22
def load_ssa_names(name_files=None):
    """Read the per-year SSA files and return the raw and aggregated tables.

    Args:
        name_files: Iterable of ``yob<YEAR>.txt`` paths to read. When omitted,
            the module-level ``ssa_name_txt_files`` list is used.

    Returns:
        A tuple ``(full_df, agg_df)``. ``full_df`` has one row per line of
        every file with columns ``name``, ``sex``, ``count`` and ``year``
        and a fresh 0..n-1 index. ``agg_df`` sums ``count`` per
        ``(name, sex)`` pair and is sorted by ``count``, largest first.

    Raises:
        ValueError: If there are no files to read.
    """
    if name_files is None:
        name_files = ssa_name_txt_files
    paths = [Path(p) for p in name_files]
    if not paths:
        # Without this guard pd.concat fails with "No objects to
        # concatenate", which hides the real cause: no yob*.txt files were
        # found where the archive was extracted.
        raise ValueError("No SSA name files (yob*.txt) found to load.")

    dfs = []
    for f in paths:
        # The birth year is encoded in the file name: "yob1999" -> 1999.
        year = int(f.stem.replace("yob", ""))
        df = pd.read_csv(f, names=["name", "sex", "count"])
        df["year"] = year
        dfs.append(df)
    full_df = pd.concat(dfs, ignore_index=True)
    agg_df = (
        full_df
        .groupby(["name", "sex"], as_index=False)["count"]
        .sum()
        .sort_values("count", ascending=False)
    )
    return full_df, agg_df
 
 
 
 
 
 
 
 
 
 
37
 
 
38
# SSA tables: every (name, sex, year) row, plus totals per (name, sex).
ssa_names_df, ssa_names_aggregated_df = load_ssa_names()

# Bible tables: the person table, and the label table with each person's
# sex left-joined onto it via person_id.
bible_names_df = pd.read_csv("BibleData-Person.csv")
bible_names_personlabel_df = pd.read_csv("BibleData-PersonLabel.csv").merge(
    bible_names_df[["person_id", "sex"]],
    on="person_id",
    how="left",
)

# --- Name generation logic ---
import random

# Surnames appended to generated names; one is drawn at random per name.
last_names = ["Smith", "Johnson", "Williams", "Taylor", "Brown"]
 
 
47
 
48
def get_normal_and_bible(
    ssa_names_aggregated_df,
    bible_names_df,
    min_length_ssa=3,
    max_length_ssa=8,
    min_length_bible=3,
    max_length_bible=8,
    ssa_popularity_percentile=(0.95, 1.0),
    sex=None,
    forbidden_names=None,
    debug=False,
):
    """Pick one random SSA name and one random Bible name passing the filters.

    Args:
        ssa_names_aggregated_df: Frame with ``name``, ``sex`` and ``count``
            columns.
        bible_names_df: Frame with a ``sex`` column and the Bible names in
            either a ``name`` or an ``english_label`` column.
        min_length_ssa: Minimum SSA name length (inclusive).
        max_length_ssa: Maximum SSA name length (inclusive).
        min_length_bible: Minimum Bible name length (inclusive).
        max_length_bible: Maximum Bible name length (inclusive).
        ssa_popularity_percentile: ``(low, high)`` fractions. After the
            length/sex/forbidden filters, SSA rows are sorted by ``count``
            ascending and only positions in
            ``[int(total * low), int(total * high))`` are kept.
        sex: When truthy, both tables are restricted to rows with this value
            in their ``sex`` column.
        forbidden_names: Optional collection of names removed from both pools.
        debug: When true, print the pool sizes after each filtering stage.

    Returns:
        Tuple ``(ssa_name, bible_name)``.

    Raises:
        ValueError: If either pool is empty after filtering.
        KeyError: If ``bible_names_df`` has no usable name column.
    """
    forbidden = set(forbidden_names) if forbidden_names else set()

    # --- SSA pool ---
    ssa_lengths = ssa_names_aggregated_df["name"].str.len()
    filtered_ssa = ssa_names_aggregated_df[
        ssa_lengths.between(min_length_ssa, max_length_ssa)
    ]
    if sex:
        filtered_ssa = filtered_ssa[filtered_ssa["sex"] == sex]
    if forbidden:
        # Forbidden names apply to both pools, not only the Bible one.
        filtered_ssa = filtered_ssa[~filtered_ssa["name"].isin(forbidden)]
    if debug:
        print(f"SSA names after length/sex filter: {len(filtered_ssa)}")

    total = len(filtered_ssa)
    low, high = ssa_popularity_percentile
    filtered_ssa = filtered_ssa.sort_values("count").iloc[int(total * low):int(total * high)]
    if debug:
        print(f"SSA names after popularity percentile slice: {len(filtered_ssa)}")

    # --- Bible pool ---
    label_col = _bible_label_column(bible_names_df)
    bible_lengths = bible_names_df[label_col].str.len()
    filtered_bible = bible_names_df[
        bible_lengths.between(min_length_bible, max_length_bible)
    ]
    if sex:
        filtered_bible = filtered_bible[filtered_bible["sex"] == sex]
    if forbidden:
        filtered_bible = filtered_bible[~filtered_bible[label_col].isin(forbidden)]
    if debug:
        print(f"Bible names after filtering: {len(filtered_bible)}")

    # Check before sampling: DataFrame.sample(1) on an empty frame raises its
    # own error, which would otherwise pre-empt this message.
    if len(filtered_bible) == 0 or len(filtered_ssa) == 0:
        raise ValueError("No valid names found after filtering.")

    ssa_name = filtered_ssa.sample(1)["name"].values[0]
    bible_name = filtered_bible.sample(1)[label_col].values[0]
    return ssa_name, bible_name


def _bible_label_column(bible_names_df):
    """Return the column of ``bible_names_df`` that holds the names."""
    for candidate in ("name", "english_label"):
        if candidate in bible_names_df.columns:
            return candidate
    raise KeyError("bible_names_df needs a 'name' or 'english_label' column")
99
+
100
+ # --- Gradio app ---
101
def generate_names(n, sex, min_len, max_len, min_bible_len, max_bible_len, pop_low, pop_high, debug_flag):
    """Generate ``n`` names of the form "<bible> <ssa> <last>" for the UI.

    Anything printed while the names are generated (the debug output of
    ``get_normal_and_bible``) is captured instead of going to stdout.

    Returns:
        A tuple ``(names, debug_text)``: the names joined with newlines, and
        the captured text. A name that could not be generated is replaced by
        an ``[Error: ...]`` line so one failure does not discard the batch.
    """
    captured = io.StringIO()
    lines = []
    with contextlib.redirect_stdout(captured):
        for _ in range(n):
            try:
                # NOTE(review): the raw person table `bible_names_df` is
                # passed here, while the merged `bible_names_personlabel_df`
                # built at startup is not referenced by this function.
                # Confirm which table is intended.
                ssa_pick, bible_pick = get_normal_and_bible(
                    ssa_names_aggregated_df,
                    bible_names_df,
                    min_length_ssa=min_len,
                    max_length_ssa=max_len,
                    min_length_bible=min_bible_len,
                    max_length_bible=max_bible_len,
                    ssa_popularity_percentile=(pop_low, pop_high),
                    sex=sex if sex in {"M", "F"} else None,
                    debug=debug_flag,
                )
                surname = random.choice(last_names)
            except Exception as e:
                lines.append(f"[Error: {e}]")
            else:
                lines.append(f"{bible_pick} {ssa_pick} {surname}")

    return "\n".join(lines), captured.getvalue()
124
+
 
 
 
 
 
 
 
125
 
126
# Gradio UI: rows of paired controls, a debug toggle, the trigger button and
# two text boxes (generated names, captured debug output).
with gr.Blocks() as demo:
    gr.Markdown("# 📜 Random Bible + SSA Name Generator")

    with gr.Row():
        n_slider = gr.Slider(1, 20, value=5, step=1, label="How many names?")
        sex_choice = gr.Radio(["M", "F", "Any"], label="Sex", value="Any")

    with gr.Row():
        ssa_len = gr.Slider(3, 12, value=3, step=1, label="SSA name min length")
        ssa_max_len = gr.Slider(3, 12, value=8, step=1, label="SSA name max length")

    with gr.Row():
        bible_len = gr.Slider(3, 12, value=3, step=1, label="Bible name min length")
        bible_max_len = gr.Slider(3, 12, value=8, step=1, label="Bible name max length")

    with gr.Row():
        pop_low_slider = gr.Slider(0.0, 1.0, value=0.95, step=0.01, label="SSA Popularity: Low Percentile")
        pop_high_slider = gr.Slider(0.0, 1.0, value=1.0, step=0.01, label="SSA Popularity: High Percentile")

    debug_checkbox = gr.Checkbox(label="Show debug output", value=True)

    generate_btn = gr.Button("🔀 Generate Names")

    output_box = gr.Textbox(label="Generated Names", lines=10)
    debug_box = gr.Textbox(label="Debug Output", lines=10)

    # Listed in the positional order of generate_names' parameters.
    controls = [
        n_slider,
        sex_choice,
        ssa_len,
        ssa_max_len,
        bible_len,
        bible_max_len,
        pop_low_slider,
        pop_high_slider,
        debug_checkbox,
    ]
    # generate_names returns (names, debug text), one value per output box.
    generate_btn.click(fn=generate_names, inputs=controls, outputs=[output_box, debug_box])

demo.launch()