Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,174 +1,168 @@
|
|
1 |
import gradio as gr
|
2 |
import pandas as pd
|
3 |
-
import requests
|
4 |
from pathlib import Path
|
5 |
from zipfile import ZipFile
|
|
|
|
|
6 |
|
7 |
-
#
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
# Download files
|
22 |
-
download_file("https://raw.githubusercontent.com/BradyStephenson/bible-data/refs/heads/main/BibleData-Person.csv", Path("BibleData-Person.csv"))
|
23 |
-
download_file("https://raw.githubusercontent.com/BradyStephenson/bible-data/refs/heads/main/BibleData-PersonLabel.csv", Path("BibleData-PersonLabel.csv"))
|
24 |
-
|
25 |
-
# Assume `names.zip` is already in the repo
|
26 |
-
if not Path("names").exists():
|
27 |
-
print("Unzipping local names.zip...")
|
28 |
-
with ZipFile("names.zip", 'r') as zip_ref:
|
29 |
-
zip_ref.extractall("names")
|
30 |
-
else:
|
31 |
-
print("SSA name files already extracted.")
|
32 |
-
|
33 |
-
# Define where to find SSA name files
|
34 |
-
ssa_name_txt_files = list(Path("names").glob("yob*.txt"))
|
35 |
-
|
36 |
-
# ------------------
|
37 |
-
# Load Data
|
38 |
-
# ------------------
|
39 |
|
40 |
def load_ssa_names():
|
41 |
-
|
42 |
-
for
|
43 |
-
|
44 |
-
df = pd.read_csv(
|
45 |
-
df["year"] =
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
.groupby(["name", "sex"], as_index=False)["count"]
|
51 |
.sum()
|
52 |
-
.sort_values(
|
53 |
)
|
54 |
-
return
|
55 |
-
|
56 |
-
def load_bible_names():
|
57 |
-
bible_names_df = pd.read_csv("BibleData-Person.csv")
|
58 |
-
bible_names_personlabel_df = pd.read_csv("BibleData-PersonLabel.csv")
|
59 |
-
bible_names_personlabel_df = bible_names_personlabel_df.merge(
|
60 |
-
bible_names_df[["person_id", "sex"]],
|
61 |
-
on="person_id",
|
62 |
-
how="left"
|
63 |
-
)
|
64 |
-
return bible_names_personlabel_df
|
65 |
|
66 |
-
# Load data on startup
|
67 |
ssa_names_df, ssa_names_aggregated_df = load_ssa_names()
|
68 |
-
bible_names_df =
|
|
|
|
|
69 |
|
|
|
|
|
70 |
|
71 |
-
|
72 |
-
# filtering and picking
|
73 |
-
# -----------------------
|
74 |
|
75 |
def get_normal_and_bible(
|
76 |
-
|
77 |
bible_names_df,
|
78 |
-
min_length_ssa=
|
79 |
-
max_length_ssa=
|
80 |
-
min_length_bible=
|
81 |
-
max_length_bible=
|
82 |
-
ssa_popularity_percentile=
|
83 |
sex=None,
|
84 |
forbidden_names=None,
|
|
|
85 |
):
|
86 |
-
|
87 |
-
|
88 |
-
if ssa_popularity_percentile is not None:
|
89 |
-
low, high = ssa_popularity_percentile
|
90 |
-
name_counts = (
|
91 |
-
ssa_names_df.groupby("name", as_index=False)["count"]
|
92 |
-
.sum()
|
93 |
-
.sort_values(by="count", ascending=True)
|
94 |
-
)
|
95 |
-
total = len(name_counts)
|
96 |
-
selected_names = name_counts.iloc[int(low * total):int(high * total)]["name"]
|
97 |
-
filtered_ssa_df = ssa_names_df[ssa_names_df["name"].isin(selected_names)]
|
98 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
99 |
if sex:
|
100 |
-
|
101 |
-
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
-
|
125 |
-
|
126 |
-
|
127 |
-
|
128 |
-
|
129 |
-
|
130 |
-
|
131 |
-
|
132 |
-
|
133 |
-
|
134 |
-
|
135 |
-
|
136 |
-
|
137 |
-
except Exception as e:
|
138 |
-
names.append(f"[Error: {e}]")
|
139 |
-
return "\n".join(names)
|
140 |
-
|
141 |
-
# ------------------
|
142 |
-
# Gradio Interface
|
143 |
-
# ------------------
|
144 |
|
145 |
with gr.Blocks() as demo:
|
146 |
-
gr.Markdown("
|
147 |
|
148 |
with gr.Row():
|
149 |
-
n_slider = gr.Slider(1,
|
150 |
sex_choice = gr.Radio(["M", "F", "Any"], label="Sex", value="Any")
|
151 |
|
152 |
with gr.Row():
|
153 |
-
|
154 |
-
|
155 |
-
|
156 |
|
157 |
with gr.Row():
|
158 |
-
|
159 |
-
|
160 |
|
161 |
with gr.Row():
|
162 |
-
|
163 |
-
|
|
|
|
|
|
|
|
|
164 |
|
165 |
-
|
166 |
-
|
167 |
|
168 |
generate_btn.click(
|
169 |
fn=generate_names,
|
170 |
-
inputs=[
|
171 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
172 |
)
|
173 |
|
174 |
demo.launch()
|
|
|
1 |
import gradio as gr
|
2 |
import pandas as pd
|
|
|
3 |
from pathlib import Path
|
4 |
from zipfile import ZipFile
|
5 |
+
import io
|
6 |
+
import contextlib
|
7 |
|
8 |
+
# --- File download & setup ---
|
9 |
+
def extract_names_zip():
    """Unpack ``names.zip`` from the working directory into the working directory.

    Raises:
        FileNotFoundError: If ``names.zip`` is not present.
    """
    archive = Path("names.zip")
    if archive.exists():
        with ZipFile(archive, 'r') as bundle:
            # Members land next to the archive, i.e. directly in ".".
            bundle.extractall(".")
        print("Unzipped names.zip")
        return
    raise FileNotFoundError("names.zip not found. Please upload it manually to the repo.")
|
16 |
+
|
17 |
+
# Runs at import time: the archive is unpacked into the working directory
# before the glob below looks for the extracted files.
extract_names_zip()

# --- Load datasets ---
# Every "yob*.txt" file directly in the working directory, in sorted path order.
ssa_name_txt_files = sorted(Path(".").glob("yob*.txt"))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
21 |
|
22 |
def load_ssa_names(files=None):
    """Load SSA name files into a per-year frame and an all-years total.

    Args:
        files: Optional iterable of paths to ``yob<year>.txt`` files. When
            omitted, the module-level ``ssa_name_txt_files`` list is used.

    Returns:
        A ``(full_df, agg_df)`` tuple. ``full_df`` holds every row of every
        file with columns ``name``, ``sex``, ``count`` and ``year``.
        ``agg_df`` sums ``count`` per (name, sex) across all files and is
        sorted by ``count`` in descending order. When there are no input
        files both frames are empty but still carry their columns.
    """
    if files is None:
        files = ssa_name_txt_files

    columns = ["name", "sex", "count"]
    dfs = []
    for f in files:
        f = Path(f)
        # The year is whatever remains of the file stem once "yob" is removed.
        year = int(f.stem.replace("yob", ""))
        # Passing names= makes pandas treat the first line as data, not a header.
        df = pd.read_csv(f, names=columns)
        df["year"] = year
        dfs.append(df)

    if not dfs:
        # pd.concat raises an opaque ValueError on an empty list; return
        # well-formed empty frames so callers can report "no names" themselves.
        full_df = pd.DataFrame(columns=columns + ["year"])
        return full_df, full_df[columns].copy()

    full_df = pd.concat(dfs, ignore_index=True)
    agg_df = (
        full_df
        .groupby(["name", "sex"], as_index=False)["count"]
        .sum()
        .sort_values("count", ascending=False)
    )
    return full_df, agg_df
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
37 |
|
|
|
38 |
# Module-level data, loaded once at import time.
ssa_names_df, ssa_names_aggregated_df = load_ssa_names()
# Both CSVs are read from the working directory; nothing here downloads them.
bible_names_df = pd.read_csv("BibleData-Person.csv")
bible_names_personlabel_df = pd.read_csv("BibleData-PersonLabel.csv")
# Left join keeps every label row and attaches the person's "sex" by person_id.
# NOTE(review): the merged frame is not referenced by the code below, which
# passes bible_names_df to get_normal_and_bible -- confirm which is intended.
bible_names_personlabel_df = bible_names_personlabel_df.merge(bible_names_df[["person_id", "sex"]], on="person_id", how="left")

# --- Name generation logic ---
import random

# Pool of surnames appended to each generated name.
last_names = ["Smith", "Johnson", "Williams", "Taylor", "Brown"]
|
|
|
|
|
47 |
|
48 |
def get_normal_and_bible(
    ssa_names_aggregated_df,
    bible_names_df,
    min_length_ssa=3,
    max_length_ssa=8,
    min_length_bible=3,
    max_length_bible=8,
    ssa_popularity_percentile=(0.95, 1.0),
    sex=None,
    forbidden_names=None,
    debug=False,
):
    """Pick one random SSA name and one random Bible name matching the filters.

    Args:
        ssa_names_aggregated_df: Frame with ``name``, ``sex`` and ``count``
            columns.
        bible_names_df: Frame with ``name`` and ``sex`` columns.
        min_length_ssa: Minimum SSA name length (inclusive).
        max_length_ssa: Maximum SSA name length (inclusive).
        min_length_bible: Minimum Bible name length (inclusive).
        max_length_bible: Maximum Bible name length (inclusive).
        ssa_popularity_percentile: ``(low, high)`` fractions. After sorting
            the filtered SSA rows by ascending ``count``, only rows from
            position ``int(total * low)`` up to ``int(total * high)`` are kept.
        sex: When truthy, both frames are restricted to rows whose ``sex``
            equals this value.
        forbidden_names: Names excluded from the Bible candidates only.
        debug: When true, print candidate counts after each filtering stage.

    Returns:
        ``(ssa_name, bible_name)``.

    Raises:
        ValueError: If either candidate set is empty after filtering.
    """
    if forbidden_names is None:
        forbidden_names = set()

    # Boolean indexing yields new frames, so the inputs are never modified.
    ssa_lengths = ssa_names_aggregated_df["name"].str.len()
    filtered_ssa = ssa_names_aggregated_df[
        ssa_lengths.between(min_length_ssa, max_length_ssa)
    ]
    if sex:
        filtered_ssa = filtered_ssa[filtered_ssa["sex"] == sex]
    if debug:
        print(f"SSA names after length/sex filter: {len(filtered_ssa)}")

    # Percentiles are positions in the ascending-by-count ordering, so the
    # slice nearest 1.0 holds the highest counts.
    total = len(filtered_ssa)
    low, high = ssa_popularity_percentile
    filtered_ssa = filtered_ssa.sort_values("count").iloc[
        int(total * low):int(total * high)
    ]
    if debug:
        print(f"SSA names after popularity percentile slice: {len(filtered_ssa)}")

    bible_lengths = bible_names_df["name"].str.len()
    filtered_bible = bible_names_df[
        bible_lengths.between(min_length_bible, max_length_bible)
    ]
    if sex:
        filtered_bible = filtered_bible[filtered_bible["sex"] == sex]
    filtered_bible = filtered_bible[~filtered_bible["name"].isin(forbidden_names)]
    if debug:
        print(f"Bible names after filtering: {len(filtered_bible)}")

    # Guard BEFORE sampling: sample(1) on an empty frame raises pandas' own
    # error, which would hide the message below.
    if filtered_bible.empty or filtered_ssa.empty:
        raise ValueError("No valid names found after filtering.")

    ssa_name = filtered_ssa.sample(1)["name"].values[0]
    bible_name = filtered_bible.sample(1)["name"].values[0]

    return ssa_name, bible_name
|
99 |
+
|
100 |
+
# --- Gradio app ---
|
101 |
+
def generate_names(n, sex, min_len, max_len, min_bible_len, max_bible_len, pop_low, pop_high, debug_flag):
    """Build ``n`` full names of the form "<bible> <ssa> <last>".

    Args:
        n: How many names to generate.
        sex: "M" or "F" to restrict both name sources; any other value
            (e.g. "Any") disables the restriction.
        min_len, max_len: Length bounds for the SSA name.
        min_bible_len, max_bible_len: Length bounds for the Bible name.
        pop_low, pop_high: SSA popularity percentile bounds as fractions.
        debug_flag: Forwarded as ``debug`` to ``get_normal_and_bible``.

    Returns:
        ``(names_text, debug_text)``: the generated names joined by newlines,
        and everything printed to stdout while generating them. A failed
        draw contributes an "[Error: ...]" line instead of a name.
    """
    # Each bound comes from an independent slider, so a pair can be inverted;
    # put each pair in order rather than failing every draw.
    min_len, max_len = sorted((min_len, max_len))
    min_bible_len, max_bible_len = sorted((min_bible_len, max_bible_len))
    pop_low, pop_high = sorted((pop_low, pop_high))
    chosen_sex = sex if sex in {"M", "F"} else None

    results = []
    debug_output = io.StringIO()
    # NOTE(review): redirect_stdout replaces sys.stdout for the whole process,
    # so output from concurrent requests could end up in this buffer.
    with contextlib.redirect_stdout(debug_output):
        # range() rejects floats; coerce in case the slider value is not an int.
        for _ in range(int(n)):
            try:
                normal, bible = get_normal_and_bible(
                    ssa_names_aggregated_df,
                    bible_names_df,
                    min_length_ssa=min_len,
                    max_length_ssa=max_len,
                    min_length_bible=min_bible_len,
                    max_length_bible=max_bible_len,
                    ssa_popularity_percentile=(pop_low, pop_high),
                    sex=chosen_sex,
                    debug=debug_flag,
                )
                last = random.choice(last_names)
                results.append(f"{bible} {normal} {last}")
            except Exception as e:
                # UI boundary: show the failure in the output box instead of
                # aborting the remaining draws.
                results.append(f"[Error: {e}]")

    return "\n".join(results), debug_output.getvalue()
|
124 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
125 |
|
126 |
# UI layout: four rows of controls, then the debug toggle, button and outputs.
with gr.Blocks() as demo:
    gr.Markdown("# 📜 Random Bible + SSA Name Generator")

    # How many names to produce and which sex to restrict to.
    with gr.Row():
        n_slider = gr.Slider(1, 20, value=5, step=1, label="How many names?")
        sex_choice = gr.Radio(["M", "F", "Any"], label="Sex", value="Any")

    # Length bounds for the SSA name.
    with gr.Row():
        ssa_len = gr.Slider(3, 12, value=3, step=1, label="SSA name min length")
        ssa_max_len = gr.Slider(3, 12, value=8, step=1, label="SSA name max length")

    # Length bounds for the Bible name.
    with gr.Row():
        bible_len = gr.Slider(3, 12, value=3, step=1, label="Bible name min length")
        bible_max_len = gr.Slider(3, 12, value=8, step=1, label="Bible name max length")

    # Popularity percentile window, as fractions between 0 and 1.
    with gr.Row():
        pop_low_slider = gr.Slider(0.0, 1.0, value=0.95, step=0.01, label="SSA Popularity: Low Percentile")
        pop_high_slider = gr.Slider(0.0, 1.0, value=1.0, step=0.01, label="SSA Popularity: High Percentile")

    debug_checkbox = gr.Checkbox(label="Show debug output", value=True)

    generate_btn = gr.Button("🔀 Generate Names")

    output_box = gr.Textbox(label="Generated Names", lines=10)
    debug_box = gr.Textbox(label="Debug Output", lines=10)

    # The order of inputs must match generate_names' positional parameters;
    # its two return values fill output_box and debug_box respectively.
    generate_btn.click(
        fn=generate_names,
        inputs=[
            n_slider,
            sex_choice,
            ssa_len,
            ssa_max_len,
            bible_len,
            bible_max_len,
            pop_low_slider,
            pop_high_slider,
            debug_checkbox
        ],
        outputs=[output_box, debug_box],
    )

demo.launch()
|