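# Hugging Face file-viewer header captured along with the source (not part of the program):
# cdleong's picture | Update app.py | d4c6874 verified | raw | history blame | 8.43 kB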
import gradio as gr
import pandas as pd
from pathlib import Path
from zipfile import ZipFile
import io
import contextlib
import requests
# Names that are excluded from the Bible-name candidates by default.
# This list is used as the initial value of the "FORBIDDEN NAMES" textbox in
# the UI; whatever the textbox contains is what get_normal_and_bible filters
# out of the Bible-name column (exact, case-sensitive match).
FORBIDDEN_NAMES =["Judas",
"Maher-shalal-hash-baz",
"Bathsheba",
"Jephthah",
"Jehoshaphat",
"Tiebreaker",
"Boanerges",
"Jezebel",
"Gomorrah",
"Hymenaeus",
"Herod",
"Pilate",
"Doeg",
"Ziph",
"Phygelus",
"Hermogenes",
"Philetus",
"Balaam",
"Achan",
"Caiaphas",
"Pontius",
"Ahab",
"Manasseh",
"Rehoboam",
"Nebuchadnezzar",
"Delilah",
"Lo-ammi",
"Lo-ruhamah",
"Beelzebub",
"Ichabod",
"Saphira",
"Jushab-hesed",
"Benjarman",
"Cain",
"Esau",
"Machiavelli", # found
"Barabbas",
"Sapphira",
"Shur",
]
def download_file(url: str, dest_path: Path, timeout: float = 30.0) -> None:
    """Download ``url`` to ``dest_path`` unless that file already exists.

    Args:
        url: Address fetched with an HTTP GET request.
        dest_path: Where the response body is stored. If this path already
            exists, nothing is downloaded.
        timeout: Seconds to wait on the server before giving up, so that a
            stalled connection cannot hang start-up indefinitely.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    if dest_path.exists():
        print(f"{dest_path.name} already exists. Skipping download.")
        return
    print(f"Downloading {url}")
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    # Write to a sibling ".part" file and rename it into place afterwards.
    # An interrupted write then never leaves a truncated file at dest_path
    # that the exists() check above would mistake for a finished download.
    partial_path = dest_path.with_name(dest_path.name + ".part")
    partial_path.write_bytes(response.content)
    partial_path.replace(dest_path)
    print(f"Saved to {dest_path}")
# --- File download & setup ---
def extract_names_zip():
    """Unpack ``names.zip`` from the working directory into the working directory.

    Raises:
        FileNotFoundError: If ``names.zip`` is not present.
    """
    archive = Path("names.zip")
    if archive.exists():
        with ZipFile(archive, mode="r") as bundle:
            bundle.extractall(path=".")
        print("Unzipped names.zip")
        return
    raise FileNotFoundError("names.zip not found. Please upload it manually to the repo.")
# Unpack the SSA name files first; this raises if names.zip is missing.
extract_names_zip()
# Download Bible CSVs if missing
for _csv_url, _csv_filename in (
    (
        "https://raw.githubusercontent.com/BradyStephenson/bible-data/refs/heads/main/BibleData-Person.csv",
        "BibleData-Person.csv",
    ),
    (
        "https://raw.githubusercontent.com/BradyStephenson/bible-data/refs/heads/main/BibleData-PersonLabel.csv",
        "BibleData-PersonLabel.csv",
    ),
):
    download_file(_csv_url, Path(_csv_filename))
# --- Load datasets ---
# Every "yob*.txt" file in the working directory, in sorted path order.
ssa_name_txt_files = sorted(Path(".").glob("yob*.txt"))


def _read_ssa_year_file(txt_path):
    """Read one ``yob*.txt`` file and tag its rows with the year from the file name."""
    birth_year = int(txt_path.stem.replace("yob", ""))
    frame = pd.read_csv(txt_path, names=["name", "sex", "count"])
    frame["year"] = birth_year
    return frame


def load_ssa_names():
    """Load every file listed in ``ssa_name_txt_files``.

    Returns:
        A ``(full_df, agg_df)`` pair. ``full_df`` has one row per line of the
        input files with columns ``name``, ``sex``, ``count`` and ``year``.
        ``agg_df`` holds ``count`` summed over all years for each
        ``(name, sex)`` pair, ordered from the largest total to the smallest.
    """
    yearly_frames = [_read_ssa_year_file(txt_path) for txt_path in ssa_name_txt_files]
    full_df = pd.concat(yearly_frames, ignore_index=True)
    totals = full_df.groupby(["name", "sex"], as_index=False)["count"].sum()
    agg_df = totals.sort_values("count", ascending=False)
    return full_df, agg_df
# Full per-year SSA table, plus the per-(name, sex) totals derived from it.
ssa_names_df, ssa_names_aggregated_df = load_ssa_names()
bible_names_df = pd.read_csv("BibleData-Person.csv")
bible_names_personlabel_df = pd.read_csv("BibleData-PersonLabel.csv")
# Attach each person's sex to their labels (left join keeps labels whose
# person_id has no match in the person table).
bible_names_personlabel_df = bible_names_personlabel_df.merge(
    bible_names_df[["person_id", "sex"]], on="person_id", how="left"
)
# Keep only proper names. The explicit .copy() makes the result an independent
# frame, so the column assignment below does not write into a filtered slice
# (which pandas reports as SettingWithCopyWarning).
bible_names_personlabel_df = bible_names_personlabel_df[
    bible_names_personlabel_df["label_type"] == "proper name"
].copy()
# Recode sex to the "M"/"F" values that the UI's sex filter passes in.
bible_names_personlabel_df["sex"] = bible_names_personlabel_df["sex"].replace({"male": "M", "female": "F"})
# --- Name generation logic ---
import random
# Fallback surnames: generate_names draws one with random.choice when it is
# not given a last name.
last_names = ["Smith", "Johnson", "Williams", "Taylor", "Brown"]
def get_normal_and_bible(
    ssa_names_aggregated_df,
    bible_names_df,
    min_length_ssa=3,
    max_length_ssa=8,
    min_length_bible=3,
    max_length_bible=8,
    ssa_popularity_percentile=(0.95, 1.0),
    sex=None,
    forbidden_names=None,
    ssa_names_col="name",
    bible_names_col="english_label",
    debug=False,
):
    """Pick one random SSA name and one random Bible name.

    Args:
        ssa_names_aggregated_df: DataFrame with the ``ssa_names_col`` column
            plus ``sex`` and ``count`` columns.
        bible_names_df: DataFrame with the ``bible_names_col`` column plus a
            ``sex`` column.
        min_length_ssa: Shortest SSA name allowed (inclusive).
        max_length_ssa: Longest SSA name allowed (inclusive).
        min_length_bible: Shortest Bible name allowed (inclusive).
        max_length_bible: Longest Bible name allowed (inclusive).
        ssa_popularity_percentile: ``(low, high)`` fractions in ``[0, 1]``.
            After the length/sex filters the SSA rows are ranked by ascending
            ``count`` and only rows from position ``int(total * low)`` up to
            (not including) ``int(total * high)`` are kept, so ``(0.95, 1.0)``
            keeps the most frequent 5%.
        sex: If truthy, only rows whose ``sex`` column equals this value are
            considered in both frames.
        forbidden_names: Bible names to exclude (exact match). ``None`` means
            nothing is excluded.
        ssa_names_col: Column holding the SSA names.
        bible_names_col: Column holding the Bible names.
        debug: If true, print the candidate counts after each filter stage.

    Returns:
        ``(ssa_name, bible_name)``.

    Raises:
        ValueError: If either candidate pool is empty after filtering.
    """
    if forbidden_names is None:
        forbidden_names = set()

    # Boolean indexing returns new frames, so the inputs are never modified
    # and no defensive copy of the full tables is needed.
    ssa_lengths = ssa_names_aggregated_df[ssa_names_col].str.len()
    filtered_ssa = ssa_names_aggregated_df[ssa_lengths.between(min_length_ssa, max_length_ssa)]
    if sex:
        filtered_ssa = filtered_ssa[filtered_ssa["sex"] == sex]
    if debug:
        print(f"SSA names after length/sex filter: {len(filtered_ssa)}")

    # Rank ascending by count so the percentile window maps onto row positions.
    total = len(filtered_ssa)
    low, high = ssa_popularity_percentile
    filtered_ssa = filtered_ssa.sort_values("count").iloc[int(total * low):int(total * high)]
    if debug:
        print(f"SSA names after popularity percentile slice: {len(filtered_ssa)}")

    bible_lengths = bible_names_df[bible_names_col].str.len()
    filtered_bible = bible_names_df[bible_lengths.between(min_length_bible, max_length_bible)]
    if sex:
        filtered_bible = filtered_bible[filtered_bible["sex"] == sex]
    filtered_bible = filtered_bible[~filtered_bible[bible_names_col].isin(forbidden_names)]
    if debug:
        print(f"Bible names after filtering: {len(filtered_bible)}")

    # Validate BEFORE sampling: DataFrame.sample on an empty frame raises its
    # own, much less helpful, error.
    if filtered_bible.empty or filtered_ssa.empty:
        raise ValueError("No valid names found after filtering.")

    ssa_name = filtered_ssa.sample(1)[ssa_names_col].values[0]
    bible_name = filtered_bible.sample(1)[bible_names_col].values[0]
    return ssa_name, bible_name
# --- Gradio app ---
def generate_names(n, sex, min_len, max_len, min_bible_len, max_bible_len, pop_low, pop_high, debug_flag, last, forbidden_names_text):
    """Generate ``n`` names of the form ``"<bible> <ssa> <last>"`` for the UI.

    Args:
        n: How many names to generate (coerced to ``int``).
        sex: ``"M"`` or ``"F"`` to filter by sex; any other value disables
            the filter.
        min_len: Minimum SSA name length.
        max_len: Maximum SSA name length.
        min_bible_len: Minimum Bible name length.
        max_bible_len: Maximum Bible name length.
        pop_low: Lower bound of the SSA popularity percentile window.
        pop_high: Upper bound of the SSA popularity percentile window.
        debug_flag: If true, filter statistics for the first name are
            captured and returned as the debug text.
        last: Last name appended to every result. If ``None`` or blank, one
            entry of ``last_names`` is chosen at random on the first
            successful draw and reused for the rest of the call.
        forbidden_names_text: Comma-separated Bible names to exclude.

    Returns:
        ``(names_text, debug_text)``. ``names_text`` has one line per
        requested name; a failed draw yields an ``"[Error: ...]"`` line in
        its place. ``debug_text`` is whatever was printed while generating.
    """
    results = []
    debug_output = io.StringIO()
    forbidden_names = {
        name.strip() for name in (forbidden_names_text or "").split(",") if name.strip()
    }
    # Treat None and blank/whitespace-only input alike, so the random
    # fallback below also fires when the field is left empty.
    last = (last or "").strip()
    with contextlib.redirect_stdout(debug_output):
        # int(): tolerate a float count from the caller; range() rejects floats.
        for i in range(int(n)):
            try:
                normal, bible = get_normal_and_bible(
                    ssa_names_aggregated_df,
                    bible_names_personlabel_df,
                    min_length_ssa=min_len,
                    max_length_ssa=max_len,
                    min_length_bible=min_bible_len,
                    max_length_bible=max_bible_len,
                    ssa_popularity_percentile=(pop_low, pop_high),
                    sex=sex if sex in {"M", "F"} else None,
                    forbidden_names=forbidden_names,
                    # Honour the "Show debug output" checkbox; the stats are
                    # identical for every draw, so print them only once.
                    debug=bool(debug_flag) and i == 0,
                )
                if not last:
                    last = random.choice(last_names)
                results.append(f"{bible} {normal} {last}")
            except Exception as e:
                # UI boundary: report the failure in place of the name
                # instead of aborting the whole batch.
                results.append(f"[Error: {e}]")
    return "\n".join(results), debug_output.getvalue()
# Build the UI: input controls, a generate button, and two output boxes wired
# to generate_names.
# NOTE(review): the original indentation was lost, so which controls sit inside
# which gr.Row() is reconstructed — in particular, placing the debug checkbox
# outside the last row is an assumption; confirm against the deployed layout.
with gr.Blocks() as demo:
    gr.Markdown("# 📜 Random Bible + SSA Name Generator")
    with gr.Row():
        n_slider = gr.Slider(1, 20, value=5, step=1, label="How many names?")
        sex_choice = gr.Radio(["M", "F", "Any"], label="Sex", value="Any")
    with gr.Row():
        ssa_len = gr.Slider(3, 40, value=3, step=1, label="SSA name min length")
        ssa_max_len = gr.Slider(3, 40, value=8, step=1, label="SSA name max length")
    with gr.Row():
        bible_len = gr.Slider(3, 40, value=3, step=1, label="Bible name min length")
        bible_max_len = gr.Slider(3, 40, value=8, step=1, label="Bible name max length")
    with gr.Row():
        pop_low_slider = gr.Slider(0.0, 1.0, value=0.95, step=0.01, label="SSA Popularity: Low Percentile")
        pop_high_slider = gr.Slider(0.0, 1.0, value=1.0, step=0.01, label="SSA Popularity: High Percentile")
    with gr.Row():
        last_name_input = gr.Textbox(label="Last Name")
    with gr.Row():
        # A Textbox value is a string. Passing the list itself would show its
        # Python repr, and generate_names' comma split would then yield tokens
        # such as "'Herod'" (quotes included) that never match a Bible name.
        forbidden_names_input = gr.Textbox(
            label="FORBIDDEN NAMES (comma-separated)",
            value=", ".join(FORBIDDEN_NAMES),
        )
    debug_checkbox = gr.Checkbox(label="Show debug output", value=True)
    generate_btn = gr.Button("🔀 Generate Names")
    output_box = gr.Textbox(label="Generated Names", lines=10)
    debug_box = gr.Textbox(label="Debug Output", lines=10)
    # The order of `inputs` must match generate_names' positional parameters.
    generate_btn.click(
        fn=generate_names,
        inputs=[
            n_slider,
            sex_choice,
            ssa_len,
            ssa_max_len,
            bible_len,
            bible_max_len,
            pop_low_slider,
            pop_high_slider,
            debug_checkbox,
            last_name_input,
            forbidden_names_input,
        ],
        outputs=[output_box, debug_box],
    )
demo.launch()