"""Gradio app that builds names from a Bible name, an SSA baby name and a surname.

On start-up the script unpacks ``names.zip`` (expected to provide the
``yob*.txt`` files read below), downloads two Bible person CSVs if they are
not already on disk, loads everything into pandas DataFrames and launches a
small Gradio UI around :func:`generate_names`.
"""

import contextlib
import io
import random
from pathlib import Path
from zipfile import ZipFile

import gradio as gr
import pandas as pd
import requests

# Bible names excluded by default; the UI shows this list and lets the user edit it.
FORBIDDEN_NAMES = [
    "Judas",
    "Maher-shalal-hash-baz",
    "Bathsheba",
    "Jephthah",
    "Jehoshaphat",
    "Tiebreaker",
    "Boanerges",
    "Jezebel",
    "Gomorrah",
    "Hymenaeus",
    "Herod",
    "Pilate",
    "Doeg",
    "Ziph",
    "Phygelus",
    "Hermogenes",
    "Philetus",
    "Balaam",
    "Achan",
    "Caiaphas",
    "Pontius",
    "Ahab",
    "Manasseh",
    "Rehoboam",
    "Nebuchadnezzar",
    "Delilah",
    "Lo-ammi",
    "Lo-ruhamah",
    "Beelzebub",
    "Ichabod",
    "Saphira",
    "Jushab-hesed",
    "Benjarman",
    "Cain",
    "Esau",
    "Machiavelli",
    # found
    "Barabbas",
    "Sapphira",
    "Shur",
]

# Seconds to wait on the network before giving up on a download.
DOWNLOAD_TIMEOUT_SECONDS = 30


def download_file(url: str, dest_path: Path):
    """Download *url* to *dest_path* unless that file already exists.

    Args:
        url: Address to fetch with an HTTP GET.
        dest_path: Where the response body is written.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not respond in time.
    """
    if dest_path.exists():
        print(f"{dest_path.name} already exists. Skipping download.")
        return
    print(f"Downloading {url}")
    # Without a timeout a stalled connection would hang app start-up forever.
    response = requests.get(url, timeout=DOWNLOAD_TIMEOUT_SECONDS)
    response.raise_for_status()
    dest_path.write_bytes(response.content)
    print(f"Saved to {dest_path}")


# --- File download & setup ---
def extract_names_zip():
    """Extract ``names.zip`` from the working directory into the working directory.

    Raises:
        FileNotFoundError: If ``names.zip`` is not present.
    """
    zip_path = Path("names.zip")
    if not zip_path.exists():
        raise FileNotFoundError("names.zip not found. Please upload it manually to the repo.")
    with ZipFile(zip_path, "r") as zip_ref:
        zip_ref.extractall(".")
    print("Unzipped names.zip")


extract_names_zip()

# Download Bible CSVs if missing
download_file(
    "https://raw.githubusercontent.com/BradyStephenson/bible-data/refs/heads/main/BibleData-Person.csv",
    Path("BibleData-Person.csv"),
)
download_file(
    "https://raw.githubusercontent.com/BradyStephenson/bible-data/refs/heads/main/BibleData-PersonLabel.csv",
    Path("BibleData-PersonLabel.csv"),
)

# --- Load datasets ---
ssa_name_txt_files = sorted(Path(".").glob("yob*.txt"))


def load_ssa_names():
    """Load every ``yob*.txt`` file found at start-up into DataFrames.

    Each file is read as header-less CSV with columns ``name``, ``sex`` and
    ``count``; the year is parsed from the file name (``yob<year>.txt``).

    Returns:
        A ``(full_df, agg_df)`` tuple. ``full_df`` holds one row per
        name/sex/year. ``agg_df`` sums ``count`` over all years per
        name/sex pair and is sorted by ``count`` in descending order.

    Raises:
        FileNotFoundError: If no ``yob*.txt`` files were found.
    """
    if not ssa_name_txt_files:
        # pd.concat([]) would otherwise fail with an unhelpful message.
        raise FileNotFoundError("No yob*.txt files found in the working directory.")

    dfs = []
    for f in ssa_name_txt_files:
        year = int(f.stem.replace("yob", ""))
        df = pd.read_csv(f, names=["name", "sex", "count"])
        df["year"] = year
        dfs.append(df)

    full_df = pd.concat(dfs, ignore_index=True)
    agg_df = (
        full_df
        .groupby(["name", "sex"], as_index=False)["count"]
        .sum()
        .sort_values("count", ascending=False)
    )
    return full_df, agg_df


ssa_names_df, ssa_names_aggregated_df = load_ssa_names()

bible_names_df = pd.read_csv("BibleData-Person.csv")
bible_names_personlabel_df = pd.read_csv("BibleData-PersonLabel.csv")
# Attach each person's sex to their labels, then keep only proper names.
bible_names_personlabel_df = bible_names_personlabel_df.merge(
    bible_names_df[["person_id", "sex"]], on="person_id", how="left"
)
# .copy() so the column assignment below acts on an independent frame, not a slice.
bible_names_personlabel_df = bible_names_personlabel_df[
    bible_names_personlabel_df["label_type"] == "proper name"
].copy()
# Use the same "M"/"F" codes as the SSA data so one sex filter fits both.
bible_names_personlabel_df["sex"] = bible_names_personlabel_df["sex"].replace(
    {"male": "M", "female": "F"}
)

# --- Name generation logic ---
last_names = ["Smith", "Johnson", "Williams", "Taylor", "Brown"]


def get_normal_and_bible(
    ssa_names_aggregated_df,
    bible_names_df,
    min_length_ssa=3,
    max_length_ssa=8,
    min_length_bible=3,
    max_length_bible=8,
    ssa_popularity_percentile=(0.95, 1.0),
    sex=None,
    forbidden_names=None,
    ssa_names_col="name",
    bible_names_col="english_label",
    debug=False,
):
    """Pick one random SSA name and one random Bible name matching the filters.

    Args:
        ssa_names_aggregated_df: DataFrame with ``ssa_names_col``, ``sex`` and
            ``count`` columns.
        bible_names_df: DataFrame with ``bible_names_col`` and ``sex`` columns.
        min_length_ssa: Minimum SSA name length (inclusive).
        max_length_ssa: Maximum SSA name length (inclusive).
        min_length_bible: Minimum Bible name length (inclusive).
        max_length_bible: Maximum Bible name length (inclusive).
        ssa_popularity_percentile: ``(low, high)`` fractions in ``[0, 1]``.
            The filtered SSA rows are sorted by ascending ``count`` and only
            rows between those two positions are kept, so ``(0.95, 1.0)``
            keeps the most frequent 5%.
        sex: If truthy, keep only rows whose ``sex`` column equals it.
        forbidden_names: Bible names to exclude; ``None`` excludes nothing.
        ssa_names_col: Column holding the SSA names.
        bible_names_col: Column holding the Bible names.
        debug: Print the size of each filtered pool to stdout.

    Returns:
        A ``(ssa_name, bible_name)`` tuple.

    Raises:
        ValueError: If either pool is empty after filtering.
    """
    if forbidden_names is None:
        forbidden_names = set()

    filtered_ssa = ssa_names_aggregated_df.copy()
    filtered_ssa = filtered_ssa[
        filtered_ssa[ssa_names_col].str.len().between(min_length_ssa, max_length_ssa)
    ]
    if sex:
        filtered_ssa = filtered_ssa[filtered_ssa["sex"] == sex]
    if debug:
        print(f"SSA names after length/sex filter: {len(filtered_ssa)}")

    # Sort ascending by count so row positions map onto popularity percentiles.
    total = len(filtered_ssa)
    filtered_ssa = filtered_ssa.sort_values("count")
    low, high = ssa_popularity_percentile
    idx_start = int(total * low)
    idx_end = int(total * high)
    filtered_ssa = filtered_ssa.iloc[idx_start:idx_end]
    if debug:
        print(f"SSA names after popularity percentile slice: {len(filtered_ssa)}")

    filtered_bible = bible_names_df.copy()
    filtered_bible = filtered_bible[
        filtered_bible[bible_names_col].str.len().between(min_length_bible, max_length_bible)
    ]
    if sex:
        filtered_bible = filtered_bible[filtered_bible["sex"] == sex]
    filtered_bible = filtered_bible[~filtered_bible[bible_names_col].isin(forbidden_names)]
    if debug:
        print(f"Bible names after filtering: {len(filtered_bible)}")

    # Validate before sampling: sampling an empty frame raises an opaque
    # pandas error instead of this message.
    if filtered_bible.empty or filtered_ssa.empty:
        raise ValueError("No valid names found after filtering.")

    ssa_name = filtered_ssa.sample(1)[ssa_names_col].values[0]
    bible_name = filtered_bible.sample(1)[bible_names_col].values[0]
    return ssa_name, bible_name


# --- Gradio app ---
def generate_names(n, sex, min_len, max_len, min_bible_len, max_bible_len, pop_low, pop_high, debug_flag, last, forbidden_names_text):
    """Generate *n* full names for the Gradio UI.

    Args:
        n: Number of names to generate.
        sex: ``"M"`` or ``"F"`` to filter both datasets; any other value
            disables the sex filter.
        min_len: Minimum SSA name length.
        max_len: Maximum SSA name length.
        min_bible_len: Minimum Bible name length.
        max_bible_len: Maximum Bible name length.
        pop_low: Lower SSA popularity percentile (fraction in ``[0, 1]``).
        pop_high: Upper SSA popularity percentile (fraction in ``[0, 1]``).
        debug_flag: When true, capture debug output for the first name.
        last: Last name to use. ``None`` or a blank string selects one random
            entry from ``last_names``, shared by every generated name.
        forbidden_names_text: Comma-separated Bible names to exclude.

    Returns:
        A ``(names, debug_output)`` tuple of strings. ``names`` has one
        generated name per line; a failed attempt contributes an
        ``[Error: ...]`` line instead.
    """
    results = []
    debug_output = io.StringIO()
    forbidden_names = {
        name.strip() for name in (forbidden_names_text or "").split(",") if name.strip()
    }

    # Treat a blank textbox the same as a missing value so the random
    # fallback applies.
    last = (last or "").strip()
    if not last:
        last = random.choice(last_names)

    # Filter counts are printed to stdout; capture them for the debug box.
    with contextlib.redirect_stdout(debug_output):
        for i in range(int(n)):
            try:
                normal, bible = get_normal_and_bible(
                    ssa_names_aggregated_df,
                    bible_names_personlabel_df,
                    min_length_ssa=min_len,
                    max_length_ssa=max_len,
                    min_length_bible=min_bible_len,
                    max_length_bible=max_bible_len,
                    ssa_popularity_percentile=(pop_low, pop_high),
                    sex=sex if sex in {"M", "F"} else None,
                    forbidden_names=forbidden_names,
                    # The filters are identical for every iteration, so one
                    # debug report is enough.
                    debug=bool(debug_flag) and i == 0,
                )
                results.append(f"{bible} {normal} {last}")
            except Exception as e:
                # UI boundary: show the failure in the output box rather
                # than aborting the whole request.
                results.append(f"[Error: {e}]")

    return "\n".join(results), debug_output.getvalue()


with gr.Blocks() as demo:
    gr.Markdown("# 📜 Random Bible + SSA Name Generator")

    with gr.Row():
        n_slider = gr.Slider(1, 20, value=5, step=1, label="How many names?")
        sex_choice = gr.Radio(["M", "F", "Any"], label="Sex", value="Any")

    with gr.Row():
        ssa_len = gr.Slider(3, 40, value=3, step=1, label="SSA name min length")
        ssa_max_len = gr.Slider(3, 40, value=8, step=1, label="SSA name max length")

    with gr.Row():
        bible_len = gr.Slider(3, 40, value=3, step=1, label="Bible name min length")
        bible_max_len = gr.Slider(3, 40, value=8, step=1, label="Bible name max length")

    with gr.Row():
        pop_low_slider = gr.Slider(0.0, 1.0, value=0.95, step=0.01, label="SSA Popularity: Low Percentile")
        pop_high_slider = gr.Slider(0.0, 1.0, value=1.0, step=0.01, label="SSA Popularity: High Percentile")

    with gr.Row():
        last_name_input = gr.Textbox(label="Last Name")

    with gr.Row():
        # Seed the box with comma-separated text, the format generate_names
        # parses, rather than handing the list itself to the Textbox.
        forbidden_names_input = gr.Textbox(
            label="FORBIDDEN NAMES (comma-separated)",
            value=", ".join(FORBIDDEN_NAMES),
        )

    debug_checkbox = gr.Checkbox(label="Show debug output", value=True)
    generate_btn = gr.Button("🔀 Generate Names")
    output_box = gr.Textbox(label="Generated Names", lines=10)
    debug_box = gr.Textbox(label="Debug Output", lines=10)

    generate_btn.click(
        fn=generate_names,
        inputs=[
            n_slider,
            sex_choice,
            ssa_len,
            ssa_max_len,
            bible_len,
            bible_max_len,
            pop_low_slider,
            pop_high_slider,
            debug_checkbox,
            last_name_input,
            forbidden_names_input,
        ],
        outputs=[output_box, debug_box],
    )

demo.launch()