Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -64,6 +64,7 @@ ssa_names_df, ssa_names_aggregated_df = load_ssa_names()
|
|
64 |
bible_names_df = pd.read_csv("BibleData-Person.csv")
|
65 |
bible_names_personlabel_df = pd.read_csv("BibleData-PersonLabel.csv")
|
66 |
bible_names_personlabel_df = bible_names_personlabel_df.merge(bible_names_df[["person_id", "sex"]], on="person_id", how="left")
|
|
|
67 |
|
68 |
# --- Name generation logic ---
|
69 |
import random
|
@@ -80,14 +81,17 @@ def get_normal_and_bible(
|
|
80 |
ssa_popularity_percentile=(0.95, 1.0),
|
81 |
sex=None,
|
82 |
forbidden_names=None,
|
|
|
|
|
83 |
debug=False,
|
|
|
84 |
):
|
85 |
if forbidden_names is None:
|
86 |
forbidden_names = set()
|
87 |
|
88 |
filtered_ssa = ssa_names_aggregated_df.copy()
|
89 |
filtered_ssa = filtered_ssa[
|
90 |
-
filtered_ssa[
|
91 |
]
|
92 |
if sex:
|
93 |
filtered_ssa = filtered_ssa[filtered_ssa["sex"] == sex]
|
@@ -103,22 +107,22 @@ def get_normal_and_bible(
|
|
103 |
if debug:
|
104 |
print(f"SSA names after popularity percentile slice: {len(filtered_ssa)}")
|
105 |
|
106 |
-
ssa_name = filtered_ssa.sample(1)[
|
107 |
|
108 |
filtered_bible = bible_names_df.copy()
|
109 |
filtered_bible = filtered_bible[
|
110 |
-
filtered_bible[
|
111 |
]
|
112 |
if sex:
|
113 |
filtered_bible = filtered_bible[filtered_bible["sex"] == sex]
|
114 |
-
filtered_bible = filtered_bible[~filtered_bible[
|
115 |
if debug:
|
116 |
print(f"Bible names after filtering: {len(filtered_bible)}")
|
117 |
|
118 |
if len(filtered_bible) == 0 or len(filtered_ssa) == 0:
|
119 |
raise ValueError("No valid names found after filtering.")
|
120 |
|
121 |
-
bible_name = filtered_bible.sample(1)[
|
122 |
|
123 |
return ssa_name, bible_name
|
124 |
|
|
|
64 |
bible_names_df = pd.read_csv("BibleData-Person.csv")
|
65 |
bible_names_personlabel_df = pd.read_csv("BibleData-PersonLabel.csv")
|
66 |
bible_names_personlabel_df = bible_names_personlabel_df.merge(bible_names_df[["person_id", "sex"]], on="person_id", how="left")
|
67 |
+
bible_names_personlabel_df["sex"] = bible_names_personlabel_df["sex"].replace({"male": "M", "female": "F"})
|
68 |
|
69 |
# --- Name generation logic ---
|
70 |
import random
|
|
|
81 |
ssa_popularity_percentile=(0.95, 1.0),
|
82 |
sex=None,
|
83 |
forbidden_names=None,
|
84 |
+
ssa_names_col="name",
|
85 |
+
bible_names_col="english_label",
|
86 |
debug=False,
|
87 |
+
|
88 |
):
|
89 |
if forbidden_names is None:
|
90 |
forbidden_names = set()
|
91 |
|
92 |
filtered_ssa = ssa_names_aggregated_df.copy()
|
93 |
filtered_ssa = filtered_ssa[
|
94 |
+
filtered_ssa[ssa_names_col].str.len().between(min_length_ssa, max_length_ssa)
|
95 |
]
|
96 |
if sex:
|
97 |
filtered_ssa = filtered_ssa[filtered_ssa["sex"] == sex]
|
|
|
107 |
if debug:
|
108 |
print(f"SSA names after popularity percentile slice: {len(filtered_ssa)}")
|
109 |
|
110 |
+
ssa_name = filtered_ssa.sample(1)[ssa_names_col].values[0]
|
111 |
|
112 |
filtered_bible = bible_names_df.copy()
|
113 |
filtered_bible = filtered_bible[
|
114 |
+
filtered_bible[bible_names_col].str.len().between(min_length_bible, max_length_bible)
|
115 |
]
|
116 |
if sex:
|
117 |
filtered_bible = filtered_bible[filtered_bible["sex"] == sex]
|
118 |
+
filtered_bible = filtered_bible[~filtered_bible[bible_names_col].isin(forbidden_names)]
|
119 |
if debug:
|
120 |
print(f"Bible names after filtering: {len(filtered_bible)}")
|
121 |
|
122 |
if len(filtered_bible) == 0 or len(filtered_ssa) == 0:
|
123 |
raise ValueError("No valid names found after filtering.")
|
124 |
|
125 |
+
bible_name = filtered_bible.sample(1)[bible_names_col].values[0]
|
126 |
|
127 |
return ssa_name, bible_name
|
128 |
|