cdleong committed on
Commit
e3d7a97
·
verified ·
1 Parent(s): 69c1687

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -10
app.py CHANGED
@@ -5,15 +5,14 @@ from pathlib import Path
5
  from zipfile import ZipFile
6
 
7
  # ------------------
8
- # Download Resources
9
  # ------------------
10
  def download_file(url: str, output_path: Path):
11
  if output_path.exists():
12
  print(f"Skipping {output_path.name}, already exists.")
13
  return
14
  print(f"Downloading {url}")
15
- headers = {"User-Agent": "Mozilla/5.0"} # Bypass basic bot protections
16
- response = requests.get(url, headers=headers)
17
  response.raise_for_status()
18
  output_path.write_bytes(response.content)
19
  print(f"Saved to {output_path}")
@@ -21,18 +20,22 @@ def download_file(url: str, output_path: Path):
21
 
22
  # Download files
23
  download_file("https://raw.githubusercontent.com/BradyStephenson/bible-data/refs/heads/main/BibleData-Person.csv", Path("BibleData-Person.csv"))
24
- download_file("https://www.ssa.gov/oact/babynames/names.zip", Path("names.zip"))
25
  download_file("https://raw.githubusercontent.com/BradyStephenson/bible-data/refs/heads/main/BibleData-PersonLabel.csv", Path("BibleData-PersonLabel.csv"))
26
 
27
- # Unzip names.zip
28
- with ZipFile("names.zip", 'r') as zip_ref:
29
- zip_ref.extractall(".")
30
- print("Unzipped names.zip")
 
 
 
 
 
 
31
 
32
  # ------------------
33
  # Load Data
34
  # ------------------
35
- ssa_name_txt_files = list(Path(".").glob("yob*.txt"))
36
 
37
  def load_ssa_names():
38
  ssa_dfs = []
@@ -60,7 +63,7 @@ def load_bible_names():
60
  )
61
  return bible_names_df
62
 
63
- # Load on startup
64
  ssa_names_df, ssa_names_aggregated_df = load_ssa_names()
65
  bible_names_df = load_bible_names()
66
 
 
5
  from zipfile import ZipFile
6
 
7
  # ------------------
8
+ # Data prep
9
  # ------------------
10
  def download_file(url: str, output_path: Path):
11
  if output_path.exists():
12
  print(f"Skipping {output_path.name}, already exists.")
13
  return
14
  print(f"Downloading {url}")
15
+ response = requests.get(url)
 
16
  response.raise_for_status()
17
  output_path.write_bytes(response.content)
18
  print(f"Saved to {output_path}")
 
20
 
21
  # Download files
22
  download_file("https://raw.githubusercontent.com/BradyStephenson/bible-data/refs/heads/main/BibleData-Person.csv", Path("BibleData-Person.csv"))
 
23
  download_file("https://raw.githubusercontent.com/BradyStephenson/bible-data/refs/heads/main/BibleData-PersonLabel.csv", Path("BibleData-PersonLabel.csv"))
24
 
25
+ # Assume `names.zip` is already in the repo
26
+ if not Path("names").exists():
27
+ print("Unzipping local names.zip...")
28
+ with ZipFile("names.zip", 'r') as zip_ref:
29
+ zip_ref.extractall("names")
30
+ else:
31
+ print("SSA name files already extracted.")
32
+
33
+ # Define where to find SSA name files
34
+ ssa_name_txt_files = list(Path("names").glob("yob*.txt"))
35
 
36
  # ------------------
37
  # Load Data
38
  # ------------------
 
39
 
40
  def load_ssa_names():
41
  ssa_dfs = []
 
63
  )
64
  return bible_names_df
65
 
66
+ # Load data on startup
67
  ssa_names_df, ssa_names_aggregated_df = load_ssa_names()
68
  bible_names_df = load_bible_names()
69