import pandas as pd import numpy as np import random from datetime import datetime, timedelta def generate_fake_data(df, num_fake): # Options for random generation species_options = [ "beluga", "blue_whale", "bottlenose_dolphin", "brydes_whale", "commersons_dolphin", "common_dolphin", "cuviers_beaked_whale", "dusky_dolphin", "false_killer_whale", "fin_whale", "frasiers_dolphin", "gray_whale", "humpback_whale", "killer_whale", "long_finned_pilot_whale", "melon_headed_whale", "minke_whale", "pantropic_spotted_dolphin", "pygmy_killer_whale", "rough_toothed_dolphin", "sei_whale", "short_finned_pilot_whale", "southern_right_whale", "spinner_dolphin", "spotted_dolphin", "white_sided_dolphin", ] email_options = [ 'dr.marine@oceanic.org', 'whale.research@deepblue.org', 'observer@sea.net', 'super@whale.org' ] def random_ocean_coord(): """Generate random ocean-friendly coordinates.""" lat = random.uniform(-60, 60) # avoid poles lon = random.uniform(-180, 180) return lat, lon def random_date(start_year=2018, end_year=2025): """Generate a random date.""" start = datetime(start_year, 1, 1) end = datetime(end_year, 1, 1) return start + timedelta(days=random.randint(0, (end - start).days)) # Generate 20 new observations new_data = [] for _ in range(num_fake): lat, lon = random_ocean_coord() species = random.choice(species_options) email = random.choice(email_options) date = random_date() new_data.append([lat, lon, species, email, date]) # Create a DataFrame and append new_df = pd.DataFrame(new_data, columns=['lat', 'lon', 'species', 'author_email', 'date']) df = pd.concat([df, new_df], ignore_index=True) return df