Commit
·
0c88eb4
1
Parent(s):
81d856c
filling missing values, include all gap codes
Browse files- preprocess.ipynb +42 -138
- preprocess.py +99 -0
preprocess.ipynb
CHANGED
|
@@ -12,114 +12,80 @@
|
|
| 12 |
"cell_type": "code",
|
| 13 |
"execution_count": null,
|
| 14 |
"id": "f7e6298c-d886-432a-a1b7-c3fee914c24f",
|
| 15 |
-
"metadata": {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
"outputs": [],
|
| 17 |
"source": [
|
| 18 |
"import ibis\n",
|
| 19 |
"from ibis import _\n",
|
| 20 |
"\n",
|
| 21 |
-
"conn = ibis.duckdb.connect(\"
|
| 22 |
-
"ca_parquet = \"https://data.source.coop/cboettig/ca30x30/ca_areas.parquet\"\n",
|
| 23 |
"# or use local copy:\n",
|
| 24 |
-
"ca_parquet = \"
|
| 25 |
]
|
| 26 |
},
|
| 27 |
{
|
| 28 |
"cell_type": "code",
|
| 29 |
"execution_count": null,
|
| 30 |
-
"id": "
|
| 31 |
"metadata": {},
|
| 32 |
"outputs": [],
|
| 33 |
"source": [
|
| 34 |
-
"buffer
|
|
|
|
| 35 |
"\n",
|
| 36 |
"tbl = (\n",
|
| 37 |
" conn.read_parquet(ca_parquet)\n",
|
| 38 |
" .cast({\"SHAPE\": \"geometry\"})\n",
|
| 39 |
" .rename(geom = \"SHAPE\")\n",
|
| 40 |
-
"
|
| 41 |
-
" .filter(_.reGAP < 3) \n",
|
| 42 |
")\n",
|
| 43 |
-
"
|
|
|
|
|
|
|
|
|
|
| 44 |
"tbl_2024 = tbl.filter(_.Release_Year == 2024)\n",
|
| 45 |
-
"intersects = tbl_2024.anti_join(tbl_2023, _.geom.intersects(tbl_2023.geom))
|
| 46 |
-
"\n"
|
| 47 |
]
|
| 48 |
},
|
| 49 |
{
|
| 50 |
"cell_type": "code",
|
| 51 |
"execution_count": null,
|
| 52 |
-
"id": "
|
| 53 |
-
"metadata": {},
|
| 54 |
-
"outputs": [],
|
| 55 |
-
"source": [
|
| 56 |
-
"## Testing, run only on subset data\n",
|
| 57 |
-
"if False:\n",
|
| 58 |
-
" gdf = intersects.mutate(geom = _.geom.convert(\"epsg:3310\",\"epsg:4326\")).execute()\n",
|
| 59 |
-
" gdf_2023 = tbl_2023.mutate(geom = _.geom.convert(\"epsg:3310\",\"epsg:4326\")).execute()\n",
|
| 60 |
-
" gdf_2024 = tbl_2024.mutate(geom = _.geom.convert(\"epsg:3310\",\"epsg:4326\")).execute()\n",
|
| 61 |
-
" # gdf = ca2024\n",
|
| 62 |
-
" established = {'property': 'established',\n",
|
| 63 |
-
" 'type': 'categorical',\n",
|
| 64 |
-
" 'stops': [\n",
|
| 65 |
-
" [2023, \"#26542C80\"], \n",
|
| 66 |
-
" [2024, \"#F3AB3D80\"]]\n",
|
| 67 |
-
" }\n",
|
| 68 |
-
" inter = {\"fill-color\": \"#F3AB3D\"}\n",
|
| 69 |
-
" p2024 = {\"fill-color\": \"#26542C\"}\n",
|
| 70 |
-
" p2023 = {\"fill-color\": \"#8B0A1A\"}\n",
|
| 71 |
-
" \n",
|
| 72 |
-
" m = leafmap.Map(style=\"positron\")\n",
|
| 73 |
-
" m.add_gdf(gdf_2024,layer_type=\"fill\", name = \"2024\", paint = p2024)\n",
|
| 74 |
-
" m.add_gdf(gdf_2023,layer_type=\"fill\", name = \"2023\", paint = p2023)\n",
|
| 75 |
-
" m.add_gdf(gdf,layer_type=\"fill\", name = \"intersects\", paint = inter)\n",
|
| 76 |
-
" \n",
|
| 77 |
-
" m.add_layer_control()\n",
|
| 78 |
-
" m"
|
| 79 |
-
]
|
| 80 |
-
},
|
| 81 |
-
{
|
| 82 |
-
"cell_type": "code",
|
| 83 |
-
"execution_count": null,
|
| 84 |
-
"id": "275c171a-f82f-4ee8-991c-1e34eb83a33d",
|
| 85 |
"metadata": {},
|
| 86 |
"outputs": [],
|
| 87 |
"source": [
|
| 88 |
"%%time\n",
|
| 89 |
"\n",
|
| 90 |
-
"new2024 = intersects.select(\"OBJECTID\").mutate(established = 2024)\n",
|
| 91 |
"\n",
|
| 92 |
"ca = (conn\n",
|
| 93 |
" .read_parquet(ca_parquet)\n",
|
| 94 |
" .cast({\"SHAPE\": \"geometry\"})\n",
|
| 95 |
" .mutate(area = _.SHAPE.area())\n",
|
| 96 |
-
" .filter(_.Release_Year == 2024)
|
| 97 |
-
" .
|
| 98 |
-
" .
|
| 99 |
-
" .mutate(established=_.established.fill_null(2023))\n",
|
| 100 |
" .mutate(geom = _.SHAPE.convert(\"epsg:3310\",\"epsg:4326\"))\n",
|
| 101 |
" .rename(name = \"cpad_PARK_NAME\", access_type = \"cpad_ACCESS_TYP\", manager = \"cpad_MNG_AGENCY\",\n",
|
| 102 |
" manager_type = \"cpad_MNG_AG_LEV\", id = \"OBJECTID\", type = \"TYPE\")\n",
|
|
|
|
|
|
|
|
|
|
|
|
|
| 103 |
" .select(_.established, _.reGAP, _.name, _.access_type, _.manager, _.manager_type,\n",
|
| 104 |
" _.Easement, _.Acres, _.id, _.type, _.geom)\n",
|
| 105 |
" )\n",
|
| 106 |
-
"ca2024 = ca.execute()\n",
|
| 107 |
-
"\n",
|
| 108 |
"\n",
|
| 109 |
-
"\n",
|
| 110 |
-
"ca2024.to_parquet(\"ca2024.parquet\")\n",
|
| 111 |
-
"\
|
| 112 |
-
]
|
| 113 |
-
},
|
| 114 |
-
{
|
| 115 |
-
"cell_type": "code",
|
| 116 |
-
"execution_count": 2,
|
| 117 |
-
"id": "8259b450-2152-472c-a58c-50ce0d68d78f",
|
| 118 |
-
"metadata": {},
|
| 119 |
-
"outputs": [],
|
| 120 |
-
"source": [
|
| 121 |
-
"ca2024 = conn.read_parquet(\"ca2024.parquet\")\n",
|
| 122 |
-
"ca2024.execute().to_file(\"ca2024.geojson\") # tippecanoe can't parse geoparquet :-("
|
| 123 |
]
|
| 124 |
},
|
| 125 |
{
|
|
@@ -144,83 +110,21 @@
|
|
| 144 |
" repo_id=\"boettiger-lab/ca-30x30\",\n",
|
| 145 |
" repo_type=\"dataset\",\n",
|
| 146 |
" )\n",
|
| 147 |
-
"
|
| 148 |
-
|
| 149 |
-
},
|
| 150 |
-
{
|
| 151 |
-
"cell_type": "markdown",
|
| 152 |
-
"id": "cebd0ff5-8353-4b84-b9ee-182b74613554",
|
| 153 |
-
"metadata": {},
|
| 154 |
-
"source": [
|
| 155 |
-
"# Testing & visualization\n",
|
| 156 |
-
"\n",
|
| 157 |
-
"`ca2024.parquet()` now contains all we need. The code below illustrates some quick examples of the kinds of visualizations and summaries we might want to compute with this data. \n"
|
| 158 |
-
]
|
| 159 |
-
},
|
| 160 |
-
{
|
| 161 |
-
"cell_type": "code",
|
| 162 |
-
"execution_count": 1,
|
| 163 |
-
"id": "55afe07c-8681-4308-bbb9-e460f7380f86",
|
| 164 |
-
"metadata": {},
|
| 165 |
-
"outputs": [],
|
| 166 |
-
"source": [
|
| 167 |
-
"import leafmap.maplibregl as leafmap\n",
|
| 168 |
-
"import ibis\n",
|
| 169 |
-
"from ibis import _\n",
|
| 170 |
-
"conn = ibis.duckdb.connect(extensions=[\"spatial\"])\n",
|
| 171 |
-
"\n",
|
| 172 |
-
"ca2024 = conn.read_parquet(\"ca2024.parquet\")"
|
| 173 |
-
]
|
| 174 |
-
},
|
| 175 |
-
{
|
| 176 |
-
"cell_type": "code",
|
| 177 |
-
"execution_count": null,
|
| 178 |
-
"id": "6f3df8c1-a603-4dd5-be84-8deaae928d0a",
|
| 179 |
-
"metadata": {},
|
| 180 |
-
"outputs": [],
|
| 181 |
-
"source": [
|
| 182 |
-
"# compute some summary tables:\n",
|
| 183 |
-
"\n",
|
| 184 |
-
"(ca2024\n",
|
| 185 |
-
" .filter(_.established == 2024)\n",
|
| 186 |
-
" .filter(_.manager_type == \"State\")\n",
|
| 187 |
-
" .group_by(_.manager, _.manager_type)\n",
|
| 188 |
-
" .agg(area = _.Acres.sum())\n",
|
| 189 |
-
" .order_by(_.area.desc())\n",
|
| 190 |
-
" .execute()\n",
|
| 191 |
-
")"
|
| 192 |
-
]
|
| 193 |
-
},
|
| 194 |
-
{
|
| 195 |
-
"cell_type": "code",
|
| 196 |
-
"execution_count": null,
|
| 197 |
-
"id": "c62854f6-1456-4207-8c69-53af17970102",
|
| 198 |
-
"metadata": {},
|
| 199 |
-
"outputs": [],
|
| 200 |
-
"source": [
|
| 201 |
-
"\n",
|
| 202 |
-
"gdf = ca2024.execute()\n",
|
| 203 |
-
"established = {'property': 'established',\n",
|
| 204 |
-
" 'type': 'categorical',\n",
|
| 205 |
-
" 'stops': [\n",
|
| 206 |
-
" [2023, \"#26542C80\"], \n",
|
| 207 |
-
" [2024, \"#F3AB3D80\"]]}\n",
|
| 208 |
-
"paint = {\"fill-color\": established}\n",
|
| 209 |
-
"\n",
|
| 210 |
-
"\n",
|
| 211 |
-
"m = leafmap.Map(style=\"positron\")\n",
|
| 212 |
-
"m.add_gdf(gdf,layer_type=\"fill\", name = \"intersects\", paint = paint)\n",
|
| 213 |
-
"\n",
|
| 214 |
-
"m.add_layer_control()\n",
|
| 215 |
-
"m.to_html(\"ca2024.html\")\n",
|
| 216 |
-
"m"
|
| 217 |
]
|
| 218 |
},
|
| 219 |
{
|
| 220 |
"cell_type": "code",
|
| 221 |
"execution_count": null,
|
| 222 |
"id": "2df80e1d-6b94-4884-b9f5-d9c23d3ea028",
|
| 223 |
-
"metadata": {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 224 |
"outputs": [],
|
| 225 |
"source": [
|
| 226 |
"import subprocess\n",
|
|
@@ -249,8 +153,8 @@
|
|
| 249 |
" except subprocess.CalledProcessError as e:\n",
|
| 250 |
" print(f\"Error running Tippecanoe: {e}\")\n",
|
| 251 |
"\n",
|
| 252 |
-
"generate_pmtiles(\"ca2024.geojson\", \"ca2024-tippe.pmtiles\")\n",
|
| 253 |
-
"hf_upload(\"ca2024-tippe.pmtiles\")"
|
| 254 |
]
|
| 255 |
}
|
| 256 |
],
|
|
|
|
| 12 |
"cell_type": "code",
|
| 13 |
"execution_count": null,
|
| 14 |
"id": "f7e6298c-d886-432a-a1b7-c3fee914c24f",
|
| 15 |
+
"metadata": {
|
| 16 |
+
"editable": true,
|
| 17 |
+
"slideshow": {
|
| 18 |
+
"slide_type": ""
|
| 19 |
+
},
|
| 20 |
+
"tags": []
|
| 21 |
+
},
|
| 22 |
"outputs": [],
|
| 23 |
"source": [
|
| 24 |
"import ibis\n",
|
| 25 |
"from ibis import _\n",
|
| 26 |
"\n",
|
| 27 |
+
"conn = ibis.duckdb.connect(\"tmp3\", extensions=[\"spatial\"])\n",
|
| 28 |
+
"# ca_parquet = \"https://data.source.coop/cboettig/ca30x30/ca_areas.parquet\"\n",
|
| 29 |
"# or use local copy:\n",
|
| 30 |
+
"ca_parquet = \"ca_areas.parquet\""
|
| 31 |
]
|
| 32 |
},
|
| 33 |
{
|
| 34 |
"cell_type": "code",
|
| 35 |
"execution_count": null,
|
| 36 |
+
"id": "a3d4f189-1563-4868-9f1f-64d67569df27",
|
| 37 |
"metadata": {},
|
| 38 |
"outputs": [],
|
| 39 |
"source": [
|
| 40 |
+
"# negative buffer to account for overlapping boundaries. \n",
|
| 41 |
+
"buffer = -30 #30m buffer \n",
|
| 42 |
"\n",
|
| 43 |
"tbl = (\n",
|
| 44 |
" conn.read_parquet(ca_parquet)\n",
|
| 45 |
" .cast({\"SHAPE\": \"geometry\"})\n",
|
| 46 |
" .rename(geom = \"SHAPE\")\n",
|
| 47 |
+
" .filter(_.reGAP < 3) # only gap 1 and 2 count towards 30x30\n",
|
|
|
|
| 48 |
")\n",
|
| 49 |
+
"\n",
|
| 50 |
+
"# polygons with release_year 2024 are a superset of release_year 2023. \n",
|
| 51 |
+
"# use anti_join to isolate the objects that are in release_year 2024 but not release_year 2023 (aka newly established). \n",
|
| 52 |
+
"tbl_2023 = tbl.filter(_.Release_Year == 2023).mutate(geom=_.geom.buffer(buffer)) \n",
|
| 53 |
"tbl_2024 = tbl.filter(_.Release_Year == 2024)\n",
|
| 54 |
+
"intersects = tbl_2024.anti_join(tbl_2023, _.geom.intersects(tbl_2023.geom))"
|
|
|
|
| 55 |
]
|
| 56 |
},
|
| 57 |
{
|
| 58 |
"cell_type": "code",
|
| 59 |
"execution_count": null,
|
| 60 |
+
"id": "a59c976b-3c36-40f9-a15b-cefcd155c647",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 61 |
"metadata": {},
|
| 62 |
"outputs": [],
|
| 63 |
"source": [
|
| 64 |
"%%time\n",
|
| 65 |
"\n",
|
| 66 |
+
"new2024 = intersects.select(\"OBJECTID\").mutate(established = 2024) # saving IDs to join on\n",
|
| 67 |
"\n",
|
| 68 |
"ca = (conn\n",
|
| 69 |
" .read_parquet(ca_parquet)\n",
|
| 70 |
" .cast({\"SHAPE\": \"geometry\"})\n",
|
| 71 |
" .mutate(area = _.SHAPE.area())\n",
|
| 72 |
+
" .filter(_.Release_Year == 2024) # having both 2023 and 2024 is redudant since 2024 is the superset.\n",
|
| 73 |
+
" .left_join(new2024, \"OBJECTID\") # newly established 2024 polygons \n",
|
| 74 |
+
" .mutate(established=_.established.fill_null(2023)) \n",
|
|
|
|
| 75 |
" .mutate(geom = _.SHAPE.convert(\"epsg:3310\",\"epsg:4326\"))\n",
|
| 76 |
" .rename(name = \"cpad_PARK_NAME\", access_type = \"cpad_ACCESS_TYP\", manager = \"cpad_MNG_AGENCY\",\n",
|
| 77 |
" manager_type = \"cpad_MNG_AG_LEV\", id = \"OBJECTID\", type = \"TYPE\")\n",
|
| 78 |
+
" .mutate(manager = _.manager.substitute({\"\": \"Unknown\"})) \n",
|
| 79 |
+
" .mutate(manager_type = _.manager_type.substitute({\"\": \"Unknown\"}))\n",
|
| 80 |
+
" .mutate(access_type = _.access_type.substitute({\"\": \"Unknown Access\"}))\n",
|
| 81 |
+
" .mutate(name = _.name.substitute({\"\": \"Unknown\"}))\n",
|
| 82 |
" .select(_.established, _.reGAP, _.name, _.access_type, _.manager, _.manager_type,\n",
|
| 83 |
" _.Easement, _.Acres, _.id, _.type, _.geom)\n",
|
| 84 |
" )\n",
|
|
|
|
|
|
|
| 85 |
"\n",
|
| 86 |
+
"ca2024 = ca.execute()\n",
|
| 87 |
+
"ca2024.to_parquet(\"ca2024-30m.parquet\")\n",
|
| 88 |
+
"ca2024.to_file(\"ca2024-30m.geojson\") # tippecanoe can't parse geoparquet :-("
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 89 |
]
|
| 90 |
},
|
| 91 |
{
|
|
|
|
| 110 |
" repo_id=\"boettiger-lab/ca-30x30\",\n",
|
| 111 |
" repo_type=\"dataset\",\n",
|
| 112 |
" )\n",
|
| 113 |
+
" \n",
|
| 114 |
+
"hf_upload(\"ca2024-30m.parquet\")"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 115 |
]
|
| 116 |
},
|
| 117 |
{
|
| 118 |
"cell_type": "code",
|
| 119 |
"execution_count": null,
|
| 120 |
"id": "2df80e1d-6b94-4884-b9f5-d9c23d3ea028",
|
| 121 |
+
"metadata": {
|
| 122 |
+
"editable": true,
|
| 123 |
+
"slideshow": {
|
| 124 |
+
"slide_type": ""
|
| 125 |
+
},
|
| 126 |
+
"tags": []
|
| 127 |
+
},
|
| 128 |
"outputs": [],
|
| 129 |
"source": [
|
| 130 |
"import subprocess\n",
|
|
|
|
| 153 |
" except subprocess.CalledProcessError as e:\n",
|
| 154 |
" print(f\"Error running Tippecanoe: {e}\")\n",
|
| 155 |
"\n",
|
| 156 |
+
"generate_pmtiles(\"ca2024-30m.geojson\", \"ca2024-30m-tippe.pmtiles\")\n",
|
| 157 |
+
"hf_upload(\"ca2024-30m-tippe.pmtiles\")"
|
| 158 |
]
|
| 159 |
}
|
| 160 |
],
|
preprocess.py
ADDED
|
@@ -0,0 +1,99 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import ibis
|
| 2 |
+
from ibis import _
|
| 3 |
+
|
| 4 |
+
conn = ibis.duckdb.connect("tmp3", extensions=["spatial"])
|
| 5 |
+
# ca_parquet = "https://data.source.coop/cboettig/ca30x30/ca_areas.parquet"
|
| 6 |
+
# or use local copy:
|
| 7 |
+
ca_parquet = "ca_areas.parquet"
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
# negative buffer to account for overlapping boundaries.
|
| 11 |
+
buffer = -30 #30m buffer
|
| 12 |
+
|
| 13 |
+
tbl = (
|
| 14 |
+
conn.read_parquet(ca_parquet)
|
| 15 |
+
.cast({"SHAPE": "geometry"})
|
| 16 |
+
.rename(geom = "SHAPE")
|
| 17 |
+
.filter(_.reGAP < 3) # only gap 1 and 2 count towards 30x30
|
| 18 |
+
)
|
| 19 |
+
|
| 20 |
+
# polygons with release_year 2024 are a superset of release_year 2023.
|
| 21 |
+
# use anti_join to isolate the objects that are in release_year 2024 but not release_year 2023 (aka newly established).
|
| 22 |
+
tbl_2023 = tbl.filter(_.Release_Year == 2023).mutate(geom=_.geom.buffer(buffer))
|
| 23 |
+
tbl_2024 = tbl.filter(_.Release_Year == 2024)
|
| 24 |
+
intersects = tbl_2024.anti_join(tbl_2023, _.geom.intersects(tbl_2023.geom))
|
| 25 |
+
|
| 26 |
+
new2024 = intersects.select("OBJECTID").mutate(established = 2024) # saving IDs to join on
|
| 27 |
+
|
| 28 |
+
ca = (conn
|
| 29 |
+
.read_parquet(ca_parquet)
|
| 30 |
+
.cast({"SHAPE": "geometry"})
|
| 31 |
+
.mutate(area = _.SHAPE.area())
|
| 32 |
+
.filter(_.Release_Year == 2024) # having both 2023 and 2024 is redundant since 2024 is the superset.
|
| 33 |
+
.left_join(new2024, "OBJECTID") # newly established 2024 polygons
|
| 34 |
+
.mutate(established=_.established.fill_null(2023))
|
| 35 |
+
.mutate(geom = _.SHAPE.convert("epsg:3310","epsg:4326"))
|
| 36 |
+
.rename(name = "cpad_PARK_NAME", access_type = "cpad_ACCESS_TYP", manager = "cpad_MNG_AGENCY",
|
| 37 |
+
manager_type = "cpad_MNG_AG_LEV", id = "OBJECTID", type = "TYPE")
|
| 38 |
+
.mutate(manager = _.manager.substitute({"": "Unknown"}))
|
| 39 |
+
.mutate(manager_type = _.manager_type.substitute({"": "Unknown"}))
|
| 40 |
+
.mutate(access_type = _.access_type.substitute({"": "Unknown Access"}))
|
| 41 |
+
.mutate(name = _.name.substitute({"": "Unknown"}))
|
| 42 |
+
.select(_.established, _.reGAP, _.name, _.access_type, _.manager, _.manager_type,
|
| 43 |
+
_.Easement, _.Acres, _.id, _.type, _.geom)
|
| 44 |
+
)
|
| 45 |
+
|
| 46 |
+
ca2024 = ca.execute()
|
| 47 |
+
|
| 48 |
+
ca2024.to_parquet("ca2024-30m.parquet")
|
| 49 |
+
|
| 50 |
+
ca2024.to_file("ca2024-30m.geojson") # tippecanoe can't parse geoparquet :-(
|
| 51 |
+
|
| 52 |
+
|
| 53 |
+
## Upload to Huggingface
|
| 54 |
+
# https://huggingface.co/datasets/boettiger-lab/ca-30x30/
|
| 55 |
+
|
| 56 |
+
from huggingface_hub import HfApi, login
|
| 57 |
+
import streamlit as st
|
| 58 |
+
login(st.secrets["HF_TOKEN"])
|
| 59 |
+
api = HfApi()
|
| 60 |
+
|
| 61 |
+
def hf_upload(file):
    """Upload a local file to the ``boettiger-lab/ca-30x30`` dataset repo.

    The same string is used as both the local source path and the path
    inside the repository, so the file keeps its relative name remotely.

    Args:
        file: Path of the local file to upload; also used as ``path_in_repo``.

    Returns:
        The value returned by ``api.upload_file``, so callers can inspect or
        log the outcome instead of it being bound to an unused local.
    """
    # `api` is the module-level HfApi client created after login().
    return api.upload_file(
        path_or_fileobj=file,
        path_in_repo=file,
        repo_id="boettiger-lab/ca-30x30",
        repo_type="dataset",
    )
|
| 68 |
+
hf_upload("ca2024-30m.parquet")
|
| 69 |
+
|
| 70 |
+
|
| 71 |
+
|
| 72 |
+
import subprocess
|
| 73 |
+
import os
|
| 74 |
+
|
| 75 |
+
def generate_pmtiles(input_file, output_file, max_zoom=12):
    """Convert a GeoJSON file to PMTiles by running the Tippecanoe CLI.

    Args:
        input_file: Path to the GeoJSON file handed to Tippecanoe.
        output_file: Path of the tiles file to write (passed to ``-o``).
        max_zoom: Maximum zoom level, passed to ``-z``.

    Returns:
        True if Tippecanoe exited successfully; False if it exited with a
        non-zero status (the error is printed rather than raised).

    Raises:
        RuntimeError: If no ``tippecanoe`` executable is found on PATH.
    """
    # Imported locally so the block does not rely on a new file-level import.
    import shutil

    # shutil.which is portable and quiet; shelling out to `which` fails with
    # FileNotFoundError on systems that lack it and leaks stderr output.
    if shutil.which("tippecanoe") is None:
        raise RuntimeError("Tippecanoe is not installed or not in PATH")

    # Argument list (not a shell string), so paths are never shell-interpreted.
    command = [
        "tippecanoe",
        "-o", output_file,
        "-z", str(max_zoom),
        "--drop-densest-as-needed",
        "--extend-zooms-if-still-dropping",
        "--force",
        input_file,
    ]

    try:
        subprocess.run(command, check=True)
    except subprocess.CalledProcessError as e:
        # Keep the original best-effort behavior (report, don't crash), but
        # give the caller a way to detect the failure.
        print(f"Error running Tippecanoe: {e}")
        return False

    print(f"Successfully generated PMTiles file: {output_file}")
    return True
|
| 97 |
+
|
| 98 |
+
generate_pmtiles("ca2024-30m.geojson", "ca2024-30m-tippe.pmtiles")
|
| 99 |
+
hf_upload("ca2024-30m-tippe.pmtiles")
|