Joshua Sundance Bailey committed on
Commit
2f2f700
·
unverified ·
2 Parent(s): c0d16d7 96b42cc

Merge pull request #21 from connorsutton/main

Browse files
.github/workflows/docker-hub.yml CHANGED
@@ -23,6 +23,7 @@ jobs:
23
  - name: Build Docker image
24
  run: |
25
  docker build \
 
26
  -t joshuasundance/geospatial-data-converter:${{ github.ref_name }} \
27
  -t joshuasundance/geospatial-data-converter:latest \
28
  .
 
23
  - name: Build Docker image
24
  run: |
25
  docker build \
26
+ --target runtime \
27
  -t joshuasundance/geospatial-data-converter:${{ github.ref_name }} \
28
  -t joshuasundance/geospatial-data-converter:latest \
29
  .
Dockerfile CHANGED
@@ -1,4 +1,4 @@
1
- FROM python:3.11-slim-bookworm
2
 
3
  RUN adduser --uid 1000 --disabled-password --gecos '' appuser
4
  USER 1000
@@ -10,11 +10,18 @@ ENV PYTHONDONTWRITEBYTECODE=1 \
10
  RUN pip install --user --no-cache-dir --upgrade pip
11
  COPY ./requirements.txt /home/appuser/requirements.txt
12
  RUN pip install --user --no-cache-dir --upgrade -r /home/appuser/requirements.txt
13
-
14
  COPY geospatial-data-converter/ /home/appuser/geospatial-data-converter/
15
 
 
 
 
 
 
 
 
 
 
16
  WORKDIR /workspace
17
  EXPOSE 7860
18
 
19
  CMD ["streamlit", "run", "/home/appuser/geospatial-data-converter/app.py", "--server.port", "7860", "--server.address", "0.0.0.0", "--server.enableXsrfProtection=false"]
20
- #CMD ["/bin/bash"]
 
1
+ FROM python:3.11-slim-bookworm AS base
2
 
3
  RUN adduser --uid 1000 --disabled-password --gecos '' appuser
4
  USER 1000
 
10
  RUN pip install --user --no-cache-dir --upgrade pip
11
  COPY ./requirements.txt /home/appuser/requirements.txt
12
  RUN pip install --user --no-cache-dir --upgrade -r /home/appuser/requirements.txt
 
13
  COPY geospatial-data-converter/ /home/appuser/geospatial-data-converter/
14
 
15
+
16
+ FROM base AS test
17
+
18
+ COPY dev-requirements.txt /home/appuser/dev-requirements.txt
19
+ RUN pip install --user --no-cache-dir --upgrade -r /home/appuser/dev-requirements.txt
20
+
21
+
22
+ FROM base AS runtime
23
+
24
  WORKDIR /workspace
25
  EXPOSE 7860
26
 
27
  CMD ["streamlit", "run", "/home/appuser/geospatial-data-converter/app.py", "--server.port", "7860", "--server.address", "0.0.0.0", "--server.enableXsrfProtection=false"]
 
README.md CHANGED
@@ -39,7 +39,7 @@ This project showcases a simple geospatial data converter using [Streamlit](http
39
  - User-friendly interface for easy data conversion
40
  - Supports conversion from the following input formats:
41
  - ArcGIS featurelayer URL
42
- - Uploaded file: KML, KMZ, GeoJSON, ZIP
43
  - Provides data in the selected output format
44
  - Presents data preview (geometry omitted for display purposes)
45
  - Download button for the converted data
@@ -57,6 +57,9 @@ This project showcases a simple geospatial data converter using [Streamlit](http
57
  2. Run in terminal: `docker compose up`
58
  3. Open http://localhost:7860 in your browser
59
 
 
 
 
60
  ## Kubernetes
61
  1. Clone the repo. Navigate to cloned repo directory
62
  2. Run bash script: `/bin/bash ./kubernetes/deploy.sh`
 
39
  - User-friendly interface for easy data conversion
40
  - Supports conversion from the following input formats:
41
  - ArcGIS featurelayer URL
42
+ - Uploaded file: KML, KMZ, GeoJSON, ZIP, etc.
43
  - Provides data in the selected output format
44
  - Presents data preview (geometry omitted for display purposes)
45
  - Download button for the converted data
 
57
  2. Run in terminal: `docker compose up`
58
  3. Open http://localhost:7860 in your browser
59
 
60
+ ## Run Tests (with local Docker container)
61
+ 1. Run in terminal: `docker compose run test`
62
+
63
  ## Kubernetes
64
  1. Clone the repo. Navigate to cloned repo directory
65
  2. Run bash script: `/bin/bash ./kubernetes/deploy.sh`
dev-requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ pytest==7.4.3
2
+ pytest-cov==4.1.0
docker-compose.yml CHANGED
@@ -1,16 +1,26 @@
1
  version: '3.8'
2
 
3
  services:
 
 
 
 
 
 
 
 
 
 
 
 
4
  geospatial-data-converter:
5
- image: geospatial-data-converter:latest
6
- build: .
 
 
 
 
 
 
7
  ports:
8
  - "${APP_PORT:-7860}:${APP_PORT:-7860}"
9
- working_dir: /workspace
10
- command: [
11
- "streamlit", "run",
12
- "/home/appuser/geospatial-data-converter/app.py",
13
- "--server.port", "${APP_PORT:-7860}",
14
- "--server.address", "0.0.0.0",
15
- "--server.enableXsrfProtection=false"
16
- ]
 
1
  version: '3.8'
2
 
3
  services:
4
+ test:
5
+ container_name: geospatial-data-converter-test
6
+ build:
7
+ context: .
8
+ dockerfile: Dockerfile
9
+ target: test
10
+ volumes:
11
+ - ./tests/:/home/appuser/tests/
12
+ - ./pyproject.toml:/home/appuser/pyproject.toml
13
+ working_dir: /home/appuser
14
+ command: ["pytest"]
15
+
16
  geospatial-data-converter:
17
+ container_name: geospatial-data-converter
18
+ build:
19
+ context: .
20
+ dockerfile: Dockerfile
21
+ target: runtime
22
+ depends_on:
23
+ test:
24
+ condition: service_completed_successfully
25
  ports:
26
  - "${APP_PORT:-7860}:${APP_PORT:-7860}"
 
 
 
 
 
 
 
 
geospatial-data-converter/app.py CHANGED
@@ -68,8 +68,6 @@ elif st.session_state.uploaded_file is not None:
68
  os.path.basename(st.session_state.uploaded_file.name),
69
  )
70
  st.session_state.gdf = read_file(st.session_state.uploaded_file)
71
- # except AttributeError:
72
- # pass # there is a lingering file from the previous instance, but it can be ignored
73
 
74
  if st.session_state.gdf is not None:
75
  st.selectbox(
 
68
  os.path.basename(st.session_state.uploaded_file.name),
69
  )
70
  st.session_state.gdf = read_file(st.session_state.uploaded_file)
 
 
71
 
72
  if st.session_state.gdf is not None:
73
  st.selectbox(
geospatial-data-converter/kml_tricks.py CHANGED
@@ -52,7 +52,7 @@ def parse_descriptions_to_geodf(geodf: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
52
 
53
  def swap_coordinates(geometry):
54
  """
55
- Swap the latitude and longitude of Shapely Point, LineString, Polygon,
56
  MultiPoint, MultiLineString, MultiPolygon, or LinearRing geometry.
57
 
58
  Parameters:
@@ -131,7 +131,7 @@ def extract_data_from_kml_code(kml_code: str) -> pd.DataFrame:
131
  """Extracts data from KML code into a DataFrame using SimpleData tags, excluding embedded tables in feature descriptions"""
132
 
133
  # Parse the KML source code
134
- soup = bs4.BeautifulSoup(kml_code, "html.parser")
135
 
136
  # Find all SchemaData tags (representing rows)
137
  schema_data_tags = soup.find_all("schemadata")
 
52
 
53
  def swap_coordinates(geometry):
54
  """
55
+ Swap the latitude and longitude of a shapely Point, LineString, Polygon,
56
  MultiPoint, MultiLineString, MultiPolygon, or LinearRing geometry.
57
 
58
  Parameters:
 
131
  """Extracts data from KML code into a DataFrame using SimpleData tags, excluding embedded tables in feature descriptions"""
132
 
133
  # Parse the KML source code
134
+ soup = bs4.BeautifulSoup(kml_code, features="xml")
135
 
136
  # Find all SchemaData tags (representing rows)
137
  schema_data_tags = soup.find_all("schemadata")
geospatial-data-converter/utils.py CHANGED
@@ -1,18 +1,20 @@
1
  import io
2
  import os
3
  import zipfile
4
- from tempfile import TemporaryDirectory
5
- from typing import BinaryIO
6
  import geopandas as gpd
 
7
 
 
 
8
  from kml_tricks import load_ge_data
9
 
10
  output_format_dict = {
11
- "ESRI Shapefile": ("shp", "zip", "application/zip"), # must be zipped
12
- "OpenFileGDB": ("gdb", "zip", "application/zip"), # must be zipped
13
- "GeoJSON": ("geojson", "geojson", "application/geo+json"),
14
  "CSV": ("csv", "csv", "text/csv"),
15
  "KML": ("kml", "kml", "application/vnd.google-earth.kml+xml"),
 
 
 
 
16
  }
17
 
18
 
@@ -22,7 +24,7 @@ def read_file(file: BinaryIO, *args, **kwargs) -> gpd.GeoDataFrame:
22
  ext = ext.lower().strip(".")
23
  if ext == "zip":
24
  with TemporaryDirectory() as tmp_dir:
25
- tmp_file_path = os.path.join(tmp_dir, file.name)
26
  with open(tmp_file_path, "wb") as tmp_file:
27
  tmp_file.write(file.read())
28
  return gpd.read_file(
@@ -33,7 +35,7 @@ def read_file(file: BinaryIO, *args, **kwargs) -> gpd.GeoDataFrame:
33
  )
34
  elif ext in ("kml", "kmz"):
35
  with TemporaryDirectory() as tmp_dir:
36
- tmp_file_path = os.path.join(tmp_dir, file.name)
37
  with open(tmp_file_path, "wb") as tmp_file:
38
  tmp_file.write(file.read())
39
  return load_ge_data(tmp_file_path)
@@ -60,13 +62,12 @@ def convert(gdf: gpd.GeoDataFrame, output_name: str, output_format: str) -> byte
60
  """Convert a GeoDataFrame to the specified format"""
61
  with TemporaryDirectory() as tmpdir:
62
  out_path = os.path.join(tmpdir, output_name)
63
-
64
  if output_format == "CSV":
65
  gdf.to_csv(out_path)
 
 
 
66
  else:
67
- # if output_format == 'KML': # need to longitude and latitude columns
68
- # # gdf = gdf.apply(lambda row: swap_coordinates(row), axis=1)
69
- # gdf['geometry'] = gdf['geometry'].apply(swap_coordinates)
70
  gdf.to_file(out_path, driver=output_format, engine="pyogrio")
71
 
72
  if output_format in ("ESRI Shapefile", "OpenFileGDB"):
 
1
  import io
2
  import os
3
  import zipfile
 
 
4
  import geopandas as gpd
5
+ import topojson
6
 
7
+ from tempfile import TemporaryDirectory
8
+ from typing import BinaryIO
9
  from kml_tricks import load_ge_data
10
 
11
  output_format_dict = {
 
 
 
12
  "CSV": ("csv", "csv", "text/csv"),
13
  "KML": ("kml", "kml", "application/vnd.google-earth.kml+xml"),
14
+ "GeoJSON": ("geojson", "geojson", "application/geo+json"),
15
+ "TopoJSON": ("topojson", "topojson", "application/json"),
16
+ "ESRI Shapefile": ("shp", "zip", "application/zip"), # must be zipped
17
+ "OpenFileGDB": ("gdb", "zip", "application/zip"), # must be zipped
18
  }
19
 
20
 
 
24
  ext = ext.lower().strip(".")
25
  if ext == "zip":
26
  with TemporaryDirectory() as tmp_dir:
27
+ tmp_file_path = os.path.join(tmp_dir, f"{basename}.{ext}")
28
  with open(tmp_file_path, "wb") as tmp_file:
29
  tmp_file.write(file.read())
30
  return gpd.read_file(
 
35
  )
36
  elif ext in ("kml", "kmz"):
37
  with TemporaryDirectory() as tmp_dir:
38
+ tmp_file_path = os.path.join(tmp_dir, f"{basename}.{ext}")
39
  with open(tmp_file_path, "wb") as tmp_file:
40
  tmp_file.write(file.read())
41
  return load_ge_data(tmp_file_path)
 
62
  """Convert a GeoDataFrame to the specified format"""
63
  with TemporaryDirectory() as tmpdir:
64
  out_path = os.path.join(tmpdir, output_name)
 
65
  if output_format == "CSV":
66
  gdf.to_csv(out_path)
67
+ elif output_format == "TopoJSON":
68
+ topojson_data = topojson.Topology(gdf)
69
+ topojson_data.to_json(out_path)
70
  else:
 
 
 
71
  gdf.to_file(out_path, driver=output_format, engine="pyogrio")
72
 
73
  if output_format in ("ESRI Shapefile", "OpenFileGDB"):
bumpver.toml → pyproject.toml RENAMED
@@ -1,4 +1,9 @@
1
- [bumpver]
 
 
 
 
 
2
  current_version = "0.2.1"
3
  version_pattern = "MAJOR.MINOR.PATCH"
4
  commit_message = "bump version {old_version} -> {new_version}"
@@ -10,7 +15,7 @@ commit = true
10
  tag = true
11
  push = true
12
 
13
- [bumpver.file_patterns]
14
  "bumpver.toml" = [
15
  'current_version = "{version}"',
16
  ]
 
1
+ [tool.pytest.ini_options]
2
+ pythonpath = ["geospatial-data-converter"]
3
+ testpaths = ["tests"]
4
+ addopts = "--cov=geospatial-data-converter"
5
+
6
+ [tool.bumpver]
7
  current_version = "0.2.1"
8
  version_pattern = "MAJOR.MINOR.PATCH"
9
  commit_message = "bump version {old_version} -> {new_version}"
 
15
  tag = true
16
  push = true
17
 
18
+ [tool.bumpver.file_patterns]
19
  "bumpver.toml" = [
20
  'current_version = "{version}"',
21
  ]
requirements.txt CHANGED
@@ -2,6 +2,8 @@ aiohttp==3.9.1
2
  beautifulsoup4==4.12.2
3
  geopandas==0.14.1
4
  lxml==4.9.3
 
5
  pyogrio==0.7.2
6
  restgdf==0.9.7
7
  streamlit==1.29.0
 
 
2
  beautifulsoup4==4.12.2
3
  geopandas==0.14.1
4
  lxml==4.9.3
5
+ numpy==1.26.2 # pinned for snyk
6
  pyogrio==0.7.2
7
  restgdf==0.9.7
8
  streamlit==1.29.0
9
+ topojson==1.7
tests/test_conversions.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import pytest
3
+ from utils import convert, read_file, output_format_dict
4
+
5
+ input_exts = ["kml", "kmz", "geojson", "zip"]
6
+ output_exts = output_format_dict.keys()
7
+
8
+
9
+ @pytest.mark.parametrize("in_ext", input_exts)
10
+ @pytest.mark.parametrize("out_ext", output_exts)
11
+ def test_coversion(in_ext: str, out_ext: str) -> None:
12
+ test_file = f"test.{in_ext}"
13
+ test_file_path = os.path.join(os.getcwd(), "tests", "test_data", test_file)
14
+ with open(test_file_path, "rb") as f:
15
+ in_file = read_file(f)
16
+ out_file = f"test.{output_format_dict[out_ext][0]}"
17
+ converted_data = convert(in_file, out_file, out_ext)
18
+ with open("test.kml", "wb") as f:
19
+ f.write(converted_data)
tests/test_data/test.geojson ADDED
The diff for this file is too large to render. See raw diff
 
tests/test_data/test.kml ADDED
The diff for this file is too large to render. See raw diff
 
tests/test_data/test.kmz ADDED
Binary file (687 Bytes). View file
 
tests/test_data/test.zip ADDED
Binary file (96.7 kB). View file