Update src/streamlit_app.py
Browse files- src/streamlit_app.py +46 -76
src/streamlit_app.py
CHANGED
@@ -18,18 +18,15 @@ st.write("This app uses the `ProjectMultiplexCoop/PropertyBoundaries` dataset fr
|
|
18 |
def load_and_process_data():
|
19 |
"""
|
20 |
Loads the geospatial data from Hugging Face, processes relevant columns,
|
21 |
-
|
22 |
"""
|
23 |
try:
|
24 |
-
# Load the geospatial data using geopandas
|
25 |
-
# Ensure you have 'huggingface_hub', 'geopandas', 'fiona', 'pyproj', 'shapely' installed.
|
26 |
gdf = gpd.read_parquet("hf://datasets/ProjectMultiplexCoop/PropertyBoundaries/Property_Boundaries_4326.parquet")
|
27 |
except Exception as e:
|
28 |
st.error(f"Failed to load data from Hugging Face. Please ensure `huggingface_hub`, `geopandas`, `fiona`, and `pyproj` are installed. Error: {e}")
|
29 |
st.stop()
|
30 |
|
31 |
# Process STATEDAREA to numeric (Lot Area in Sq Metres)
|
32 |
-
# The format is like "17366.998291 sq.m"
|
33 |
def parse_stated_area(area_str):
|
34 |
if pd.isna(area_str):
|
35 |
return np.nan
|
@@ -44,7 +41,6 @@ def load_and_process_data():
|
|
44 |
gdf['zn_type'] = gdf['FEATURE_TYPE']
|
45 |
|
46 |
# Generate synthetic data for attributes not present in the Hugging Face dataset
|
47 |
-
# but required for the filter functionality as per the original HTML.
|
48 |
num_rows = len(gdf)
|
49 |
gdf['fsi_total'] = np.round(np.random.uniform(0.5, 3.0, num_rows), 2)
|
50 |
gdf['prcnt_cver'] = np.random.randint(20, 70, num_rows)
|
@@ -56,11 +52,24 @@ def load_and_process_data():
|
|
56 |
gdf['name'] = gdf['PARCELID'].apply(lambda x: f"Parcel {x}")
|
57 |
|
58 |
# Ensure geometries are valid for centroid calculation and plotting
|
59 |
-
# .buffer(0) is a common trick to fix minor geometry issues
|
60 |
gdf['geometry'] = gdf['geometry'].buffer(0)
|
61 |
-
|
62 |
-
|
63 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
64 |
|
65 |
# Select and reorder relevant columns for display and filtering
|
66 |
df_processed = gdf[[
|
@@ -75,9 +84,10 @@ def load_and_process_data():
|
|
75 |
df = load_and_process_data()
|
76 |
|
77 |
# Initialize filtered_df with the full dataframe for initial state
|
|
|
78 |
filtered_df = df.copy()
|
79 |
|
80 |
-
# --- 2. Initialize the Folium Map
|
81 |
# Center the map around the mean of the actual data's centroids
|
82 |
m = folium.Map(location=[df['latitude'].mean(), df['longitude'].mean()], zoom_start=12)
|
83 |
|
@@ -90,7 +100,7 @@ draw = Draw(
|
|
90 |
"polyline": False, "rectangle": False, "circlemarker": False,
|
91 |
"circle": False, "marker": False,
|
92 |
"polygon": {
|
93 |
-
"allowIntersection": False,
|
94 |
"drawError": {"color": "#e0115f", "message": "Oups!"},
|
95 |
"shapeOptions": {"color": "#ef233c", "fillOpacity": 0.5},
|
96 |
},
|
@@ -99,29 +109,8 @@ draw = Draw(
|
|
99 |
)
|
100 |
m.add_child(draw)
|
101 |
|
102 |
-
# Add a sample of points to the initial map for responsiveness
|
103 |
-
# Plotting all 500k+ polygons/points at once can cause performance issues.
|
104 |
-
sample_df_for_initial_map = df.sample(min(1000, len(df)), random_state=42) # Sample up to 1000 points
|
105 |
-
|
106 |
-
for idx, row in sample_df_for_initial_map.iterrows():
|
107 |
-
folium.CircleMarker(
|
108 |
-
location=[row['latitude'], row['longitude']],
|
109 |
-
radius=3, # Smaller radius for denser data points
|
110 |
-
color='blue',
|
111 |
-
fill=True,
|
112 |
-
fill_color='blue',
|
113 |
-
fill_opacity=0.5,
|
114 |
-
tooltip=(
|
115 |
-
f"Parcel ID: {row['PARCELID']}<br>Name: {row['name']}<br>Zoning: {row['zn_type']}<br>"
|
116 |
-
f"Area: {row['zn_area'] if pd.notna(row['zn_area']) else 'N/A'} m²<br>"
|
117 |
-
f"FSI: {row['fsi_total']}<br>Coverage: {row['prcnt_cver']}%<br>"
|
118 |
-
f"Height: {row['height_metres']}m<br>Stories: {row['stories']}<br>"
|
119 |
-
f"Address: {row['ADDRESS_NUMBER'] if pd.notna(row['ADDRESS_NUMBER']) else ''} {row['LINEAR_NAME_FULL'] if pd.notna(row['LINEAR_NAME_FULL']) else ''}"
|
120 |
-
)
|
121 |
-
).add_to(m)
|
122 |
-
|
123 |
st.subheader("Draw a Polygon on the Map")
|
124 |
-
st.info(
|
125 |
output = st_folium(m, width=1000, height=600, returned_objects=["all_draw_features"])
|
126 |
|
127 |
polygon_drawn = False
|
@@ -147,12 +136,12 @@ if output and output["all_draw_features"]:
|
|
147 |
lambda row: shapely_polygon.contains(Point(row['longitude'], row['latitude'])),
|
148 |
axis=1
|
149 |
)
|
150 |
-
].copy()
|
151 |
st.success(f"Initially filtered {len(filtered_df)} properties within the drawn polygon.")
|
152 |
else:
|
153 |
-
st.info("Draw a polygon on the map to spatially filter properties.")
|
154 |
else:
|
155 |
-
st.info("Draw a polygon on the map to spatially filter properties.")
|
156 |
|
157 |
# --- 3. Attribute Filtering Form ---
|
158 |
st.subheader("Filter Property Attributes")
|
@@ -161,13 +150,9 @@ with st.form("attribute_filters"):
|
|
161 |
col1, col2 = st.columns(2)
|
162 |
|
163 |
with col1:
|
164 |
-
# Zoning Type filter
|
165 |
-
# Get unique zoning types from the loaded data, including a default 'All' option
|
166 |
all_zoning_types = ['All Resdidential Zoning (0, 101, 6)'] + sorted(df['zn_type'].unique().tolist())
|
167 |
selected_zn_type = st.selectbox("Zoning Type", all_zoning_types, key="zn_type_select")
|
168 |
|
169 |
-
# Lot Area in Sq Metres filter
|
170 |
-
# Use actual min/max from data for number input range
|
171 |
min_zn_area = st.number_input(
|
172 |
"Minimum Lot Area in Sq Metres",
|
173 |
min_value=float(df['zn_area'].min() if pd.notna(df['zn_area'].min()) else 0),
|
@@ -176,41 +161,35 @@ with st.form("attribute_filters"):
|
|
176 |
key="zn_area_input"
|
177 |
)
|
178 |
|
179 |
-
# Floor Space Index (FSI) filter - Synthetic data
|
180 |
min_fsi_total = st.number_input("Minimum Floor Space Index (FSI)", min_value=0.0, value=0.0, step=0.1, format="%.2f", key="fsi_total_input")
|
181 |
|
182 |
with col2:
|
183 |
-
# Building Percent Coverage filter - Synthetic data
|
184 |
max_prcnt_cver = st.number_input("Maximum Building Percent Coverage (%)", min_value=0, value=100, step=1, key="prcnt_cver_input")
|
185 |
|
186 |
-
# Height or Stories selection - Synthetic data
|
187 |
height_stories_option = st.radio(
|
188 |
"Filter by",
|
189 |
("Height", "Stories"),
|
190 |
-
index=0,
|
191 |
key="height_stories_radio"
|
192 |
)
|
193 |
|
194 |
-
# Single input field for height/stories, label changes dynamically
|
195 |
if height_stories_option == "Height":
|
196 |
min_height_value = st.number_input("Minimum Height in Metres", min_value=0.0, value=0.0, step=0.1, format="%.1f", key="height_input")
|
197 |
-
else:
|
198 |
min_stories_value = st.number_input("Minimum Stories", min_value=0, value=0, step=1, key="stories_input")
|
199 |
|
200 |
submitted = st.form_submit_button("Apply Attribute Filters")
|
201 |
|
202 |
if submitted:
|
203 |
-
# Apply attribute filters to the already spatially filtered_df
|
204 |
if selected_zn_type != 'All Resdidential Zoning (0, 101, 6)':
|
205 |
filtered_df = filtered_df[filtered_df['zn_type'] == selected_zn_type]
|
206 |
|
207 |
-
# Handle NaN values for zn_area before comparison by treating NaN as 0 for min comparison
|
208 |
filtered_df = filtered_df[filtered_df['zn_area'].fillna(0) >= min_zn_area]
|
209 |
|
210 |
if min_fsi_total > 0:
|
211 |
filtered_df = filtered_df[filtered_df['fsi_total'] >= min_fsi_total]
|
212 |
|
213 |
-
if max_prcnt_cver < 100:
|
214 |
filtered_df = filtered_df[filtered_df['prcnt_cver'] <= max_prcnt_cver]
|
215 |
|
216 |
if height_stories_option == "Height" and min_height_value > 0:
|
@@ -224,39 +203,31 @@ with st.form("attribute_filters"):
|
|
224 |
|
225 |
|
226 |
# --- 4. Display Filtered Data on a New Map and as a Table ---
|
227 |
-
st.subheader("Filtered Properties")
|
228 |
-
|
229 |
if not filtered_df.empty:
|
230 |
-
#
|
231 |
-
|
232 |
-
|
233 |
-
|
234 |
-
|
235 |
-
|
236 |
-
|
237 |
-
if min_lat == max_lat and min_lon == max_lon:
|
238 |
-
filtered_map_center = [min_lat, min_lon]
|
239 |
-
filtered_map_zoom = 18 # Very close zoom for single point
|
240 |
-
else:
|
241 |
-
filtered_map_center = [filtered_df['latitude'].mean(), filtered_df['longitude'].mean()]
|
242 |
-
# Simple heuristic for zoom level based on spatial extent
|
243 |
-
lat_diff = max_lat - min_lat
|
244 |
-
lon_diff = max_lon - min_lon
|
245 |
-
if max(lat_diff, lon_diff) < 0.001: filtered_map_zoom = 18
|
246 |
-
elif max(lat_diff, lon_diff) < 0.01: filtered_map_zoom = 16
|
247 |
-
elif max(lat_diff, lon_diff) < 0.1: filtered_map_zoom = 14
|
248 |
-
else: filtered_map_zoom = 12
|
249 |
else:
|
250 |
-
|
251 |
-
|
252 |
-
|
|
|
|
|
|
|
|
|
|
|
253 |
|
254 |
filtered_m = folium.Map(location=filtered_map_center, zoom_start=filtered_map_zoom)
|
255 |
|
256 |
# Add the drawn polygon to the new map if it exists
|
257 |
if polygon_drawn and polygon_coords:
|
258 |
folium.Polygon(
|
259 |
-
locations=polygon_coords,
|
260 |
color="#ef233c",
|
261 |
fill=True,
|
262 |
fill_color="#ef233c",
|
@@ -285,7 +256,6 @@ if not filtered_df.empty:
|
|
285 |
st_folium(filtered_m, width=1000, height=500)
|
286 |
|
287 |
st.subheader("Filtered Properties Table")
|
288 |
-
# Display relevant columns in the table
|
289 |
display_cols = ['PARCELID', 'zn_type', 'zn_area', 'fsi_total', 'prcnt_cver', 'height_metres', 'stories', 'ADDRESS_NUMBER', 'LINEAR_NAME_FULL']
|
290 |
st.dataframe(filtered_df[display_cols])
|
291 |
|
@@ -299,7 +269,7 @@ if not filtered_df.empty:
|
|
299 |
)
|
300 |
|
301 |
else:
|
302 |
-
st.warning("No properties match the current filters.
|
303 |
|
304 |
st.markdown("---")
|
305 |
st.markdown("This app demonstrates spatial and attribute filtering on the ProjectMultiplexCoop/PropertyBoundaries dataset from Hugging Face. FSI, Building Coverage, Height, and Stories are synthetic for demonstration.")
|
|
|
18 |
def load_and_process_data():
|
19 |
"""
|
20 |
Loads the geospatial data from Hugging Face, processes relevant columns,
|
21 |
+
generates synthetic data for missing attributes, and re-projects for centroid calculation.
|
22 |
"""
|
23 |
try:
|
|
|
|
|
24 |
gdf = gpd.read_parquet("hf://datasets/ProjectMultiplexCoop/PropertyBoundaries/Property_Boundaries_4326.parquet")
|
25 |
except Exception as e:
|
26 |
st.error(f"Failed to load data from Hugging Face. Please ensure `huggingface_hub`, `geopandas`, `fiona`, and `pyproj` are installed. Error: {e}")
|
27 |
st.stop()
|
28 |
|
29 |
# Process STATEDAREA to numeric (Lot Area in Sq Metres)
|
|
|
30 |
def parse_stated_area(area_str):
|
31 |
if pd.isna(area_str):
|
32 |
return np.nan
|
|
|
41 |
gdf['zn_type'] = gdf['FEATURE_TYPE']
|
42 |
|
43 |
# Generate synthetic data for attributes not present in the Hugging Face dataset
|
|
|
44 |
num_rows = len(gdf)
|
45 |
gdf['fsi_total'] = np.round(np.random.uniform(0.5, 3.0, num_rows), 2)
|
46 |
gdf['prcnt_cver'] = np.random.randint(20, 70, num_rows)
|
|
|
52 |
gdf['name'] = gdf['PARCELID'].apply(lambda x: f"Parcel {x}")
|
53 |
|
54 |
# Ensure geometries are valid for centroid calculation and plotting
|
|
|
55 |
gdf['geometry'] = gdf['geometry'].buffer(0)
|
56 |
+
|
57 |
+
# --- IMPORTANT: Re-project for accurate centroid calculation ---
|
58 |
+
# Convert to a projected CRS (e.g., Web Mercator EPSG:3857) for accurate centroid calculation
|
59 |
+
gdf_projected = gdf.to_crs(epsg=3857)
|
60 |
+
|
61 |
+
# Calculate centroids on the projected CRS
|
62 |
+
gdf['centroid_x_proj'] = gdf_projected.geometry.centroid.x
|
63 |
+
gdf['centroid_y_proj'] = gdf_projected.geometry.centroid.y
|
64 |
+
|
65 |
+
# Convert centroids back to geographic CRS (EPSG:4326) for Folium plotting
|
66 |
+
centroids_gdf = gpd.GeoDataFrame(
|
67 |
+
gdf.index,
|
68 |
+
geometry=gpd.points_from_xy(gdf['centroid_x_proj'], gdf['centroid_y_proj'], crs="EPSG:3857")
|
69 |
+
).to_crs(epsg=4326)
|
70 |
+
|
71 |
+
gdf['latitude'] = centroids_gdf.geometry.y
|
72 |
+
gdf['longitude'] = centroids_gdf.geometry.x
|
73 |
|
74 |
# Select and reorder relevant columns for display and filtering
|
75 |
df_processed = gdf[[
|
|
|
84 |
df = load_and_process_data()
|
85 |
|
86 |
# Initialize filtered_df with the full dataframe for initial state
|
87 |
+
# This will be updated based on spatial and attribute filters
|
88 |
filtered_df = df.copy()
|
89 |
|
90 |
+
# --- 2. Initialize the Folium Map for Drawing (no initial points plotted) ---
|
91 |
# Center the map around the mean of the actual data's centroids
|
92 |
m = folium.Map(location=[df['latitude'].mean(), df['longitude'].mean()], zoom_start=12)
|
93 |
|
|
|
100 |
"polyline": False, "rectangle": False, "circlemarker": False,
|
101 |
"circle": False, "marker": False,
|
102 |
"polygon": {
|
103 |
+
"allowIntersection": False,
|
104 |
"drawError": {"color": "#e0115f", "message": "Oups!"},
|
105 |
"shapeOptions": {"color": "#ef233c", "fillOpacity": 0.5},
|
106 |
},
|
|
|
109 |
)
|
110 |
m.add_child(draw)
|
111 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
112 |
st.subheader("Draw a Polygon on the Map")
|
113 |
+
st.info("Draw a polygon on the map to spatially filter properties. The filtered results will appear below.")
|
114 |
output = st_folium(m, width=1000, height=600, returned_objects=["all_draw_features"])
|
115 |
|
116 |
polygon_drawn = False
|
|
|
136 |
lambda row: shapely_polygon.contains(Point(row['longitude'], row['latitude'])),
|
137 |
axis=1
|
138 |
)
|
139 |
+
].copy()
|
140 |
st.success(f"Initially filtered {len(filtered_df)} properties within the drawn polygon.")
|
141 |
else:
|
142 |
+
st.info("No polygon drawn yet. Draw a polygon on the map to spatially filter properties.")
|
143 |
else:
|
144 |
+
st.info("No polygon drawn yet. Draw a polygon on the map to spatially filter properties.")
|
145 |
|
146 |
# --- 3. Attribute Filtering Form ---
|
147 |
st.subheader("Filter Property Attributes")
|
|
|
150 |
col1, col2 = st.columns(2)
|
151 |
|
152 |
with col1:
|
|
|
|
|
153 |
all_zoning_types = ['All Resdidential Zoning (0, 101, 6)'] + sorted(df['zn_type'].unique().tolist())
|
154 |
selected_zn_type = st.selectbox("Zoning Type", all_zoning_types, key="zn_type_select")
|
155 |
|
|
|
|
|
156 |
min_zn_area = st.number_input(
|
157 |
"Minimum Lot Area in Sq Metres",
|
158 |
min_value=float(df['zn_area'].min() if pd.notna(df['zn_area'].min()) else 0),
|
|
|
161 |
key="zn_area_input"
|
162 |
)
|
163 |
|
|
|
164 |
min_fsi_total = st.number_input("Minimum Floor Space Index (FSI)", min_value=0.0, value=0.0, step=0.1, format="%.2f", key="fsi_total_input")
|
165 |
|
166 |
with col2:
|
|
|
167 |
max_prcnt_cver = st.number_input("Maximum Building Percent Coverage (%)", min_value=0, value=100, step=1, key="prcnt_cver_input")
|
168 |
|
|
|
169 |
height_stories_option = st.radio(
|
170 |
"Filter by",
|
171 |
("Height", "Stories"),
|
172 |
+
index=0,
|
173 |
key="height_stories_radio"
|
174 |
)
|
175 |
|
|
|
176 |
if height_stories_option == "Height":
|
177 |
min_height_value = st.number_input("Minimum Height in Metres", min_value=0.0, value=0.0, step=0.1, format="%.1f", key="height_input")
|
178 |
+
else:
|
179 |
min_stories_value = st.number_input("Minimum Stories", min_value=0, value=0, step=1, key="stories_input")
|
180 |
|
181 |
submitted = st.form_submit_button("Apply Attribute Filters")
|
182 |
|
183 |
if submitted:
|
|
|
184 |
if selected_zn_type != 'All Resdidential Zoning (0, 101, 6)':
|
185 |
filtered_df = filtered_df[filtered_df['zn_type'] == selected_zn_type]
|
186 |
|
|
|
187 |
filtered_df = filtered_df[filtered_df['zn_area'].fillna(0) >= min_zn_area]
|
188 |
|
189 |
if min_fsi_total > 0:
|
190 |
filtered_df = filtered_df[filtered_df['fsi_total'] >= min_fsi_total]
|
191 |
|
192 |
+
if max_prcnt_cver < 100:
|
193 |
filtered_df = filtered_df[filtered_df['prcnt_cver'] <= max_prcnt_cver]
|
194 |
|
195 |
if height_stories_option == "Height" and min_height_value > 0:
|
|
|
203 |
|
204 |
|
205 |
# --- 4. Display Filtered Data on a New Map and as a Table ---
|
206 |
+
st.subheader("Filtered Properties Display")
|
|
|
207 |
if not filtered_df.empty:
|
208 |
+
# Calculate bounds for filtered data to set appropriate zoom
|
209 |
+
min_lat, max_lat = filtered_df['latitude'].min(), filtered_df['latitude'].max()
|
210 |
+
min_lon, max_lon = filtered_df['longitude'].min(), filtered_df['longitude'].max()
|
211 |
+
|
212 |
+
if min_lat == max_lat and min_lon == max_lon: # Single point case
|
213 |
+
filtered_map_center = [min_lat, min_lon]
|
214 |
+
filtered_map_zoom = 18
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
215 |
else:
|
216 |
+
filtered_map_center = [filtered_df['latitude'].mean(), filtered_df['longitude'].mean()]
|
217 |
+
lat_diff = max_lat - min_lat
|
218 |
+
lon_diff = max_lon - min_lon
|
219 |
+
# Heuristic for zoom level
|
220 |
+
if max(lat_diff, lon_diff) < 0.001: filtered_map_zoom = 18
|
221 |
+
elif max(lat_diff, lon_diff) < 0.01: filtered_map_zoom = 16
|
222 |
+
elif max(lat_diff, lon_diff) < 0.1: filtered_map_zoom = 14
|
223 |
+
else: filtered_map_zoom = 12
|
224 |
|
225 |
filtered_m = folium.Map(location=filtered_map_center, zoom_start=filtered_map_zoom)
|
226 |
|
227 |
# Add the drawn polygon to the new map if it exists
|
228 |
if polygon_drawn and polygon_coords:
|
229 |
folium.Polygon(
|
230 |
+
locations=polygon_coords,
|
231 |
color="#ef233c",
|
232 |
fill=True,
|
233 |
fill_color="#ef233c",
|
|
|
256 |
st_folium(filtered_m, width=1000, height=500)
|
257 |
|
258 |
st.subheader("Filtered Properties Table")
|
|
|
259 |
display_cols = ['PARCELID', 'zn_type', 'zn_area', 'fsi_total', 'prcnt_cver', 'height_metres', 'stories', 'ADDRESS_NUMBER', 'LINEAR_NAME_FULL']
|
260 |
st.dataframe(filtered_df[display_cols])
|
261 |
|
|
|
269 |
)
|
270 |
|
271 |
else:
|
272 |
+
st.warning("No properties match the current filters. Adjust your criteria or draw a polygon on the map.")
|
273 |
|
274 |
st.markdown("---")
|
275 |
st.markdown("This app demonstrates spatial and attribute filtering on the ProjectMultiplexCoop/PropertyBoundaries dataset from Hugging Face. FSI, Building Coverage, Height, and Stories are synthetic for demonstration.")
|