npc0 committed on
Commit
af5ac46
·
verified ·
1 Parent(s): 3545922

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +46 -76
src/streamlit_app.py CHANGED
@@ -18,18 +18,15 @@ st.write("This app uses the `ProjectMultiplexCoop/PropertyBoundaries` dataset fr
18
  def load_and_process_data():
19
  """
20
  Loads the geospatial data from Hugging Face, processes relevant columns,
21
- and generates synthetic data for missing attributes.
22
  """
23
  try:
24
- # Load the geospatial data using geopandas
25
- # Ensure you have 'huggingface_hub', 'geopandas', 'fiona', 'pyproj', 'shapely' installed.
26
  gdf = gpd.read_parquet("hf://datasets/ProjectMultiplexCoop/PropertyBoundaries/Property_Boundaries_4326.parquet")
27
  except Exception as e:
28
  st.error(f"Failed to load data from Hugging Face. Please ensure `huggingface_hub`, `geopandas`, `fiona`, and `pyproj` are installed. Error: {e}")
29
  st.stop()
30
 
31
  # Process STATEDAREA to numeric (Lot Area in Sq Metres)
32
- # The format is like "17366.998291 sq.m"
33
  def parse_stated_area(area_str):
34
  if pd.isna(area_str):
35
  return np.nan
@@ -44,7 +41,6 @@ def load_and_process_data():
44
  gdf['zn_type'] = gdf['FEATURE_TYPE']
45
 
46
  # Generate synthetic data for attributes not present in the Hugging Face dataset
47
- # but required for the filter functionality as per the original HTML.
48
  num_rows = len(gdf)
49
  gdf['fsi_total'] = np.round(np.random.uniform(0.5, 3.0, num_rows), 2)
50
  gdf['prcnt_cver'] = np.random.randint(20, 70, num_rows)
@@ -56,11 +52,24 @@ def load_and_process_data():
56
  gdf['name'] = gdf['PARCELID'].apply(lambda x: f"Parcel {x}")
57
 
58
  # Ensure geometries are valid for centroid calculation and plotting
59
- # .buffer(0) is a common trick to fix minor geometry issues
60
  gdf['geometry'] = gdf['geometry'].buffer(0)
61
- # Extract centroids for point-based filtering and initial map markers
62
- gdf['latitude'] = gdf.geometry.centroid.y
63
- gdf['longitude'] = gdf.geometry.centroid.x
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
 
65
  # Select and reorder relevant columns for display and filtering
66
  df_processed = gdf[[
@@ -75,9 +84,10 @@ def load_and_process_data():
75
  df = load_and_process_data()
76
 
77
  # Initialize filtered_df with the full dataframe for initial state
 
78
  filtered_df = df.copy()
79
 
80
- # --- 2. Initialize the Folium Map with Drawing Tools ---
81
  # Center the map around the mean of the actual data's centroids
82
  m = folium.Map(location=[df['latitude'].mean(), df['longitude'].mean()], zoom_start=12)
83
 
@@ -90,7 +100,7 @@ draw = Draw(
90
  "polyline": False, "rectangle": False, "circlemarker": False,
91
  "circle": False, "marker": False,
92
  "polygon": {
93
- "allowIntersection": False, # Restricts polygons to not intersect themselves
94
  "drawError": {"color": "#e0115f", "message": "Oups!"},
95
  "shapeOptions": {"color": "#ef233c", "fillOpacity": 0.5},
96
  },
@@ -99,29 +109,8 @@ draw = Draw(
99
  )
100
  m.add_child(draw)
101
 
102
- # Add a sample of points to the initial map for responsiveness
103
- # Plotting all 500k+ polygons/points at once can cause performance issues.
104
- sample_df_for_initial_map = df.sample(min(1000, len(df)), random_state=42) # Sample up to 1000 points
105
-
106
- for idx, row in sample_df_for_initial_map.iterrows():
107
- folium.CircleMarker(
108
- location=[row['latitude'], row['longitude']],
109
- radius=3, # Smaller radius for denser data points
110
- color='blue',
111
- fill=True,
112
- fill_color='blue',
113
- fill_opacity=0.5,
114
- tooltip=(
115
- f"Parcel ID: {row['PARCELID']}<br>Name: {row['name']}<br>Zoning: {row['zn_type']}<br>"
116
- f"Area: {row['zn_area'] if pd.notna(row['zn_area']) else 'N/A'} m²<br>"
117
- f"FSI: {row['fsi_total']}<br>Coverage: {row['prcnt_cver']}%<br>"
118
- f"Height: {row['height_metres']}m<br>Stories: {row['stories']}<br>"
119
- f"Address: {row['ADDRESS_NUMBER'] if pd.notna(row['ADDRESS_NUMBER']) else ''} {row['LINEAR_NAME_FULL'] if pd.notna(row['LINEAR_NAME_FULL']) else ''}"
120
- )
121
- ).add_to(m)
122
-
123
  st.subheader("Draw a Polygon on the Map")
124
- st.info(f"Displaying a sample of {len(sample_df_for_initial_map)} points on the map for responsiveness. All {len(df)} properties will be used for filtering.")
125
  output = st_folium(m, width=1000, height=600, returned_objects=["all_draw_features"])
126
 
127
  polygon_drawn = False
@@ -147,12 +136,12 @@ if output and output["all_draw_features"]:
147
  lambda row: shapely_polygon.contains(Point(row['longitude'], row['latitude'])),
148
  axis=1
149
  )
150
- ].copy() # Use .copy() to avoid SettingWithCopyWarning
151
  st.success(f"Initially filtered {len(filtered_df)} properties within the drawn polygon.")
152
  else:
153
- st.info("Draw a polygon on the map to spatially filter properties.")
154
  else:
155
- st.info("Draw a polygon on the map to spatially filter properties.")
156
 
157
  # --- 3. Attribute Filtering Form ---
158
  st.subheader("Filter Property Attributes")
@@ -161,13 +150,9 @@ with st.form("attribute_filters"):
161
  col1, col2 = st.columns(2)
162
 
163
  with col1:
164
- # Zoning Type filter
165
- # Get unique zoning types from the loaded data, including a default 'All' option
166
  all_zoning_types = ['All Resdidential Zoning (0, 101, 6)'] + sorted(df['zn_type'].unique().tolist())
167
  selected_zn_type = st.selectbox("Zoning Type", all_zoning_types, key="zn_type_select")
168
 
169
- # Lot Area in Sq Metres filter
170
- # Use actual min/max from data for number input range
171
  min_zn_area = st.number_input(
172
  "Minimum Lot Area in Sq Metres",
173
  min_value=float(df['zn_area'].min() if pd.notna(df['zn_area'].min()) else 0),
@@ -176,41 +161,35 @@ with st.form("attribute_filters"):
176
  key="zn_area_input"
177
  )
178
 
179
- # Floor Space Index (FSI) filter - Synthetic data
180
  min_fsi_total = st.number_input("Minimum Floor Space Index (FSI)", min_value=0.0, value=0.0, step=0.1, format="%.2f", key="fsi_total_input")
181
 
182
  with col2:
183
- # Building Percent Coverage filter - Synthetic data
184
  max_prcnt_cver = st.number_input("Maximum Building Percent Coverage (%)", min_value=0, value=100, step=1, key="prcnt_cver_input")
185
 
186
- # Height or Stories selection - Synthetic data
187
  height_stories_option = st.radio(
188
  "Filter by",
189
  ("Height", "Stories"),
190
- index=0, # Default to Height
191
  key="height_stories_radio"
192
  )
193
 
194
- # Single input field for height/stories, label changes dynamically
195
  if height_stories_option == "Height":
196
  min_height_value = st.number_input("Minimum Height in Metres", min_value=0.0, value=0.0, step=0.1, format="%.1f", key="height_input")
197
- else: # Stories
198
  min_stories_value = st.number_input("Minimum Stories", min_value=0, value=0, step=1, key="stories_input")
199
 
200
  submitted = st.form_submit_button("Apply Attribute Filters")
201
 
202
  if submitted:
203
- # Apply attribute filters to the already spatially filtered_df
204
  if selected_zn_type != 'All Resdidential Zoning (0, 101, 6)':
205
  filtered_df = filtered_df[filtered_df['zn_type'] == selected_zn_type]
206
 
207
- # Handle NaN values for zn_area before comparison by treating NaN as 0 for min comparison
208
  filtered_df = filtered_df[filtered_df['zn_area'].fillna(0) >= min_zn_area]
209
 
210
  if min_fsi_total > 0:
211
  filtered_df = filtered_df[filtered_df['fsi_total'] >= min_fsi_total]
212
 
213
- if max_prcnt_cver < 100: # Assuming 100% means no upper limit applied
214
  filtered_df = filtered_df[filtered_df['prcnt_cver'] <= max_prcnt_cver]
215
 
216
  if height_stories_option == "Height" and min_height_value > 0:
@@ -224,39 +203,31 @@ with st.form("attribute_filters"):
224
 
225
 
226
  # --- 4. Display Filtered Data on a New Map and as a Table ---
227
- st.subheader("Filtered Properties")
228
-
229
  if not filtered_df.empty:
230
- # Create a new map to show only the filtered properties
231
- if len(filtered_df) > 0:
232
- # Calculate bounds for filtered data to set appropriate zoom
233
- min_lat, max_lat = filtered_df['latitude'].min(), filtered_df['latitude'].max()
234
- min_lon, max_lon = filtered_df['longitude'].min(), filtered_df['longitude'].max()
235
-
236
- # Adjust map center and zoom dynamically based on filtered data extent
237
- if min_lat == max_lat and min_lon == max_lon:
238
- filtered_map_center = [min_lat, min_lon]
239
- filtered_map_zoom = 18 # Very close zoom for single point
240
- else:
241
- filtered_map_center = [filtered_df['latitude'].mean(), filtered_df['longitude'].mean()]
242
- # Simple heuristic for zoom level based on spatial extent
243
- lat_diff = max_lat - min_lat
244
- lon_diff = max_lon - min_lon
245
- if max(lat_diff, lon_diff) < 0.001: filtered_map_zoom = 18
246
- elif max(lat_diff, lon_diff) < 0.01: filtered_map_zoom = 16
247
- elif max(lat_diff, lon_diff) < 0.1: filtered_map_zoom = 14
248
- else: filtered_map_zoom = 12
249
  else:
250
- # Fallback to original map center if no data is filtered
251
- filtered_map_center = [df['latitude'].mean(), df['longitude'].mean()]
252
- filtered_map_zoom = 12
 
 
 
 
 
253
 
254
  filtered_m = folium.Map(location=filtered_map_center, zoom_start=filtered_map_zoom)
255
 
256
  # Add the drawn polygon to the new map if it exists
257
  if polygon_drawn and polygon_coords:
258
  folium.Polygon(
259
- locations=polygon_coords, # Use original (lat,lon) for folium
260
  color="#ef233c",
261
  fill=True,
262
  fill_color="#ef233c",
@@ -285,7 +256,6 @@ if not filtered_df.empty:
285
  st_folium(filtered_m, width=1000, height=500)
286
 
287
  st.subheader("Filtered Properties Table")
288
- # Display relevant columns in the table
289
  display_cols = ['PARCELID', 'zn_type', 'zn_area', 'fsi_total', 'prcnt_cver', 'height_metres', 'stories', 'ADDRESS_NUMBER', 'LINEAR_NAME_FULL']
290
  st.dataframe(filtered_df[display_cols])
291
 
@@ -299,7 +269,7 @@ if not filtered_df.empty:
299
  )
300
 
301
  else:
302
- st.warning("No properties match the current filters. Try adjusting your criteria or drawing a different polygon.")
303
 
304
  st.markdown("---")
305
  st.markdown("This app demonstrates spatial and attribute filtering on the ProjectMultiplexCoop/PropertyBoundaries dataset from Hugging Face. FSI, Building Coverage, Height, and Stories are synthetic for demonstration.")
 
18
  def load_and_process_data():
19
  """
20
  Loads the geospatial data from Hugging Face, processes relevant columns,
21
+ generates synthetic data for missing attributes, and re-projects for centroid calculation.
22
  """
23
  try:
 
 
24
  gdf = gpd.read_parquet("hf://datasets/ProjectMultiplexCoop/PropertyBoundaries/Property_Boundaries_4326.parquet")
25
  except Exception as e:
26
  st.error(f"Failed to load data from Hugging Face. Please ensure `huggingface_hub`, `geopandas`, `fiona`, and `pyproj` are installed. Error: {e}")
27
  st.stop()
28
 
29
  # Process STATEDAREA to numeric (Lot Area in Sq Metres)
 
30
  def parse_stated_area(area_str):
31
  if pd.isna(area_str):
32
  return np.nan
 
41
  gdf['zn_type'] = gdf['FEATURE_TYPE']
42
 
43
  # Generate synthetic data for attributes not present in the Hugging Face dataset
 
44
  num_rows = len(gdf)
45
  gdf['fsi_total'] = np.round(np.random.uniform(0.5, 3.0, num_rows), 2)
46
  gdf['prcnt_cver'] = np.random.randint(20, 70, num_rows)
 
52
  gdf['name'] = gdf['PARCELID'].apply(lambda x: f"Parcel {x}")
53
 
54
  # Ensure geometries are valid for centroid calculation and plotting
 
55
  gdf['geometry'] = gdf['geometry'].buffer(0)
56
+
57
+ # --- IMPORTANT: Re-project for accurate centroid calculation ---
58
+ # Convert to a projected CRS (e.g., Web Mercator EPSG:3857) for accurate centroid calculation
59
+ gdf_projected = gdf.to_crs(epsg=3857)
60
+
61
+ # Calculate centroids on the projected CRS
62
+ gdf['centroid_x_proj'] = gdf_projected.geometry.centroid.x
63
+ gdf['centroid_y_proj'] = gdf_projected.geometry.centroid.y
64
+
65
+ # Convert centroids back to geographic CRS (EPSG:4326) for Folium plotting
66
+ centroids_gdf = gpd.GeoDataFrame(
67
+ gdf.index,
68
+ geometry=gpd.points_from_xy(gdf['centroid_x_proj'], gdf['centroid_y_proj'], crs="EPSG:3857")
69
+ ).to_crs(epsg=4326)
70
+
71
+ gdf['latitude'] = centroids_gdf.geometry.y
72
+ gdf['longitude'] = centroids_gdf.geometry.x
73
 
74
  # Select and reorder relevant columns for display and filtering
75
  df_processed = gdf[[
 
84
  df = load_and_process_data()
85
 
86
  # Initialize filtered_df with the full dataframe for initial state
87
+ # This will be updated based on spatial and attribute filters
88
  filtered_df = df.copy()
89
 
90
+ # --- 2. Initialize the Folium Map for Drawing (no initial points plotted) ---
91
  # Center the map around the mean of the actual data's centroids
92
  m = folium.Map(location=[df['latitude'].mean(), df['longitude'].mean()], zoom_start=12)
93
 
 
100
  "polyline": False, "rectangle": False, "circlemarker": False,
101
  "circle": False, "marker": False,
102
  "polygon": {
103
+ "allowIntersection": False,
104
  "drawError": {"color": "#e0115f", "message": "Oups!"},
105
  "shapeOptions": {"color": "#ef233c", "fillOpacity": 0.5},
106
  },
 
109
  )
110
  m.add_child(draw)
111
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
112
  st.subheader("Draw a Polygon on the Map")
113
+ st.info("Draw a polygon on the map to spatially filter properties. The filtered results will appear below.")
114
  output = st_folium(m, width=1000, height=600, returned_objects=["all_draw_features"])
115
 
116
  polygon_drawn = False
 
136
  lambda row: shapely_polygon.contains(Point(row['longitude'], row['latitude'])),
137
  axis=1
138
  )
139
+ ].copy()
140
  st.success(f"Initially filtered {len(filtered_df)} properties within the drawn polygon.")
141
  else:
142
+ st.info("No polygon drawn yet. Draw a polygon on the map to spatially filter properties.")
143
  else:
144
+ st.info("No polygon drawn yet. Draw a polygon on the map to spatially filter properties.")
145
 
146
  # --- 3. Attribute Filtering Form ---
147
  st.subheader("Filter Property Attributes")
 
150
  col1, col2 = st.columns(2)
151
 
152
  with col1:
 
 
153
  all_zoning_types = ['All Resdidential Zoning (0, 101, 6)'] + sorted(df['zn_type'].unique().tolist())
154
  selected_zn_type = st.selectbox("Zoning Type", all_zoning_types, key="zn_type_select")
155
 
 
 
156
  min_zn_area = st.number_input(
157
  "Minimum Lot Area in Sq Metres",
158
  min_value=float(df['zn_area'].min() if pd.notna(df['zn_area'].min()) else 0),
 
161
  key="zn_area_input"
162
  )
163
 
 
164
  min_fsi_total = st.number_input("Minimum Floor Space Index (FSI)", min_value=0.0, value=0.0, step=0.1, format="%.2f", key="fsi_total_input")
165
 
166
  with col2:
 
167
  max_prcnt_cver = st.number_input("Maximum Building Percent Coverage (%)", min_value=0, value=100, step=1, key="prcnt_cver_input")
168
 
 
169
  height_stories_option = st.radio(
170
  "Filter by",
171
  ("Height", "Stories"),
172
+ index=0,
173
  key="height_stories_radio"
174
  )
175
 
 
176
  if height_stories_option == "Height":
177
  min_height_value = st.number_input("Minimum Height in Metres", min_value=0.0, value=0.0, step=0.1, format="%.1f", key="height_input")
178
+ else:
179
  min_stories_value = st.number_input("Minimum Stories", min_value=0, value=0, step=1, key="stories_input")
180
 
181
  submitted = st.form_submit_button("Apply Attribute Filters")
182
 
183
  if submitted:
 
184
  if selected_zn_type != 'All Resdidential Zoning (0, 101, 6)':
185
  filtered_df = filtered_df[filtered_df['zn_type'] == selected_zn_type]
186
 
 
187
  filtered_df = filtered_df[filtered_df['zn_area'].fillna(0) >= min_zn_area]
188
 
189
  if min_fsi_total > 0:
190
  filtered_df = filtered_df[filtered_df['fsi_total'] >= min_fsi_total]
191
 
192
+ if max_prcnt_cver < 100:
193
  filtered_df = filtered_df[filtered_df['prcnt_cver'] <= max_prcnt_cver]
194
 
195
  if height_stories_option == "Height" and min_height_value > 0:
 
203
 
204
 
205
  # --- 4. Display Filtered Data on a New Map and as a Table ---
206
+ st.subheader("Filtered Properties Display")
 
207
  if not filtered_df.empty:
208
+ # Calculate bounds for filtered data to set appropriate zoom
209
+ min_lat, max_lat = filtered_df['latitude'].min(), filtered_df['latitude'].max()
210
+ min_lon, max_lon = filtered_df['longitude'].min(), filtered_df['longitude'].max()
211
+
212
+ if min_lat == max_lat and min_lon == max_lon: # Single point case
213
+ filtered_map_center = [min_lat, min_lon]
214
+ filtered_map_zoom = 18
 
 
 
 
 
 
 
 
 
 
 
 
215
  else:
216
+ filtered_map_center = [filtered_df['latitude'].mean(), filtered_df['longitude'].mean()]
217
+ lat_diff = max_lat - min_lat
218
+ lon_diff = max_lon - min_lon
219
+ # Heuristic for zoom level
220
+ if max(lat_diff, lon_diff) < 0.001: filtered_map_zoom = 18
221
+ elif max(lat_diff, lon_diff) < 0.01: filtered_map_zoom = 16
222
+ elif max(lat_diff, lon_diff) < 0.1: filtered_map_zoom = 14
223
+ else: filtered_map_zoom = 12
224
 
225
  filtered_m = folium.Map(location=filtered_map_center, zoom_start=filtered_map_zoom)
226
 
227
  # Add the drawn polygon to the new map if it exists
228
  if polygon_drawn and polygon_coords:
229
  folium.Polygon(
230
+ locations=polygon_coords,
231
  color="#ef233c",
232
  fill=True,
233
  fill_color="#ef233c",
 
256
  st_folium(filtered_m, width=1000, height=500)
257
 
258
  st.subheader("Filtered Properties Table")
 
259
  display_cols = ['PARCELID', 'zn_type', 'zn_area', 'fsi_total', 'prcnt_cver', 'height_metres', 'stories', 'ADDRESS_NUMBER', 'LINEAR_NAME_FULL']
260
  st.dataframe(filtered_df[display_cols])
261
 
 
269
  )
270
 
271
  else:
272
+ st.warning("No properties match the current filters. Adjust your criteria or draw a polygon on the map.")
273
 
274
  st.markdown("---")
275
  st.markdown("This app demonstrates spatial and attribute filtering on the ProjectMultiplexCoop/PropertyBoundaries dataset from Hugging Face. FSI, Building Coverage, Height, and Stories are synthetic for demonstration.")