"""Streamlit app for filtering Multiplex Coop property parcels.

Loads the `ProjectMultiplexCoop/PropertyBoundaries` dataset from Hugging Face,
lets the user draw a polygon on a Folium map to filter parcels spatially (by
centroid containment), applies attribute filters from a form, and shows the
result on a second map, in a table, and as a CSV download.

FSI, building coverage, height and stories are generated randomly because the
script does not read them from the dataset.
"""
import re  # For parsing STATEDAREA

import folium
import geopandas as gpd
import numpy as np
import pandas as pd
import streamlit as st
from folium.plugins import Draw
from shapely.geometry import Point, Polygon
from streamlit_folium import st_folium

DATA_URL = "hf://datasets/ProjectMultiplexCoop/PropertyBoundaries/Property_Boundaries_4326.parquet"

# Sentinel option of the zoning select box meaning "do not filter by zoning type".
ALL_ZONING_LABEL = "All Residential Zoning (0, 101, 6)"

# Upper bound on features drawn on either map; larger sets are sampled so the
# browser stays responsive.
MAX_MAP_FEATURES = 1000

# Matches values such as "17366.998291 sq.m"; compiled once, used per row.
_STATED_AREA_RE = re.compile(r"(\d+\.?\d*)\s*sq\.m")

# Columns shown in the result map tooltip and in the result table.
DISPLAY_COLS = [
    "PARCELID", "zn_type", "zn_area", "fsi_total", "prcnt_cver",
    "height_metres", "stories", "ADDRESS_NUMBER", "LINEAR_NAME_FULL",
]

st.set_page_config(layout="wide", page_title="Multiplex Coop Housing Filter")

st.title("🗺️ Multiplex Coop Housing Filter (Hugging Face Data)")
st.write("This app uses the `ProjectMultiplexCoop/PropertyBoundaries` dataset from Hugging Face. Draw a polygon on the map to spatially filter properties. Use the form below to apply additional filters based on property attributes. **Note: FSI, Building Coverage, Height, and Stories are synthetic for demonstration as they are not directly available in the dataset.**")


def parse_stated_area(area_str):
    """Return the numeric square-metre value from a STATEDAREA string, or NaN.

    Values look like "17366.998291 sq.m". Missing values and strings that do
    not match that pattern yield NaN.
    """
    if pd.isna(area_str):
        return np.nan
    match = _STATED_AREA_RE.search(str(area_str))
    return float(match.group(1)) if match else np.nan


def build_tooltip(row) -> str:
    """Return the HTML tooltip for one parcel row of the overview map."""
    area = row["zn_area"] if pd.notna(row["zn_area"]) else "N/A"
    number = row["ADDRESS_NUMBER"] if pd.notna(row["ADDRESS_NUMBER"]) else ""
    street = row["LINEAR_NAME_FULL"] if pd.notna(row["LINEAR_NAME_FULL"]) else ""
    # Fields are separated with <br> because the tooltip is rendered as HTML.
    return (
        f"Parcel ID: {row['PARCELID']}<br>Name: {row['name']}<br>Zoning: {row['zn_type']}<br>"
        f"Area: {area} m²<br>"
        f"FSI: {row['fsi_total']}<br>Coverage: {row['prcnt_cver']}%<br>"
        f"Height: {row['height_metres']}m<br>Stories: {row['stories']}<br>"
        f"Address: {number} {street}"
    )


def zoom_for_extent(extent_degrees: float) -> int:
    """Pick a zoom level from the larger of the lat/lon extents (in degrees)."""
    for limit, zoom in ((0.001, 18), (0.01, 16), (0.1, 14)):
        if extent_degrees < limit:
            return zoom
    return 12


# --- 1. Load Data from Hugging Face and Process ---
@st.cache_data
def load_and_process_data():
    """Load the parcel dataset and derive the columns used for filtering.

    Reads the parquet file at ``DATA_URL``, parses ``STATEDAREA`` into the
    numeric ``zn_area`` column, copies ``FEATURE_TYPE`` to ``zn_type``, adds
    random synthetic ``fsi_total``, ``prcnt_cver``, ``height_metres`` and
    ``stories`` columns, and stores each geometry's centroid as
    ``latitude``/``longitude``.

    Returns:
        A copy of the loaded frame restricted to the columns the app uses.

    On a read failure an error is shown in the app and the script run stops.
    """
    try:
        # Ensure you have 'huggingface_hub', 'geopandas', 'fiona', 'pyproj', 'shapely' installed.
        gdf = gpd.read_parquet(DATA_URL)
    except Exception as e:
        st.error(f"Failed to load data from Hugging Face. Please ensure `huggingface_hub`, `geopandas`, `fiona`, and `pyproj` are installed. Error: {e}")
        st.stop()

    # Lot area in square metres, parsed from strings like "17366.998291 sq.m".
    gdf["zn_area"] = gdf["STATEDAREA"].apply(parse_stated_area)

    # Map FEATURE_TYPE to zn_type (Zoning Type).
    gdf["zn_type"] = gdf["FEATURE_TYPE"]

    # Synthetic values for attributes the filters need but that are not read
    # from the dataset.
    num_rows = len(gdf)
    gdf["fsi_total"] = np.round(np.random.uniform(0.5, 3.0, num_rows), 2)
    gdf["prcnt_cver"] = np.random.randint(20, 70, num_rows)
    gdf["height_metres"] = np.round(np.random.uniform(5, 30, num_rows), 1)
    gdf["stories"] = np.random.randint(2, 10, num_rows)

    # Unique ID and a display name.
    gdf["id"] = range(1, num_rows + 1)
    gdf["name"] = gdf["PARCELID"].apply(lambda x: f"Parcel {x}")

    # .buffer(0) is a common trick to repair minor geometry issues before
    # computing centroids and plotting.
    gdf["geometry"] = gdf["geometry"].buffer(0)

    # Centroids drive both the point-in-polygon filter and the map markers;
    # compute them once and reuse for both coordinates.
    centroids = gdf.geometry.centroid
    gdf["latitude"] = centroids.y
    gdf["longitude"] = centroids.x

    df_processed = gdf[[
        "id", "name", "latitude", "longitude", "geometry",
        "zn_type", "zn_area", "fsi_total", "prcnt_cver",
        "height_metres", "stories",
        "PARCELID",  # Original Parcel ID for reference
        "ADDRESS_NUMBER", "LINEAR_NAME_FULL",  # For detailed address in tooltips
    ]].copy()
    return df_processed


df = load_and_process_data()

# Start from the full dataset; the spatial and attribute filters narrow it.
filtered_df = df.copy()

# --- 2. Initialize the Folium Map with Drawing Tools ---
# Center the map on the mean of the parcel centroids.
m = folium.Map(location=[df["latitude"].mean(), df["longitude"].mean()], zoom_start=12)

draw = Draw(
    export=True,
    filename="drawn_polygon.geojson",
    position="topleft",
    draw_options={
        "polyline": False,
        "rectangle": False,
        "circlemarker": False,
        "circle": False,
        "marker": False,
        "polygon": {
            "allowIntersection": False,  # Restricts polygons to not intersect themselves
            "drawError": {"color": "#e0115f", "message": "Oups!"},
            "shapeOptions": {"color": "#ef233c", "fillOpacity": 0.5},
        },
    },
    edit_options={"edit": False, "remove": True},
)
m.add_child(draw)

# Only a sample is plotted: drawing every parcel at once can make the map
# unresponsive.
sample_df_for_initial_map = df.sample(min(MAX_MAP_FEATURES, len(df)), random_state=42)

for _, row in sample_df_for_initial_map.iterrows():
    folium.CircleMarker(
        location=[row["latitude"], row["longitude"]],
        radius=3,  # Smaller radius for denser data points
        color="blue",
        fill=True,
        fill_color="blue",
        fill_opacity=0.5,
        tooltip=build_tooltip(row),
    ).add_to(m)

st.subheader("Draw a Polygon on the Map")
st.info(f"Displaying a sample of {len(sample_df_for_initial_map)} points on the map for responsiveness. All {len(df)} properties will be used for filtering.")
output = st_folium(m, width=1000, height=600, returned_objects=["all_draw_features"])

polygon_drawn = False
shapely_polygon = None
polygon_coords = None  # Exterior ring of the drawn polygon as [lon, lat] pairs.

draw_features = (output or {}).get("all_draw_features") or []
polygons = [
    feature["geometry"]["coordinates"]
    for feature in draw_features
    if feature["geometry"]["type"] == "Polygon"
]

if polygons:
    # Use the exterior ring of the most recently drawn polygon.
    polygon_coords = polygons[-1][0]
    # Drawn features are GeoJSON, whose positions are [lon, lat] -- the (x, y)
    # order Shapely expects -- so the ring is passed through unswapped.
    shapely_polygon = Polygon([(lon, lat) for lon, lat in polygon_coords])
    polygon_drawn = True

    # Vectorised centroid-in-polygon test over the full dataset.
    centroid_points = gpd.GeoSeries(
        gpd.points_from_xy(df["longitude"], df["latitude"]), index=df.index
    )
    # .copy() avoids SettingWithCopyWarning on later modifications.
    filtered_df = df[centroid_points.within(shapely_polygon)].copy()
    st.success(f"Initially filtered {len(filtered_df)} properties within the drawn polygon.")
else:
    st.info("Draw a polygon on the map to spatially filter properties.")

# --- 3. Attribute Filtering Form ---
st.subheader("Filter Property Attributes")

# Smallest known lot area, used as the lower bound and default of the input.
smallest_area = df["zn_area"].min()
area_floor = float(smallest_area) if pd.notna(smallest_area) else 0.0

# Only one of these inputs is rendered per run; default both so the filter
# step below never touches an undefined name.
min_height_value = 0.0
min_stories_value = 0

with st.form("attribute_filters"):
    col1, col2 = st.columns(2)

    with col1:
        # Missing zoning types are dropped so sorting never compares NaN with str.
        all_zoning_types = [ALL_ZONING_LABEL] + sorted(df["zn_type"].dropna().unique().tolist())
        selected_zn_type = st.selectbox("Zoning Type", all_zoning_types, key="zn_type_select")

        min_zn_area = st.number_input(
            "Minimum Lot Area in Sq Metres",
            min_value=area_floor,
            value=area_floor,
            step=100.0,
            key="zn_area_input",
        )

        # Floor Space Index (FSI) filter - Synthetic data
        min_fsi_total = st.number_input("Minimum Floor Space Index (FSI)", min_value=0.0, value=0.0, step=0.1, format="%.2f", key="fsi_total_input")

    with col2:
        # Building Percent Coverage filter - Synthetic data
        max_prcnt_cver = st.number_input("Maximum Building Percent Coverage (%)", min_value=0, value=100, step=1, key="prcnt_cver_input")

        # Height or Stories selection - Synthetic data
        height_stories_option = st.radio(
            "Filter by",
            ("Height", "Stories"),
            index=0,  # Default to Height
            key="height_stories_radio",
        )

        # NOTE: widgets inside a form do not rerun the script until the form
        # is submitted, so this input switches only after the next submit.
        if height_stories_option == "Height":
            min_height_value = st.number_input("Minimum Height in Metres", min_value=0.0, value=0.0, step=0.1, format="%.1f", key="height_input")
        else:  # Stories
            min_stories_value = st.number_input("Minimum Stories", min_value=0, value=0, step=1, key="stories_input")

    submitted = st.form_submit_button("Apply Attribute Filters")

if submitted:
    # Combine every active criterion into one mask over the spatially
    # filtered rows. A criterion left at its default is not applied.
    keep = pd.Series(True, index=filtered_df.index)

    if selected_zn_type != ALL_ZONING_LABEL:
        keep &= filtered_df["zn_type"] == selected_zn_type

    # Applied only when raised above the floor, so an untouched input does
    # not discard parcels with unknown area. Once active, unknown area is
    # treated as 0.
    if min_zn_area > area_floor:
        keep &= filtered_df["zn_area"].fillna(0) >= min_zn_area

    if min_fsi_total > 0:
        keep &= filtered_df["fsi_total"] >= min_fsi_total

    if max_prcnt_cver < 100:  # 100% means no upper limit applied
        keep &= filtered_df["prcnt_cver"] <= max_prcnt_cver

    if height_stories_option == "Height" and min_height_value > 0:
        keep &= filtered_df["height_metres"] >= min_height_value
    elif height_stories_option == "Stories" and min_stories_value > 0:
        keep &= filtered_df["stories"] >= min_stories_value

    filtered_df = filtered_df[keep]
    st.success(f"Applied attribute filters. Total properties after all filters: {len(filtered_df)}")
else:
    st.info("Adjust filters and click 'Apply Attribute Filters'.")

# --- 4. Display Filtered Data on a New Map and as a Table ---
st.subheader("Filtered Properties")

if not filtered_df.empty:
    # Center on the filtered centroids and zoom according to their extent; a
    # single parcel has zero extent and gets the closest zoom.
    lat_extent = filtered_df["latitude"].max() - filtered_df["latitude"].min()
    lon_extent = filtered_df["longitude"].max() - filtered_df["longitude"].min()
    filtered_m = folium.Map(
        location=[filtered_df["latitude"].mean(), filtered_df["longitude"].mean()],
        zoom_start=zoom_for_extent(max(lat_extent, lon_extent)),
    )

    # Redraw the user's polygon; folium expects (lat, lon), so each GeoJSON
    # [lon, lat] position is swapped here.
    if polygon_drawn and polygon_coords:
        folium.Polygon(
            locations=[(lat, lon) for lon, lat in polygon_coords],
            color="#ef233c",
            fill=True,
            fill_color="#ef233c",
            fill_opacity=0.5,
        ).add_to(filtered_m)

    # Cap the number of polygons sent to the browser, as on the first map.
    map_df = filtered_df
    if len(map_df) > MAX_MAP_FEATURES:
        map_df = filtered_df.sample(MAX_MAP_FEATURES, random_state=42)
        st.info(f"Showing a sample of {MAX_MAP_FEATURES} of {len(filtered_df)} filtered properties on the map for responsiveness. The table and CSV export include all of them.")

    # Serialise only the tooltip columns plus geometry to keep the GeoJSON small.
    filtered_gdf = gpd.GeoDataFrame(map_df[DISPLAY_COLS + ["geometry"]], geometry="geometry")

    folium.GeoJson(
        filtered_gdf.to_json(),
        style_function=lambda x: {
            "fillColor": "green",
            "color": "darkgreen",
            "weight": 1,
            "fillOpacity": 0.7,
        },
        tooltip=folium.GeoJsonTooltip(
            fields=DISPLAY_COLS,
            aliases=["Parcel ID:", "Zoning Type:", "Lot Area (m²):", "FSI:", "Coverage (%):", "Height (m):", "Stories:", "Address Num:", "Street:"],
            localize=True,
        ),
    ).add_to(filtered_m)

    # Display-only map: nothing is read back from it, so return no objects.
    st_folium(filtered_m, width=1000, height=500, returned_objects=[])

    st.subheader("Filtered Properties Table")
    st.dataframe(filtered_df[DISPLAY_COLS])

    # --- 5. Export Data Button ---
    csv = filtered_df.to_csv(index=False).encode("utf-8")
    st.download_button(
        label="Export Filtered Data to CSV",
        data=csv,
        file_name="multiplex_coop_filtered_properties.csv",
        mime="text/csv",
    )
else:
    st.warning("No properties match the current filters. Try adjusting your criteria or drawing a different polygon.")

st.markdown("---")
st.markdown("This app demonstrates spatial and attribute filtering on the ProjectMultiplexCoop/PropertyBoundaries dataset from Hugging Face. FSI, Building Coverage, Height, and Stories are synthetic for demonstration.")