Spaces:

ProjectMultiplexCoop
/

MultiplexCoopHousingBI_Demo

Sleeping

App Files Files Community

MultiplexCoopHousingBI_Demo / src /streamlit_app.py

npc0

Update src/streamlit_app.py

15b2d37 verified 2 months ago

raw

history blame

13.7 kB

	import streamlit as st
	from streamlit_folium import st_folium
	import folium
	from folium.plugins import Draw
	import pandas as pd
	import geopandas as gpd
	from shapely.geometry import Polygon, Point
	import numpy as np
	import re # For parsing STATEDAREA

	# Page chrome: wide layout and browser-tab title, then the heading and the
	# introductory blurb shown above the map.
	st.set_page_config(page_title="Multiplex Coop Housing Filter", layout="wide")

	intro_text = (
	    "This app uses the `ProjectMultiplexCoop/PropertyBoundaries` dataset from Hugging Face. "
	    "Draw a polygon on the map to spatially filter properties. "
	    "Use the form below to apply additional filters based on property attributes. "
	    "Note: FSI, Building Coverage, Height, and Stories are synthetic for demonstration "
	    "as they are not directly available in the dataset."
	)

	st.title("🗺️ Multiplex Coop Housing Filter (Hugging Face Data)")
	st.write(intro_text)

	# --- 1. Load Data from Hugging Face and Process ---
	@st.cache_data
	def load_and_process_data():
	    """
	    Load the property-boundary parcels and derive the columns the app filters on.

	    Reads the GeoParquet file from the Hugging Face dataset, parses
	    ``STATEDAREA`` into a numeric ``zn_area``, copies ``FEATURE_TYPE`` into
	    ``zn_type``, fills ``fsi_total``, ``prcnt_cver``, ``height_metres`` and
	    ``stories`` with random synthetic values, and adds ``id``, ``name`` and
	    centroid ``latitude``/``longitude`` columns.

	    Returns:
	        A copy of the processed frame restricted to the columns used for
	        display and filtering (including ``geometry``).

	    If the dataset cannot be read, the error is reported with ``st.error``
	    and the script run is halted with ``st.stop()``.
	    """
	    try:
	        # Load the geospatial data using geopandas
	        # Ensure you have 'huggingface_hub', 'geopandas', 'fiona', 'pyproj', 'shapely' installed.
	        gdf = gpd.read_parquet("hf://datasets/ProjectMultiplexCoop/PropertyBoundaries/Property_Boundaries_4326.parquet")
	    except Exception as e:
	        st.error(f"Failed to load data from Hugging Face. Please ensure `huggingface_hub`, `geopandas`, `fiona`, and `pyproj` are installed. Error: {e}")
	        st.stop()

	    # Process STATEDAREA to numeric (Lot Area in Sq Metres).
	    # The format is like "17366.998291 sq.m": capture the whole decimal
	    # number. Without the `*` quantifiers the pattern only ever captured the
	    # trailing digits of the fraction (e.g. "998291"), corrupting every area.
	    area_pattern = re.compile(r'(\d+\.?\d*)\s*sq\.m')

	    def parse_stated_area(area_str):
	        """Return the numeric area in ``area_str`` as a float, or NaN if absent/unparseable."""
	        if pd.isna(area_str):
	            return np.nan
	        match = area_pattern.search(str(area_str))
	        if match:
	            return float(match.group(1))
	        return np.nan

	    gdf['zn_area'] = gdf['STATEDAREA'].apply(parse_stated_area)

	    # Map FEATURE_TYPE to zn_type (Zoning Type)
	    gdf['zn_type'] = gdf['FEATURE_TYPE']

	    # Generate synthetic data for attributes not present in the Hugging Face dataset
	    # but required for the filter functionality. The generator is unseeded, so
	    # the values are only stable for as long as the cached result is reused.
	    num_rows = len(gdf)
	    gdf['fsi_total'] = np.round(np.random.uniform(0.5, 3.0, num_rows), 2)
	    gdf['prcnt_cver'] = np.random.randint(20, 70, num_rows)
	    gdf['height_metres'] = np.round(np.random.uniform(5, 30, num_rows), 1)
	    gdf['stories'] = np.random.randint(2, 10, num_rows)

	    # Add unique ID and a display name
	    gdf['id'] = range(1, num_rows + 1)
	    gdf['name'] = gdf['PARCELID'].apply(lambda x: f"Parcel {x}")

	    # Ensure geometries are valid for centroid calculation and plotting
	    # .buffer(0) is a common trick to fix minor geometry issues
	    gdf['geometry'] = gdf['geometry'].buffer(0)
	    # Extract centroids for point-based filtering and initial map markers
	    gdf['latitude'] = gdf.geometry.centroid.y
	    gdf['longitude'] = gdf.geometry.centroid.x

	    # Select and reorder relevant columns for display and filtering
	    df_processed = gdf[[
	        'id', 'name', 'latitude', 'longitude', 'geometry',
	        'zn_type', 'zn_area', 'fsi_total', 'prcnt_cver', 'height_metres', 'stories',
	        'PARCELID',  # Original Parcel ID for reference
	        'ADDRESS_NUMBER', 'LINEAR_NAME_FULL',  # For detailed address in tooltips
	    ]].copy()

	    return df_processed

	df = load_and_process_data()

	# Until a spatial or attribute filter is applied, the "filtered" view is
	# simply a copy of every property.
	filtered_df = df.copy()

	# --- 2. Initialize the Folium Map with Drawing Tools ---
	# The map opens centred on the mean of the parcel centroids.
	map_center = [df['latitude'].mean(), df['longitude'].mean()]
	m = folium.Map(location=map_center, zoom_start=12)

	# Only the polygon tool is enabled; every other drawing tool is switched off.
	draw_options = dict.fromkeys(
	    ("polyline", "rectangle", "circlemarker", "circle", "marker"), False
	)
	draw_options["polygon"] = {
	    "allowIntersection": False,  # Restricts polygons to not intersect themselves
	    "drawError": {"color": "#e0115f", "message": "Oups!"},
	    "shapeOptions": {"color": "#ef233c", "fillOpacity": 0.5},
	}

	# Drawn shapes can be removed but not edited, and can be exported as GeoJSON.
	draw = Draw(
	    export=True,
	    filename="drawn_polygon.geojson",
	    position="topleft",
	    draw_options=draw_options,
	    edit_options={"edit": False, "remove": True},
	)
	draw.add_to(m)

	# Only a sample of centroids is drawn on the initial map: plotting all
	# 500k+ polygons/points at once can cause performance issues.
	sample_size = min(1000, len(df))  # Sample up to 1000 points
	sample_df_for_initial_map = df.sample(sample_size, random_state=42)


	def _initial_marker_tooltip(row):
	    """Return the HTML tooltip for one sampled parcel row (missing values shown as 'N/A' or blank)."""
	    area = row['zn_area'] if pd.notna(row['zn_area']) else 'N/A'
	    address_number = row['ADDRESS_NUMBER'] if pd.notna(row['ADDRESS_NUMBER']) else ''
	    street_name = row['LINEAR_NAME_FULL'] if pd.notna(row['LINEAR_NAME_FULL']) else ''
	    return (
	        f"Parcel ID: {row['PARCELID']}<br>Name: {row['name']}<br>Zoning: {row['zn_type']}<br>"
	        f"Area: {area} m²<br>"
	        f"FSI: {row['fsi_total']}<br>Coverage: {row['prcnt_cver']}%<br>"
	        f"Height: {row['height_metres']}m<br>Stories: {row['stories']}<br>"
	        f"Address: {address_number} {street_name}"
	    )


	for _, parcel in sample_df_for_initial_map.iterrows():
	    marker = folium.CircleMarker(
	        location=[parcel['latitude'], parcel['longitude']],
	        radius=3,  # Smaller radius for denser data points
	        color='blue',
	        fill=True,
	        fill_color='blue',
	        fill_opacity=0.5,
	        tooltip=_initial_marker_tooltip(parcel),
	    )
	    marker.add_to(m)

	st.subheader("Draw a Polygon on the Map")
	st.info(f"Displaying a sample of {len(sample_df_for_initial_map)} points on the map for responsiveness. All {len(df)} properties will be used for filtering.")
	# streamlit-folium reports shapes drawn with the Draw plugin under the
	# "all_drawings" key; requesting any other name yields no drawings at all.
	output = st_folium(m, width=1000, height=600, returned_objects=["all_drawings"])

	polygon_drawn = False
	shapely_polygon = None
	polygon_coords = None

	# `output` may be empty/None before the component has reported anything.
	drawn_features = (output or {}).get("all_drawings") or []
	polygons = [
	    feature["geometry"]["coordinates"]
	    for feature in drawn_features
	    if feature["geometry"]["type"] == "Polygon"
	]

	if polygons:
	    # Outer ring of the last drawn polygon. Drawn features are GeoJSON, so
	    # each vertex is [lon, lat].
	    outer_ring = polygons[-1][0]
	    # Shapely uses (x, y) == (lon, lat): the GeoJSON order can be used as-is.
	    shapely_polygon = Polygon([(vertex[0], vertex[1]) for vertex in outer_ring])
	    # Folium expects (lat, lon), so store the ring flipped for later plotting.
	    polygon_coords = [(vertex[1], vertex[0]) for vertex in outer_ring]
	    polygon_drawn = True

	    # Apply spatial filter to the full dataframe based on centroid containment.
	    # Vectorised point-in-polygon test instead of building one Point per row
	    # in Python, which is very slow on the full parcel table.
	    centroid_points = gpd.GeoSeries(
	        gpd.points_from_xy(df['longitude'], df['latitude']),
	        index=df.index,
	    )
	    inside_polygon = centroid_points.within(shapely_polygon)
	    filtered_df = df[inside_polygon].copy()  # Use .copy() to avoid SettingWithCopyWarning
	    st.success(f"Initially filtered {len(filtered_df)} properties within the drawn polygon.")
	else:
	    st.info("Draw a polygon on the map to spatially filter properties.")

	# --- 3. Attribute Filtering Form ---
	st.subheader("Filter Property Attributes")

	with st.form("attribute_filters"):
	    col1, col2 = st.columns(2)

	    with col1:
	        # Zoning Type filter
	        # Unique zoning types from the loaded data, preceded by the default
	        # 'All' option. Missing values are dropped first: sorting a mix of
	        # strings and NaN/None raises TypeError.
	        zoning_values = sorted(df['zn_type'].dropna().unique().tolist())
	        all_zoning_types = ['All Resdidential Zoning (0, 101, 6)'] + zoning_values
	        selected_zn_type = st.selectbox("Zoning Type", all_zoning_types, key="zn_type_select")

	        # Lot Area in Sq Metres filter
	        # The smallest parsed lot area (0 if none could be parsed) is both the
	        # lower bound and the initial value of the input; computed once.
	        smallest_area = df['zn_area'].min()
	        area_floor = float(smallest_area) if pd.notna(smallest_area) else 0.0
	        min_zn_area = st.number_input(
	            "Minimum Lot Area in Sq Metres",
	            min_value=area_floor,
	            value=area_floor,
	            step=100.0,
	            key="zn_area_input"
	        )

	        # Floor Space Index (FSI) filter - Synthetic data
	        min_fsi_total = st.number_input("Minimum Floor Space Index (FSI)", min_value=0.0, value=0.0, step=0.1, format="%.2f", key="fsi_total_input")

	    with col2:
	        # Building Percent Coverage filter - Synthetic data
	        max_prcnt_cver = st.number_input("Maximum Building Percent Coverage (%)", min_value=0, value=100, step=1, key="prcnt_cver_input")

	        # Height or Stories selection - Synthetic data
	        height_stories_option = st.radio(
	            "Filter by",
	            ("Height", "Stories"),
	            index=0,  # Default to Height
	            key="height_stories_radio"
	        )

	        # Both inputs are always rendered and always defined. Widgets inside
	        # a form do not rerun the script until it is submitted, so an input
	        # chosen by the radio above would only appear one submit late; the
	        # radio decides which of the two values is applied.
	        min_height_value = st.number_input("Minimum Height in Metres", min_value=0.0, value=0.0, step=0.1, format="%.1f", key="height_input")
	        min_stories_value = st.number_input("Minimum Stories", min_value=0, value=0, step=1, key="stories_input")

	    submitted = st.form_submit_button("Apply Attribute Filters")

	if not submitted:
	    st.info("Adjust filters and click 'Apply Attribute Filters'.")
	else:
	    # Build one row mask over the already spatially filtered frame and AND
	    # each active criterion into it, then slice once at the end.
	    keep_rows = np.ones(len(filtered_df), dtype=bool)

	    # The 'All ...' entry means "no zoning restriction".
	    if selected_zn_type != 'All Resdidential Zoning (0, 101, 6)':
	        keep_rows &= (filtered_df['zn_type'] == selected_zn_type).to_numpy()

	    # Rows with no parsed lot area are compared as if their area were 0.
	    lot_area = filtered_df['zn_area'].fillna(0)
	    keep_rows &= (lot_area >= min_zn_area).to_numpy()

	    # A minimum of 0 leaves the FSI criterion switched off.
	    if min_fsi_total > 0:
	        keep_rows &= (filtered_df['fsi_total'] >= min_fsi_total).to_numpy()

	    # 100% is treated as "no upper limit applied".
	    if max_prcnt_cver < 100:
	        keep_rows &= (filtered_df['prcnt_cver'] <= max_prcnt_cver).to_numpy()

	    # Exactly one of height / stories is applied, chosen by the radio.
	    if height_stories_option == "Height" and min_height_value > 0:
	        keep_rows &= (filtered_df['height_metres'] >= min_height_value).to_numpy()
	    elif height_stories_option == "Stories" and min_stories_value > 0:
	        keep_rows &= (filtered_df['stories'] >= min_stories_value).to_numpy()

	    filtered_df = filtered_df[keep_rows]

	    st.success(f"Applied attribute filters. Total properties after all filters: {len(filtered_df)}")


	# --- 4. Display Filtered Data on a New Map and as a Table ---
	st.subheader("Filtered Properties")

	if filtered_df.empty:
	    st.warning("No properties match the current filters. Try adjusting your criteria or drawing a different polygon.")
	else:
	    # Extent of the filtered centroids, used to pick the map centre and zoom.
	    # (The frame is known to be non-empty here, so no fallback centre is needed.)
	    min_lat, max_lat = filtered_df['latitude'].min(), filtered_df['latitude'].max()
	    min_lon, max_lon = filtered_df['longitude'].min(), filtered_df['longitude'].max()

	    if min_lat == max_lat and min_lon == max_lon:
	        # All centroids coincide (e.g. a single property).
	        filtered_map_center = [min_lat, min_lon]
	        filtered_map_zoom = 18  # Very close zoom for single point
	    else:
	        filtered_map_center = [filtered_df['latitude'].mean(), filtered_df['longitude'].mean()]
	        # Simple heuristic for zoom level based on the larger of the two
	        # extents, in degrees.
	        lat_diff = max_lat - min_lat
	        lon_diff = max_lon - min_lon
	        extent = max(lat_diff, lon_diff)
	        if extent < 0.001:
	            filtered_map_zoom = 18
	        elif extent < 0.01:
	            filtered_map_zoom = 16
	        elif extent < 0.1:
	            filtered_map_zoom = 14
	        else:
	            filtered_map_zoom = 12

	    filtered_m = folium.Map(location=filtered_map_center, zoom_start=filtered_map_zoom)

	    # Add the drawn polygon to the new map if it exists
	    if polygon_drawn and polygon_coords:
	        folium.Polygon(
	            locations=polygon_coords,  # folium expects (lat, lon) pairs
	            color="#ef233c",
	            fill=True,
	            fill_color="#ef233c",
	            fill_opacity=0.5
	        ).add_to(filtered_m)

	    # Convert filtered_df back to GeoDataFrame for direct plotting of geometries
	    filtered_gdf = gpd.GeoDataFrame(filtered_df, geometry='geometry')

	    # Add filtered polygons to the map as GeoJSON layer.
	    # NOTE(review): with no polygon drawn and no attribute filter applied this
	    # serialises every property; consider capping the number of features.
	    folium.GeoJson(
	        filtered_gdf.to_json(),
	        style_function=lambda x: {
	            'fillColor': 'green',
	            'color': 'darkgreen',
	            'weight': 1,
	            'fillOpacity': 0.7
	        },
	        tooltip=folium.GeoJsonTooltip(
	            fields=['PARCELID', 'zn_type', 'zn_area', 'fsi_total', 'prcnt_cver', 'height_metres', 'stories', 'ADDRESS_NUMBER', 'LINEAR_NAME_FULL'],
	            aliases=['Parcel ID:', 'Zoning Type:', 'Lot Area (m²):', 'FSI:', 'Coverage (%):', 'Height (m):', 'Stories:', 'Address Num:', 'Street:'],
	            localize=True
	        )
	    ).add_to(filtered_m)

	    st_folium(filtered_m, width=1000, height=500)

	    st.subheader("Filtered Properties Table")
	    # Display relevant columns in the table
	    display_cols = ['PARCELID', 'zn_type', 'zn_area', 'fsi_total', 'prcnt_cver', 'height_metres', 'stories', 'ADDRESS_NUMBER', 'LINEAR_NAME_FULL']
	    st.dataframe(filtered_df[display_cols])

	    # --- 5. Export Data Button ---
	    # The export contains every column of the filtered frame, not just the
	    # ones shown in the table.
	    csv = filtered_df.to_csv(index=False).encode('utf-8')
	    st.download_button(
	        label="Export Filtered Data to CSV",
	        data=csv,
	        file_name="multiplex_coop_filtered_properties.csv",
	        mime="text/csv",
	    )

	# Footer: a horizontal rule followed by a reminder of what the demo shows
	# and which attributes are synthetic.
	footer_note = (
	    "This app demonstrates spatial and attribute filtering on the "
	    "ProjectMultiplexCoop/PropertyBoundaries dataset from Hugging Face. "
	    "FSI, Building Coverage, Height, and Stories are synthetic for demonstration."
	)
	st.markdown("---")
	st.markdown(footer_note)