File size: 13,726 Bytes
10d0bac
 
 
be85e86
2619083
15b2d37
be85e86
 
15b2d37
2619083
15b2d37
be85e86
15b2d37
 
be85e86
15b2d37
be85e86
15b2d37
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
be85e86
 
 
15b2d37
be85e86
 
15b2d37
be85e86
 
 
 
 
15b2d37
 
be85e86
15b2d37
 
 
be85e86
 
 
 
 
 
15b2d37
 
 
 
 
be85e86
 
15b2d37
be85e86
 
 
15b2d37
be85e86
15b2d37
 
 
 
 
be85e86
 
 
 
15b2d37
be85e86
 
 
 
 
 
 
 
 
 
 
 
 
 
15b2d37
 
be85e86
 
 
15b2d37
be85e86
 
 
 
 
 
15b2d37
be85e86
15b2d37
be85e86
15b2d37
be85e86
 
 
 
 
 
 
 
15b2d37
 
be85e86
 
 
15b2d37
 
 
 
 
 
 
 
 
be85e86
15b2d37
be85e86
 
 
15b2d37
be85e86
 
15b2d37
be85e86
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15b2d37
 
be85e86
 
 
 
 
 
 
 
 
 
 
 
15b2d37
be85e86
 
 
 
 
15b2d37
be85e86
 
15b2d37
be85e86
15b2d37
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
be85e86
15b2d37
be85e86
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15b2d37
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
be85e86
 
 
15b2d37
 
 
 
be85e86
 
 
 
 
 
15b2d37
be85e86
 
 
 
15b2d37
be85e86
 
15b2d37
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
import streamlit as st
from streamlit_folium import st_folium
import folium
from folium.plugins import Draw
import pandas as pd
import geopandas as gpd
from shapely.geometry import Polygon, Point
import numpy as np
import re # For parsing STATEDAREA

# Page chrome: wide layout and browser-tab title, then the headline and intro
# blurb. set_page_config is kept as the first Streamlit call in the script.
st.set_page_config(page_title="Multiplex Coop Housing Filter", layout="wide")

st.title("🗺️ Multiplex Coop Housing Filter (Hugging Face Data)")
st.write(
    "This app uses the `ProjectMultiplexCoop/PropertyBoundaries` dataset from Hugging Face. "
    "Draw a polygon on the map to spatially filter properties. "
    "Use the form below to apply additional filters based on property attributes. "
    "**Note: FSI, Building Coverage, Height, and Stories are synthetic for demonstration "
    "as they are not directly available in the dataset.**"
)

# --- 1. Load Data from Hugging Face and Process ---
@st.cache_data
def load_and_process_data():
    """
    Load the parcel boundaries from Hugging Face and derive the filter columns.

    Processing steps:
      * read the parquet file into a GeoDataFrame;
      * parse ``STATEDAREA`` (e.g. "17366.998291 sq.m") into numeric ``zn_area``;
      * copy ``FEATURE_TYPE`` into ``zn_type``;
      * generate synthetic ``fsi_total``, ``prcnt_cver``, ``height_metres`` and
        ``stories`` values (not present in the dataset);
      * add a sequential ``id``, a display ``name`` and the centroid
        ``latitude`` / ``longitude`` of every geometry.

    Returns:
        A copy of the GeoDataFrame restricted to the columns the app uses.

    If the dataset cannot be read, the error is shown with ``st.error`` and the
    script run is halted with ``st.stop()``.
    """
    try:
        # Reading parquet over hf:// needs 'huggingface_hub' and 'pyarrow' in
        # addition to geopandas itself.
        gdf = gpd.read_parquet("hf://datasets/ProjectMultiplexCoop/PropertyBoundaries/Property_Boundaries_4326.parquet")
    except Exception as e:
        st.error(f"Failed to load data from Hugging Face. Please ensure `huggingface_hub`, `geopandas`, `pyarrow`, `fiona`, and `pyproj` are installed. Error: {e}")
        st.stop()

    # Process STATEDAREA to numeric (Lot Area in Sq Metres).
    # The format is like "17366.998291 sq.m"; compile once, reuse for every row.
    area_pattern = re.compile(r'(\d+\.?\d*)\s*sq\.m')

    def parse_stated_area(area_str):
        """Return the leading number of an '<number> sq.m' string, else NaN."""
        if pd.isna(area_str):
            return np.nan
        match = area_pattern.search(str(area_str))
        if match:
            return float(match.group(1))
        return np.nan

    gdf['zn_area'] = gdf['STATEDAREA'].apply(parse_stated_area)

    # Map FEATURE_TYPE to zn_type (Zoning Type)
    gdf['zn_type'] = gdf['FEATURE_TYPE']

    # Generate synthetic data for attributes not present in the Hugging Face dataset
    # but required for the filter functionality as per the original HTML.
    # A dedicated, seeded generator keeps the synthetic values identical across
    # restarts and cache clears, so filter results and exports are reproducible,
    # and it leaves NumPy's global random state untouched.
    num_rows = len(gdf)
    rng = np.random.default_rng(42)
    gdf['fsi_total'] = np.round(rng.uniform(0.5, 3.0, num_rows), 2)
    gdf['prcnt_cver'] = rng.integers(20, 70, num_rows)  # upper bound exclusive
    gdf['height_metres'] = np.round(rng.uniform(5, 30, num_rows), 1)
    gdf['stories'] = rng.integers(2, 10, num_rows)  # upper bound exclusive

    # Add unique ID and a display name
    gdf['id'] = range(1, num_rows + 1)
    gdf['name'] = gdf['PARCELID'].apply(lambda x: f"Parcel {x}")

    # Ensure geometries are valid for centroid calculation and plotting
    # .buffer(0) is a common trick to fix minor geometry issues
    gdf['geometry'] = gdf['geometry'].buffer(0)

    # Extract centroids for point-based filtering and initial map markers.
    # Computed once and reused for both coordinate columns.
    # NOTE(review): the file name suggests EPSG:4326, where geopandas warns that
    # centroids are approximate - presumably fine at parcel scale; confirm.
    centroids = gdf.geometry.centroid
    gdf['latitude'] = centroids.y
    gdf['longitude'] = centroids.x

    # Select and reorder relevant columns for display and filtering
    df_processed = gdf[[
        'id', 'name', 'latitude', 'longitude', 'geometry',
        'zn_type', 'zn_area', 'fsi_total', 'prcnt_cver', 'height_metres', 'stories',
        'PARCELID', # Original Parcel ID for reference
        'ADDRESS_NUMBER', 'LINEAR_NAME_FULL' # For detailed address in tooltips
    ]].copy()

    return df_processed

# Fetch the processed parcels (the loader is wrapped in st.cache_data).
df = load_and_process_data()

# Until a filter narrows it down, the result set is every parcel.
filtered_df = df.copy()

# --- 2. Initialize the Folium Map with Drawing Tools ---
# The map opens centred on the average parcel centroid.
map_center = [df['latitude'].mean(), df['longitude'].mean()]
m = folium.Map(location=map_center, zoom_start=12)

# Only the polygon tool is enabled; every other draw tool is switched off.
polygon_tool = {
    "allowIntersection": False, # Restricts polygons to not intersect themselves
    "drawError": {"color": "#e0115f", "message": "Oups!"},
    "shapeOptions": {"color": "#ef233c", "fillOpacity": 0.5},
}
enabled_draw_tools = {
    "polyline": False, "rectangle": False, "circlemarker": False,
    "circle": False, "marker": False,
    "polygon": polygon_tool,
}
draw = Draw(
    export=True,
    filename="drawn_polygon.geojson",
    position="topleft",
    draw_options=enabled_draw_tools,
    # Drawn shapes may be removed but not edited.
    edit_options={"edit": False, "remove": True},
)
m.add_child(draw)

# Plotting all 500k+ polygons/points at once can cause performance issues, so
# the drawing map only carries a fixed-seed sample of at most 1000 centroids.
sample_size = min(1000, len(df))
sample_df_for_initial_map = df.sample(sample_size, random_state=42)

for _, parcel in sample_df_for_initial_map.iterrows():
    # Missing values are shown as 'N/A' (area) or left blank (address parts).
    area_text = parcel['zn_area'] if pd.notna(parcel['zn_area']) else 'N/A'
    house_number = parcel['ADDRESS_NUMBER'] if pd.notna(parcel['ADDRESS_NUMBER']) else ''
    street_name = parcel['LINEAR_NAME_FULL'] if pd.notna(parcel['LINEAR_NAME_FULL']) else ''
    tooltip_html = (
        f"Parcel ID: {parcel['PARCELID']}<br>Name: {parcel['name']}<br>Zoning: {parcel['zn_type']}<br>"
        f"Area: {area_text} m²<br>"
        f"FSI: {parcel['fsi_total']}<br>Coverage: {parcel['prcnt_cver']}%<br>"
        f"Height: {parcel['height_metres']}m<br>Stories: {parcel['stories']}<br>"
        f"Address: {house_number} {street_name}"
    )
    marker = folium.CircleMarker(
        location=[parcel['latitude'], parcel['longitude']],
        radius=3, # Smaller radius for denser data points
        color='blue',
        fill=True,
        fill_color='blue',
        fill_opacity=0.5,
        tooltip=tooltip_html,
    )
    marker.add_to(m)

st.subheader("Draw a Polygon on the Map")
st.info(f"Displaying a sample of {len(sample_df_for_initial_map)} points on the map for responsiveness. All {len(df)} properties will be used for filtering.")
# Only the drawn features are requested back from the map component.
output = st_folium(m, width=1000, height=600, returned_objects=["all_draw_features"])

# Draw state consumed by the results section further down:
#   polygon_drawn   - True once a polygon has been read back from the map
#   shapely_polygon - that polygon in (x=lon, y=lat) order, used for the spatial test
#   polygon_coords  - the same ring as (lat, lon) pairs, the order folium expects
polygon_drawn = False
shapely_polygon = None
polygon_coords = None

# The map component returns drawn shapes as GeoJSON features; keep only the
# polygons. A missing or empty result simply yields an empty list.
drawn_features = (output or {}).get("all_draw_features") or []
polygons = [
    feature["geometry"]["coordinates"]
    for feature in drawn_features
    if feature["geometry"]["type"] == "Polygon"
]

if polygons:
    # The most recently drawn polygon wins; element 0 is its exterior ring.
    exterior_ring = polygons[-1][0]

    # GeoJSON positions are [longitude, latitude]. That already matches
    # shapely's (x, y) convention, so the ring must NOT be swapped for shapely;
    # swapping it would build the polygon in (lat, lon) space and no
    # (lon, lat) centroid would ever fall inside it.
    shapely_polygon = Polygon([(lon, lat) for lon, lat in exterior_ring])
    # folium, by contrast, takes (lat, lon) pairs, so swap for display use.
    polygon_coords = [(lat, lon) for lon, lat in exterior_ring]
    polygon_drawn = True

    # Apply spatial filter to the full dataframe based on centroid containment.
    # Vectorized point-in-polygon test instead of a Python-level row loop;
    # "point within polygon" is the same predicate as "polygon contains point".
    centroid_points = gpd.GeoSeries(gpd.points_from_xy(df['longitude'], df['latitude']))
    inside_polygon = centroid_points.within(shapely_polygon).to_numpy()
    filtered_df = df[inside_polygon].copy() # Use .copy() to avoid SettingWithCopyWarning
    st.success(f"Initially filtered {len(filtered_df)} properties within the drawn polygon.")
else:
    st.info("Draw a polygon on the map to spatially filter properties.")

# --- 3. Attribute Filtering Form ---
st.subheader("Filter Property Attributes")

# First selectbox entry; choosing it means "do not filter on zoning type".
ALL_ZONING_OPTION = 'All Resdidential Zoning (0, 101, 6)'

with st.form("attribute_filters"):
    col1, col2 = st.columns(2)

    with col1:
        # Zoning Type filter
        # Unique zoning types from the loaded data, after the 'All' option.
        # Missing values are dropped first: sorted() cannot order a mix of
        # strings and NaN and would raise TypeError.
        all_zoning_types = [ALL_ZONING_OPTION] + sorted(df['zn_type'].dropna().unique().tolist())
        selected_zn_type = st.selectbox("Zoning Type", all_zoning_types, key="zn_type_select")

        # Lot Area in Sq Metres filter
        # The smallest parsed area in the data is both the lower bound and the
        # default of the input; fall back to 0 when no area could be parsed.
        smallest_area = df['zn_area'].min()
        area_floor = float(smallest_area) if pd.notna(smallest_area) else 0.0
        min_zn_area = st.number_input(
            "Minimum Lot Area in Sq Metres",
            min_value=area_floor,
            value=area_floor,
            step=100.0,
            key="zn_area_input"
        )

        # Floor Space Index (FSI) filter - Synthetic data
        min_fsi_total = st.number_input("Minimum Floor Space Index (FSI)", min_value=0.0, value=0.0, step=0.1, format="%.2f", key="fsi_total_input")

    with col2:
        # Building Percent Coverage filter - Synthetic data
        max_prcnt_cver = st.number_input("Maximum Building Percent Coverage (%)", min_value=0, value=100, step=1, key="prcnt_cver_input")

        # Height or Stories selection - Synthetic data
        height_stories_option = st.radio(
            "Filter by",
            ("Height", "Stories"),
            index=0, # Default to Height
            key="height_stories_radio"
        )

        # Both inputs are always rendered. Widgets inside a form do not rerun
        # the script when changed, so an input chosen by the radio would only
        # appear after a submit, and that first submit would filter on nothing.
        # Only the input selected by the radio is applied below.
        min_height_value = st.number_input("Minimum Height in Metres", min_value=0.0, value=0.0, step=0.1, format="%.1f", key="height_input")
        min_stories_value = st.number_input("Minimum Stories", min_value=0, value=0, step=1, key="stories_input")

    submitted = st.form_submit_button("Apply Attribute Filters")

    if submitted:
        # Apply attribute filters to the already spatially filtered_df.
        # Each filter is skipped while its input is still at its default.
        if selected_zn_type != ALL_ZONING_OPTION:
            filtered_df = filtered_df[filtered_df['zn_type'] == selected_zn_type]

        # Only filter on area once the user raises the minimum above the
        # default; otherwise parcels whose area could not be parsed would be
        # dropped by an untouched filter. When the filter is active, NaN is
        # treated as 0, so such parcels are excluded.
        if min_zn_area > area_floor:
            filtered_df = filtered_df[filtered_df['zn_area'].fillna(0) >= min_zn_area]

        if min_fsi_total > 0:
            filtered_df = filtered_df[filtered_df['fsi_total'] >= min_fsi_total]

        if max_prcnt_cver < 100: # Assuming 100% means no upper limit applied
            filtered_df = filtered_df[filtered_df['prcnt_cver'] <= max_prcnt_cver]

        if height_stories_option == "Height" and min_height_value > 0:
            filtered_df = filtered_df[filtered_df['height_metres'] >= min_height_value]
        elif height_stories_option == "Stories" and min_stories_value > 0:
            filtered_df = filtered_df[filtered_df['stories'] >= min_stories_value]

        st.success(f"Applied attribute filters. Total properties after all filters: {len(filtered_df)}")
    else:
        st.info("Adjust filters and click 'Apply Attribute Filters'.")


# --- 4. Display Filtered Data on a New Map and as a Table ---
st.subheader("Filtered Properties")

# Upper bound on the number of parcel polygons drawn on the results map.
# Without it, an unfiltered run would serialize every parcel geometry into the
# page. The table and the CSV export are not affected by this cap.
MAX_PARCELS_ON_RESULT_MAP = 5000

if not filtered_df.empty:
    # Extent of the filtered centroids, used to pick the map centre and zoom.
    min_lat, max_lat = filtered_df['latitude'].min(), filtered_df['latitude'].max()
    min_lon, max_lon = filtered_df['longitude'].min(), filtered_df['longitude'].max()
    extent = max(max_lat - min_lat, max_lon - min_lon)

    if extent == 0:
        # All centroids coincide (e.g. a single parcel): centre exactly on it.
        filtered_map_center = [min_lat, min_lon]
    else:
        filtered_map_center = [filtered_df['latitude'].mean(), filtered_df['longitude'].mean()]

    # Simple heuristic for zoom level based on spatial extent (in degrees):
    # the first limit the extent falls under decides the zoom, else 12.
    zoom_by_extent = ((0.001, 18), (0.01, 16), (0.1, 14))
    filtered_map_zoom = next(
        (zoom for limit, zoom in zoom_by_extent if extent < limit),
        12,
    )

    filtered_m = folium.Map(location=filtered_map_center, zoom_start=filtered_map_zoom)

    # Add the drawn polygon to the new map if it exists.
    # polygon_coords is passed through as-is; folium reads locations as (lat, lon).
    if polygon_drawn and polygon_coords:
        folium.Polygon(
            locations=polygon_coords,
            color="#ef233c",
            fill=True,
            fill_color="#ef233c",
            fill_opacity=0.5
        ).add_to(filtered_m)

    # Limit what is drawn on the map; a fixed seed keeps the sample stable
    # across reruns.
    if len(filtered_df) > MAX_PARCELS_ON_RESULT_MAP:
        map_df = filtered_df.sample(MAX_PARCELS_ON_RESULT_MAP, random_state=42)
        st.info(
            f"Showing a sample of {MAX_PARCELS_ON_RESULT_MAP} of {len(filtered_df)} matching "
            "properties on the map for responsiveness. The table and CSV export include all of them."
        )
    else:
        map_df = filtered_df

    # Convert back to GeoDataFrame for direct plotting of geometries
    filtered_gdf = gpd.GeoDataFrame(map_df, geometry='geometry')

    # Add filtered polygons to the map as GeoJSON layer
    folium.GeoJson(
        filtered_gdf.to_json(),
        style_function=lambda x: {
            'fillColor': 'green',
            'color': 'darkgreen',
            'weight': 1,
            'fillOpacity': 0.7
        },
        tooltip=folium.GeoJsonTooltip(
            fields=['PARCELID', 'zn_type', 'zn_area', 'fsi_total', 'prcnt_cver', 'height_metres', 'stories', 'ADDRESS_NUMBER', 'LINEAR_NAME_FULL'],
            aliases=['Parcel ID:', 'Zoning Type:', 'Lot Area (m²):', 'FSI:', 'Coverage (%):', 'Height (m):', 'Stories:', 'Address Num:', 'Street:'],
            localize=True
        )
    ).add_to(filtered_m)

    st_folium(filtered_m, width=1000, height=500)

    st.subheader("Filtered Properties Table")
    # Display relevant columns in the table
    display_cols = ['PARCELID', 'zn_type', 'zn_area', 'fsi_total', 'prcnt_cver', 'height_metres', 'stories', 'ADDRESS_NUMBER', 'LINEAR_NAME_FULL']
    st.dataframe(filtered_df[display_cols])

    # --- 5. Export Data Button ---
    # The export carries every column of the full filtered result.
    csv = filtered_df.to_csv(index=False).encode('utf-8')
    st.download_button(
        label="Export Filtered Data to CSV",
        data=csv,
        file_name="multiplex_coop_filtered_properties.csv",
        mime="text/csv",
    )

else:
    st.warning("No properties match the current filters. Try adjusting your criteria or drawing a different polygon.")

st.markdown("---")
st.markdown("This app demonstrates spatial and attribute filtering on the ProjectMultiplexCoop/PropertyBoundaries dataset from Hugging Face. FSI, Building Coverage, Height, and Stories are synthetic for demonstration.")