import streamlit as st
from helper import (
load_dataset, search, get_file_paths,
get_cordinates, get_images_from_s3_to_display,
get_images_with_bounding_boxes_from_s3, load_dataset_with_limit
)
import os
import time
# Load environment variables.
# AWS credentials for S3 access; os.getenv returns None when unset — TODO
# confirm downstream helpers handle missing credentials gracefully.
AWS_ACCESS_KEY_ID = os.getenv("AWS_ACCESS_KEY_ID")
AWS_SECRET_ACCESS_KEY = os.getenv("AWS_SECRET_ACCESS_KEY")
# Predefined list of datasets selectable in the UI.
# NOTE(review): "MajorTom-UK" has entries in every dict below but is absent
# here, so it is never selectable — presumably disabled on purpose; confirm.
datasets = ["MajorTom-Germany", "MajorTom-Netherlands", "WayveScenes"]
# Maps dataset name -> S3 key prefix (folder) inside the bucket.
folder_path_dict = {
"WayveScenes": "WayveScenes/",
"MajorTom-Germany": "MajorTOM-DE/",
"MajorTom-Netherlands": "MajorTOM-NL/",
"MajorTom-UK" :""
}
# Short human-readable blurb shown under the dataset selector via st.caption.
description = {
"WayveScenes": "A large-scale dataset featuring diverse urban driving scenes, captured from vehicles to advance AI perception and navigation in complex environments.",
"MajorTom-Germany": "A geospatial dataset containing satellite imagery from across Germany, designed for tasks like land-use classification, environmental monitoring, and earth observation analytics.",
"MajorTom-Netherlands": "A geospatial dataset containing satellite imagery from across Netherlands, designed for tasks like land-use classification, environmental monitoring, and earth observation analytics.",
"MajorTom-UK" :"A geospatial dataset containing satellite imagery from across the United Kingdom, designed for tasks like land-use classification, environmental monitoring, and earth observation analytics."
}
# Maps dataset name -> [main subset count, split subset count]; main() uses
# index 0 for normal search and index 1 for small-object (split) search when
# building the "Select Subset of Data" dropdown.
selection = {
'WayveScenes': [1, 8],
"MajorTom-Germany": [1, 1],
"MajorTom-Netherlands": [1,1],
"MajorTom-UK": [1,1]
}
# Example queries displayed to the user for each dataset.
example_queries = {
'WayveScenes': "Parking Signs, Pedestrian Crossing, Traffic Light (Red, Green, Orange)",
"MajorTom-Germany": "Airports, Golf Courses, Wind Mills, Solar Panels ",
"MajorTom-Netherlands": "Airports, Golf Courses, Wind Mills, Solar Panels ",
"MajorTom-UK": "Airports, Golf Courses, Wind Mills, Solar Panels "
}
# AWS S3 bucket name that holds every dataset's images.
bucket_name = "datasets-quasara-io"
# Streamlit App
def main():
    """Streamlit entry point: select a dataset, load a bounded slice of it,
    run a semantic search query, and display matching images from S3.

    Relies on the module-level constants (``datasets``, ``folder_path_dict``,
    ``description``, ``selection``, ``example_queries``, ``bucket_name``) and
    on AWS credentials read from the environment at import time.
    """
    # Initialize session state on first run so Streamlit reruns keep state.
    if 'search_in_small_objects' not in st.session_state:
        st.session_state.search_in_small_objects = False
    if 'dataset_number' not in st.session_state:
        st.session_state.dataset_number = 1
    if 'df' not in st.session_state:
        st.session_state.df = None

    st.title("Semantic Search and Image Display")

    # Dataset selection; clear any previously loaded dataframe so a stale
    # dataset is never searched after the user switches selections.
    dataset_name = st.selectbox("Select Dataset", datasets)
    st.session_state.df = None
    folder_path = folder_path_dict[dataset_name]
    st.caption(description[dataset_name])

    # Toggle between small-object ("Splits") search and main-dataset search.
    # selection[name] is [main subset count, split subset count].
    if st.checkbox("Enable Small Object Search", value=st.session_state.search_in_small_objects):
        st.session_state.search_in_small_objects = True
        st.text("Small Object Search Enabled")
        st.session_state.dataset_number = st.selectbox(
            "Select Subset of Data", list(range(1, selection[dataset_name][1] + 1)))
        st.text(f"You have selected Split Dataset {st.session_state.dataset_number}")
    else:
        st.session_state.search_in_small_objects = False
        st.text("Small Object Search Disabled")
        st.session_state.dataset_number = st.selectbox(
            "Select Subset of Data", list(range(1, selection[dataset_name][0] + 1)))
        st.text(f"You have selected Main Dataset {st.session_state.dataset_number}")

    # Probe load (limit=1) only to learn the total row count for the slider.
    _, total_rows = load_dataset_with_limit(
        dataset_name, st.session_state.dataset_number,
        st.session_state.search_in_small_objects, limit=1)
    max_rows = min(total_rows, 80000)
    # min_value=1 (was 0): a zero-row dataset would make every search empty.
    dataset_limit = st.slider(
        "Size of Dataset to be searched from",
        min_value=1, max_value=max_rows, value=max(1, max_rows // 2))
    st.text('The smaller the dataset the faster the search will work.')
    st.text('Please click Load Dataset to finalize selection for search')

    # Cosmetic progress bar; the real work is the load_dataset_with_limit
    # call below. The sleeps only pace the bar animation.
    loading_dataset_text = st.empty()
    loading_dataset_text.text("Loading Dataset...")
    loading_dataset_bar = st.progress(0)
    for pct in range(25, 101, 25):
        time.sleep(0.2)
        loading_dataset_bar.progress(pct)

    # Load the selected slice and cache it in session state for the search.
    df, total_rows = load_dataset_with_limit(
        dataset_name, st.session_state.dataset_number,
        st.session_state.search_in_small_objects, limit=dataset_limit)
    st.session_state.df = df
    loading_dataset_bar.progress(100)
    loading_dataset_text.text("Dataset loaded successfully!")
    st.success(f"Dataset loaded successfully with {len(df)} rows.")

    # Search controls.
    query = st.text_input("Enter your search query")
    st.text(f"Example Queries for your Dataset: {example_queries[dataset_name]}")
    limit = st.number_input("Number of results to display", min_value=1, max_value=100, value=10)

    if st.button("Search"):
        if not query:
            st.warning("Please enter a search query.")
        else:
            try:
                search_loading_text = st.empty()
                search_loading_text.text("Searching...")
                search_progress_bar = st.progress(0)
                # Search the dataset cached above in session state.
                df = st.session_state.df
                top_k_cordinates = None  # only set for small-object search
                results = search(query, df, limit)
                top_k_paths = get_file_paths(df, results)
                if st.session_state.search_in_small_objects:
                    top_k_cordinates = get_cordinates(df, results)
                    search_type = 'Splits'
                else:
                    search_type = 'Main'
                search_progress_bar.progress(100)
                search_loading_text.text(
                    f"Search completed among {dataset_limit} rows for {dataset_name} "
                    f"in {search_type} {st.session_state.dataset_number}")
                # Display results: with bounding boxes for small-object hits,
                # plain images otherwise.
                if st.session_state.search_in_small_objects and top_k_paths and top_k_cordinates:
                    get_images_with_bounding_boxes_from_s3(
                        bucket_name, top_k_paths, top_k_cordinates,
                        AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, folder_path)
                elif not st.session_state.search_in_small_objects and top_k_paths:
                    st.write(f"Displaying top {len(top_k_paths)} results for query '{query}':")
                    get_images_from_s3_to_display(
                        bucket_name, top_k_paths,
                        AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, folder_path)
                else:
                    st.write("No results found.")
            except Exception as e:
                # BUG FIX: the original wrote `'None' in e`, which raises
                # TypeError because an exception instance is not a container;
                # the message must be inspected via str(e).
                if 'None' in str(e):
                    st.warning("Please Click Load Dataset")
                else:
                    st.error(f"Search failed: {e}")
# Script entry point. BUG FIX: the guard was duplicated, so running the file
# as a script invoked main() twice; a single guard runs it once.
if __name__ == "__main__":
    main()