# Streamlit app: semantic search over image datasets stored in S3.
# (File-viewer residue removed here: size banner, commit-hash gutter, line-number gutter.)
import streamlit as st
import logging
import os
import time
import psutil
from helper import (
load_dataset, search, get_file_paths,
get_cordinates, get_images_from_s3_to_display,
get_images_with_bounding_boxes_from_s3, load_dataset_with_limit
)
# Route INFO-and-above records through the root logger.
logging.basicConfig(level=logging.INFO)

# AWS credentials come from the process environment; each is None when the
# corresponding variable is not set.
AWS_ACCESS_KEY_ID = os.environ.get("AWS_ACCESS_KEY_ID")
AWS_SECRET_ACCESS_KEY = os.environ.get("AWS_SECRET_ACCESS_KEY")

# Dataset names offered in the "Select Dataset" dropdown.
datasets = [
    "WayveScenes",
    "MajorTom-Europe",
]

# Caption shown for each dataset, keyed by dataset name.
description = {
    "StopSign_test": "A test dataset for me",
    "WayveScenes": (
        "A large-scale dataset featuring diverse urban driving scenes."
    ),
    "MajorTom-Europe": (
        "A geospatial dataset containing satellite imagery from across Europe."
    ),
}

# Per dataset: [highest subset number without small-object search,
#               highest subset number with small-object search].
selection = {
    "WayveScenes": [1, 8],
    "MajorTom-Europe": [1, 18],
}

# S3 bucket the result images are fetched from.
bucket_name = "datasets-quasara-io"
# Function to log CPU and memory usage
def log_resource_usage(stage, interval=1):
    """Log system-wide CPU and memory utilisation at INFO level.

    Args:
        stage: Label placed at the start of the log line to identify where
            in the app the sample was taken.
        interval: Forwarded to ``psutil.cpu_percent`` as its sampling window
            in seconds. The default of 1 keeps the previous behaviour, which
            blocks the caller for one second per call; pass a smaller value
            (or ``None`` for a non-blocking reading measured against the
            previous call) where that delay matters.
    """
    cpu_usage = psutil.cpu_percent(interval=interval)
    memory_info = psutil.virtual_memory()
    # Lazy %-style arguments: the message is only formatted if the record is
    # actually emitted. The rendered text is unchanged.
    logging.info(
        "%s - CPU Usage: %s%%, Memory Usage: %s%%",
        stage,
        cpu_usage,
        memory_info.percent,
    )
# Streamlit App
def _init_session_state():
    """Create the session-state keys the page uses, keeping existing values."""
    defaults = {
        'search_in_small_objects': False,
        'dataset_number': 1,
        'df': None,
        # Options the dataframe in ``df`` was loaded with; filled in by a
        # successful load and consulted at search time.
        'loaded_settings': None,
    }
    for key, value in defaults.items():
        if key not in st.session_state:
            st.session_state[key] = value


def _load_dataset(dataset_name, folder_path, dataset_limit):
    """Load the selected dataset into session state, reporting progress in the UI."""
    try:
        loading_dataset_text = st.empty()
        loading_dataset_text.text("Loading Dataset...")
        loading_dataset_bar = st.progress(0)
        # Cosmetic ramp only. It stops at 75% so the bar does not claim
        # completion before the load below has even started.
        for percent in (25, 50, 75):
            time.sleep(0.2)
            loading_dataset_bar.progress(percent)
        log_resource_usage("Before Loading Dataset")
        dataset_number = st.session_state.dataset_number
        small_objects = st.session_state.search_in_small_objects
        df, _total_rows = load_dataset_with_limit(
            dataset_name, dataset_number, small_objects, limit=dataset_limit
        )
        st.session_state.df = df
        # Remember what this dataframe corresponds to, so a later search does
        # not combine it with a selection that was changed after loading.
        st.session_state.loaded_settings = {
            'dataset_name': dataset_name,
            'dataset_number': dataset_number,
            'small_objects': small_objects,
            'folder_path': folder_path,
        }
        loading_dataset_bar.progress(100)
        loading_dataset_text.text("Dataset loaded successfully!")
        st.success(f"Dataset loaded successfully with {len(df)} rows.")
        log_resource_usage("After Loading Dataset")
    except Exception as e:
        # UI boundary: show the failure to the user and keep the traceback in the log.
        logging.exception("Failed to load dataset: %s", e)
        st.error(f"Failed to load dataset: {e}")


def _run_search(query, limit, current_settings):
    """Search the loaded dataset for ``query`` and display the matching images."""
    if not query:
        st.warning("Please enter a search query.")
        return

    df = st.session_state.df
    loaded = st.session_state.loaded_settings
    if df is None or loaded is None:
        # Without this guard the helpers receive None and the user only sees
        # an opaque "Search failed" error.
        st.warning("Please load a dataset before searching.")
        return

    if loaded != current_settings:
        st.info(
            "The selection has changed since the dataset was loaded; searching "
            "the loaded dataset. Click 'Load Dataset' to apply the new selection."
        )

    # Use the options the dataframe was loaded with, not the live widget values.
    small_objects = loaded['small_objects']
    folder_path = loaded['folder_path']

    try:
        search_loading_text = st.empty()
        search_loading_text.text("Searching...")
        search_progress_bar = st.progress(0)
        log_resource_usage("Before Search")

        results = search(query, df, limit)
        top_k_paths = get_file_paths(df, results)
        # Coordinates are only fetched in small-object mode.
        top_k_cordinates = get_cordinates(df, results) if small_objects else None

        search_progress_bar.progress(100)
        search_loading_text.text("Search completed!")
        log_resource_usage("After Search")

        if small_objects and top_k_paths and top_k_cordinates:
            get_images_with_bounding_boxes_from_s3(
                bucket_name, top_k_paths, top_k_cordinates,
                AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, folder_path,
            )
        elif not small_objects and top_k_paths:
            st.write(f"Displaying top {len(top_k_paths)} results for query '{query}':")
            get_images_from_s3_to_display(
                bucket_name, top_k_paths,
                AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, folder_path,
            )
        else:
            st.write("No results found.")
    except Exception as e:
        # UI boundary: show the failure to the user and keep the traceback in the log.
        logging.exception("Search failed: %s", e)
        st.error(f"Search failed: {e}")


def main():
    """Render the page: dataset selection, dataset loading, and semantic search.

    The loaded dataframe and the options it was loaded with are kept in
    ``st.session_state``. Searching requires a dataset to have been loaded
    first and always runs against the loaded dataset's own options.
    """
    _init_session_state()

    st.title("Semantic Search and Image Display")
    log_resource_usage("Initialization")

    # Select dataset from dropdown
    dataset_name = st.selectbox("Select Dataset", datasets)
    folder_path = "" if dataset_name == 'StopSign_test' else f'{dataset_name}/'
    st.caption(description[dataset_name])

    small_objects = st.checkbox(
        "Enable Small Object Search",
        value=st.session_state.search_in_small_objects,
    )
    st.session_state.search_in_small_objects = small_objects
    if small_objects:
        st.text("Small Object Search Enabled")
    else:
        st.text("Small Object Search Disabled")
    # selection[name] holds the subset count for [normal, small-object] mode.
    subset_count = selection[dataset_name][1 if small_objects else 0]
    st.session_state.dataset_number = st.selectbox(
        "Select Subset of Data", list(range(1, subset_count + 1))
    )

    dataset_limit = st.slider(
        "Size of Dataset to be searched from",
        min_value=1000, max_value=20000, value=10000,
    )
    st.text('The smaller the dataset, the faster the search will work.')

    # (Re)load the dataset every time the button is pressed
    if st.button("Load Dataset"):
        _load_dataset(dataset_name, folder_path, dataset_limit)

    # Input search query
    query = st.text_input("Enter your search query")
    # Number of results to display
    limit = st.number_input("Number of results to display", min_value=1, max_value=10, value=10)

    # Search button
    if st.button("Search"):
        current_settings = {
            'dataset_name': dataset_name,
            'dataset_number': st.session_state.dataset_number,
            'small_objects': small_objects,
            'folder_path': folder_path,
        }
        _run_search(query, limit, current_settings)
# Build the page only when this file is executed as the main script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|