inie2003 committed on
Commit
12e44e3
·
verified ·
1 Parent(s): a613c68

Added small object search and new dataset-loading features

Browse files

Because memory is limited, we now load only a random sample of x rows from the dataset instead of loading all of it.

Files changed (1) hide show
  1. app.py +82 -65
app.py CHANGED
@@ -1,110 +1,127 @@
1
  import streamlit as st
2
- from helper import load_dataset, parallel_load_and_combine,search, get_file_paths, get_cordinates, get_images_from_s3_to_display, get_images_with_bounding_boxes_from_s3, batch_search
 
 
 
 
3
  import os
4
  import time
5
 
6
  # Load environment variables
7
  AWS_ACCESS_KEY_ID = os.getenv("AWS_ACCESS_KEY_ID")
8
  AWS_SECRET_ACCESS_KEY = os.getenv("AWS_SECRET_ACCESS_KEY")
 
9
  # Predefined list of datasets
10
- datasets = ["WayveScenes","MajorTom-Europe"] # Example dataset names
11
  description = {
12
- "StopSign_test" : "A test dataset for me",
13
- "WayveScenes": "A large-scale dataset featuring diverse urban driving scenes, captured from autonomous vehicles to advance AI perception and navigation in complex environments.",
14
- "MajorTom-Europe": "A geospatial dataset containing satellite imagery from across Europe, designed for tasks like land-use classification, environmental monitoring, and earth observation analytics."}
 
 
 
 
 
 
15
  # AWS S3 bucket name
16
  bucket_name = "datasets-quasara-io"
17
 
18
  # Streamlit App
19
  def main():
 
 
 
 
 
 
 
 
 
 
20
  st.title("Semantic Search and Image Display")
21
 
22
  # Select dataset from dropdown
23
  dataset_name = st.selectbox("Select Dataset", datasets)
24
-
25
  if dataset_name == 'StopSign_test':
26
  folder_path = ""
27
  else:
28
  folder_path = f'{dataset_name}/'
29
- st.caption(description[dataset_name]) #trial area
30
- # Progress bar for loading dataset
31
- loading_text = st.empty() # Placeholder for dynamic text
32
- loading_text.text("Loading dataset...")
33
- progress_bar = st.progress(0)
34
-
35
- # Simulate dataset loading progress
36
- for i in range(0, 100, 25):
37
- time.sleep(0.2) # Simulate work being done
38
- progress_bar.progress(i + 25)
39
-
40
- # Load the selected dataset
41
- dataset = load_dataset(f"quasara-io/{dataset_name}")
42
 
43
- # Complete progress when loading is done
44
- progress_bar.progress(100)
45
- loading_text.text("Dataset loaded successfully!")
46
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
  # Input search query
48
  query = st.text_input("Enter your search query")
49
 
50
  # Number of results to display
51
  limit = st.number_input("Number of results to display", min_value=1, max_value=10, value=10)
52
- if st.checkbox("Enable Small Object Search"):
53
- search_in_small_objects = True
54
- st.text("Small Object Search Enabled")
55
- else:
56
- search_in_small_objects = False
57
- st.text("Small Object Search Disabled")
58
-
59
  # Search button
60
  if st.button("Search"):
61
  # Validate input
62
  if not query:
63
  st.warning("Please enter a search query.")
64
  else:
65
- # Progress bar for search
66
- search_loading_text = st.empty()
67
- search_loading_text.text("Performing search...")
68
- search_progress_bar = st.progress(0)
69
-
70
- # Simulate search progress (e.g., in 4 steps)
71
- for i in range(0, 100, 25):
72
- time.sleep(0.3) # Simulate work being done
73
- search_progress_bar.progress(i + 25)
74
-
75
- #Get Dataset Keys to speed up processing/search
76
- dataset_keys = dataset.keys()
77
- main_df,split_df = parallel_load_and_combine(dataset_keys,dataset)
78
-
79
- #Small Search
80
- if search_in_small_objects:
81
- # Perform the search
82
- results = batch_search(query, split_df)
83
- top_k_paths = get_file_paths(split_df,results)
84
- top_k_cordinates = get_cordinates(split_df, results)
85
- # Complete the search progress
86
- search_progress_bar.progress(100)
87
- search_loading_text.text("Search completed!")
88
- #Load Images with Bounding boxes
89
- if top_k_paths and top_k_cordinates:
90
- get_images_with_bounding_boxes_from_s3(bucket_name,top_k_paths, top_k_cordinates, AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, folder_path)
91
  else:
92
- st.write("No results found.")
93
- else:
94
- #Normal Search
95
- results = batch_search(query, main_df)
96
- top_k_paths = get_file_paths(main_df, results)
97
  # Complete the search progress
98
  search_progress_bar.progress(100)
99
  search_loading_text.text("Search completed!")
100
- #Load Images
101
- # Display images from S3
102
- if top_k_paths:
 
 
103
  st.write(f"Displaying top {len(top_k_paths)} results for query '{query}':")
104
  get_images_from_s3_to_display(bucket_name, top_k_paths, AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, folder_path)
 
105
  else:
106
  st.write("No results found.")
107
 
108
-
 
 
109
  if __name__ == "__main__":
110
- main()
 
1
  import streamlit as st
2
+ from helper import (
3
+ load_dataset, search, get_file_paths,
4
+ get_cordinates, get_images_from_s3_to_display,
5
+ get_images_with_bounding_boxes_from_s3, load_dataset_with_limit
6
+ )
7
  import os
8
  import time
9
 
10
  # Load environment variables
11
  AWS_ACCESS_KEY_ID = os.getenv("AWS_ACCESS_KEY_ID")
12
  AWS_SECRET_ACCESS_KEY = os.getenv("AWS_SECRET_ACCESS_KEY")
13
+
14
  # Predefined list of datasets
15
+ datasets = ["WayveScenes", "MajorTom-Europe"]
16
  description = {
17
+ "StopSign_test": "A test dataset for me",
18
+ "WayveScenes": "A large-scale dataset featuring diverse urban driving scenes, captured from autonomous vehicles to advance AI perception and navigation in complex environments.",
19
+ "MajorTom-Europe": "A geospatial dataset containing satellite imagery from across Europe, designed for tasks like land-use classification, environmental monitoring, and earth observation analytics."
20
+ }
21
+ selection = {
22
+ 'WayveScenes': [1, 8],
23
+ "MajorTom-Europe": [1, 18]
24
+ }
25
+
26
  # AWS S3 bucket name
27
  bucket_name = "datasets-quasara-io"
28
 
29
  # Streamlit App
30
  def main():
31
+ # Initialize session state variables if not already initialized
32
+ if 'search_in_small_objects' not in st.session_state:
33
+ st.session_state.search_in_small_objects = False
34
+
35
+ if 'dataset_number' not in st.session_state:
36
+ st.session_state.dataset_number = 1
37
+
38
+ if 'df' not in st.session_state:
39
+ st.session_state.df = None
40
+
41
  st.title("Semantic Search and Image Display")
42
 
43
  # Select dataset from dropdown
44
  dataset_name = st.selectbox("Select Dataset", datasets)
45
+
46
  if dataset_name == 'StopSign_test':
47
  folder_path = ""
48
  else:
49
  folder_path = f'{dataset_name}/'
 
 
 
 
 
 
 
 
 
 
 
 
 
50
 
51
+ st.caption(description[dataset_name])
 
 
52
 
53
+ if st.checkbox("Enable Small Object Search", value=st.session_state.search_in_small_objects):
54
+ st.session_state.search_in_small_objects = True
55
+ st.text("Small Object Search Enabled")
56
+ st.session_state.dataset_number = st.selectbox("Select Subset of Data", list(range(1, selection[dataset_name][1] + 1)))
57
+ st.text(f"You have selected Split Dataset {st.session_state.dataset_number}")
58
+ else:
59
+ st.session_state.search_in_small_objects = False
60
+ st.text("Small Object Search Disabled")
61
+ st.session_state.dataset_number = st.selectbox("Select Subset of Data", list(range(1, selection[dataset_name][0] + 1)))
62
+ st.text(f"You have selected Main Dataset {st.session_state.dataset_number}")
63
+
64
+
65
+ dataset_limit = st.slider("Size of Dataset to be searched from", min_value=1000, max_value=10000, value=5000)
66
+ st.text(f'The smaller the dataset the faster the search will work.')
67
+
68
+ # Load dataset with limit only if not already loaded
69
+ if st.button("Load Dataset"):
70
+ try:
71
+ df, total_rows = load_dataset_with_limit(dataset_name, st.session_state.dataset_number, st.session_state.search_in_small_objects, limit=dataset_limit)
72
+ # Store loaded dataset in session state
73
+ st.session_state.df = df
74
+ st.success(f"Dataset loaded successfully with {len(df)} rows.")
75
+
76
+ except Exception as e:
77
+ st.error(f"Failed to load dataset: {e}")
78
+
79
+
80
  # Input search query
81
  query = st.text_input("Enter your search query")
82
 
83
  # Number of results to display
84
  limit = st.number_input("Number of results to display", min_value=1, max_value=10, value=10)
85
+
 
 
 
 
 
 
86
  # Search button
87
  if st.button("Search"):
88
  # Validate input
89
  if not query:
90
  st.warning("Please enter a search query.")
91
  else:
92
+ try:
93
+ # Progress bar for search
94
+ search_loading_text = st.empty()
95
+ search_loading_text.text("Searching...")
96
+ search_progress_bar = st.progress(0)
97
+
98
+ # Perform search on the loaded dataset from session state
99
+ df = st.session_state.df
100
+ if st.session_state.search_in_small_objects:
101
+ results = search(query, df, limit)
102
+ top_k_paths = get_file_paths(df, results)
103
+ top_k_cordinates = get_cordinates(df, results)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
104
  else:
105
+ # Normal Search
106
+ results = search(query, df, limit)
107
+ top_k_paths = get_file_paths(df, results)
108
+
 
109
  # Complete the search progress
110
  search_progress_bar.progress(100)
111
  search_loading_text.text("Search completed!")
112
+
113
+ # Load Images with Bounding Boxes if applicable
114
+ if st.session_state.search_in_small_objects and top_k_paths and top_k_cordinates:
115
+ get_images_with_bounding_boxes_from_s3(bucket_name, top_k_paths, top_k_cordinates, AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, folder_path)
116
+ elif not st.session_state.search_in_small_objects and top_k_paths:
117
  st.write(f"Displaying top {len(top_k_paths)} results for query '{query}':")
118
  get_images_from_s3_to_display(bucket_name, top_k_paths, AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, folder_path)
119
+
120
  else:
121
  st.write("No results found.")
122
 
123
+ except Exception as e:
124
+ st.error(f"Search failed: {e}")
125
+
126
  if __name__ == "__main__":
127
+ main()