File size: 7,378 Bytes
8ba7a1f
2738ccd
12e44e3
 
 
 
b0bacdc
 
35c3575
8ba7a1f
f6019ba
8ba7a1f
 
12e44e3
8ba7a1f
800c748
aa35944
 
 
 
 
 
a3902aa
5c310ce
89f1741
 
 
12e44e3
 
 
89f1741
 
 
12e44e3
89f1741
800c748
 
 
 
 
 
89f1741
8ba7a1f
 
 
35c3575
022e3b7
8ba7a1f
 
12e44e3
 
 
b0bacdc
12e44e3
 
b0bacdc
12e44e3
 
 
8ba7a1f
 
 
 
4e43718
89f1741
aa35944
f6019ba
12e44e3
f6019ba
12e44e3
 
 
 
4e43718
b0bacdc
12e44e3
 
 
 
4e43718
b0bacdc
bb180db
 
 
 
800c748
bb180db
fa2fb9a
 
 
 
 
 
 
 
 
 
d936000
fa2fb9a
 
 
 
 
 
 
 
b0bacdc
fa2fb9a
 
 
 
 
b0bacdc
fa2fb9a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
800c748
fa2fb9a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
800c748
fa2fb9a
 
800c748
cf4cc4f
 
 
 
fa2fb9a
800c748
cf4cc4f
fa2fb9a
8ba7a1f
800c748
 
 
 
 
 
12e44e3
8ba7a1f
12e44e3
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
import streamlit as st
from helper import (
    load_dataset, search, get_file_paths,
    get_cordinates, get_images_from_s3_to_display,
    get_images_with_bounding_boxes_from_s3, load_dataset_with_limit
)
import os
import time


# Load environment variables
AWS_ACCESS_KEY_ID = os.getenv("AWS_ACCESS_KEY_ID")
AWS_SECRET_ACCESS_KEY = os.getenv("AWS_SECRET_ACCESS_KEY")

# Predefined list of datasets
datasets = ["MajorTom-Germany", "MajorTom-Netherlands", "WayveScenes"]
folder_path_dict = {
    "WayveScenes": "WayveScenes/",
    "MajorTom-Germany": "MajorTOM-DE/",
    "MajorTom-Netherlands": "MajorTOM-NL/",
    "MajorTom-UK" :""
}
description = {
    "WayveScenes": "A large-scale dataset featuring diverse urban driving scenes, captured from vehicles to advance AI perception and navigation in complex environments.",
    "MajorTom-Germany": "A geospatial dataset containing satellite imagery from across Germany, designed for tasks like land-use classification, environmental monitoring, and earth observation analytics.",
    "MajorTom-Netherlands": "A geospatial dataset containing satellite imagery from across Netherlands, designed for tasks like land-use classification, environmental monitoring, and earth observation analytics.",
    "MajorTom-UK" :"A geospatial dataset containing satellite imagery from across the United Kingdom, designed for tasks like land-use classification, environmental monitoring, and earth observation analytics."
}
selection = {
    'WayveScenes': [1, 8],
    "MajorTom-Germany": [1, 1],
    "MajorTom-Netherlands": [1,1],
    "MajorTom-UK": [1,1]
}

example_queries = {
    'WayveScenes': "Parking Signs, Pedestrian Crossing, Traffic Light (Red, Green, Orange)",
    "MajorTom-Germany": "Airports, Golf Courses, Wind Mills, Solar Panels ",
    "MajorTom-Netherlands": "Airports, Golf Courses, Wind Mills, Solar Panels ",
    "MajorTom-UK": "Airports, Golf Courses, Wind Mills, Solar Panels "
}

# AWS S3 bucket name
bucket_name = "datasets-quasara-io"



# Streamlit App
def main():
    """Streamlit entry point: select a dataset, load a size-limited slice of
    it, run a semantic search query against it, and display the matching
    images (optionally with bounding boxes) fetched from S3.

    Side effects only (renders UI, reads S3); returns None.
    """
    # Initialize session state variables if not already initialized
    if 'search_in_small_objects' not in st.session_state:
        st.session_state.search_in_small_objects = False

    if 'dataset_number' not in st.session_state:
        st.session_state.dataset_number = 1

    if 'df' not in st.session_state:
        st.session_state.df = None

    st.title("Semantic Search and Image Display")

    # Select dataset from dropdown
    dataset_name = st.selectbox("Select Dataset", datasets)
    st.session_state.df = None  # invalidate any previously loaded dataframe
    # S3 key prefix for the chosen dataset
    folder_path = folder_path_dict[dataset_name]

    st.caption(description[dataset_name])

    # Small-object ("split") search uses a different subset count than the
    # main dataset: selection[name] is [main_count, split_count].
    if st.checkbox("Enable Small Object Search", value=st.session_state.search_in_small_objects):
        st.session_state.search_in_small_objects = True
        st.text("Small Object Search Enabled")
        st.session_state.dataset_number = st.selectbox("Select Subset of Data", list(range(1, selection[dataset_name][1] + 1)))
        st.session_state.df = None
        st.text(f"You have selected Split Dataset {st.session_state.dataset_number}")
    else:
        st.session_state.search_in_small_objects = False
        st.text("Small Object Search Disabled")
        st.session_state.dataset_number = st.selectbox("Select Subset of Data", list(range(1, selection[dataset_name][0] + 1)))
        st.session_state.df = None
        st.text(f"You have selected Main Dataset {st.session_state.dataset_number}")

    # Probe load with limit=1 solely to learn total_rows for the slider bounds.
    df, total_rows = load_dataset_with_limit(dataset_name, st.session_state.dataset_number, st.session_state.search_in_small_objects, limit=1)
    dataset_limit = st.slider("Size of Dataset to be searched from", min_value=0, max_value=min(total_rows, 80000), value=int(min(total_rows, 80000)/2))
    st.text(f'The smaller the dataset the faster the search will work.')
    st.text('Please click Load Dataset to finalize selection for search')

    # Loading Dataset
    loading_dataset_text = st.empty()
    loading_dataset_text.text("Loading Dataset...")
    loading_dataset_bar = st.progress(0)

    # Simulate dataset loading progress (cosmetic; real work happens below)
    for i in range(0, 100, 25):
        time.sleep(0.2)  # Simulate work being done
        loading_dataset_bar.progress(i + 25)

    # Load dataset
    df, total_rows = load_dataset_with_limit(dataset_name, st.session_state.dataset_number, st.session_state.search_in_small_objects, limit=dataset_limit)

    # Store loaded dataset in session state
    st.session_state.df = df
    loading_dataset_bar.progress(100)
    loading_dataset_text.text("Dataset loaded successfully!")
    st.success(f"Dataset loaded successfully with {len(df)} rows.")

    # After dataset is loaded, show search options
    query = st.text_input("Enter your search query")
    st.text(f"Example Queries for your Dataset: {example_queries[dataset_name]}")
    # Number of results to display
    limit = st.number_input("Number of results to display", min_value=1, max_value=100, value=10)

    # Search button
    if st.button("Search"):
        # Validate input
        if not query:
            st.warning("Please enter a search query.")
        else:
            try:
                # Progress bar for search
                search_loading_text = st.empty()
                search_loading_text.text("Searching...")
                search_progress_bar = st.progress(0)

                # Perform search on the loaded dataset from session state
                df = st.session_state.df
                top_k_cordinates = None  # only populated by the split search path
                if st.session_state.search_in_small_objects:
                    results = search(query, df, limit)
                    top_k_paths = get_file_paths(df, results)
                    top_k_cordinates = get_cordinates(df, results)
                    search_type = 'Splits'
                else:
                    # Normal Search
                    results = search(query, df, limit)
                    top_k_paths = get_file_paths(df, results)
                    search_type = 'Main'

                # BUG FIX: this completion/display section was previously
                # nested inside the else-branch above, so small-object
                # searches never finished the progress bar or showed results.
                search_progress_bar.progress(100)
                search_loading_text.text(f"Search completed among {dataset_limit} rows for {dataset_name} in {search_type} {st.session_state.dataset_number}")

                # Load Images with Bounding Boxes if applicable
                if st.session_state.search_in_small_objects and top_k_paths and top_k_cordinates:
                    get_images_with_bounding_boxes_from_s3(bucket_name, top_k_paths, top_k_cordinates, AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, folder_path)
                elif not st.session_state.search_in_small_objects and top_k_paths:
                    st.write(f"Displaying top {len(top_k_paths)} results for query '{query}':")
                    get_images_from_s3_to_display(bucket_name, top_k_paths, AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, folder_path)
                else:
                    st.write("No results found.")

            except Exception as e:
                # BUG FIX: original read `if 'None' in e:` — `in` on an
                # Exception object raises TypeError, and the next line was
                # mis-indented (SyntaxError). Inspect the message text instead.
                if 'None' in str(e):
                    st.warning("Please Click Load Dataset")
                else:
                    st.error(f"Search failed: {e}")


            

# BUG FIX: the entry guard was duplicated, so running the file as a script
# executed main() twice. A single guard is sufficient.
if __name__ == "__main__":
    main()