import streamlit as st
from helper import (
    load_dataset,
    parallel_load_and_combine,
    get_file_paths,
    get_cordinates,
    get_images_from_s3_to_display,
    get_images_with_bounding_boxes_from_s3,
    batch_search,
)
import os
import time
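
# All data access (dataset loading, embedding search, S3 image retrieval) lives
# in helper.py; this script only wires those helpers into the Streamlit UI.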

# Load environment variables
AWS_ACCESS_KEY_ID = os.getenv("AWS_ACCESS_KEY_ID")
AWS_SECRET_ACCESS_KEY = os.getenv("AWS_SECRET_ACCESS_KEY")
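
# A minimal safeguard (an addition, not in the original script): the S3 helpers
# below assume these credentials are set, so warn early if they are missing
# rather than failing mid-search.
if not (AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY):
    print("Warning: AWS credentials not found in the environment; S3 image display will fail.")
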
# Predefined list of datasets
datasets = ["WayveScenes", "MajorTom-Europe"]  # Example dataset names
description = {
    "StopSign_test": "A small test dataset used during development.",
    "WayveScenes": "A large-scale dataset featuring diverse urban driving scenes, captured from autonomous vehicles to advance AI perception and navigation in complex environments.",
    "MajorTom-Europe": "A geospatial dataset containing satellite imagery from across Europe, designed for tasks like land-use classification, environmental monitoring, and earth observation analytics.",
}
# AWS S3 bucket name
bucket_name = "datasets-quasara-io"

# Streamlit App
def main():
    st.title("Semantic Search and Image Display")

    # Select dataset from dropdown
    dataset_name = st.selectbox("Select Dataset", datasets)
    
    # StopSign_test images sit at the bucket root; every other dataset is
    # stored under a folder named after it.
    if dataset_name == 'StopSign_test':
        folder_path = ""
    else:
        folder_path = f'{dataset_name}/'
    st.caption(description[dataset_name])  # Show a short description of the selected dataset
    # Progress bar for loading dataset
    loading_text = st.empty()  # Placeholder for dynamic text
    loading_text.text("Loading dataset...")
    progress_bar = st.progress(0)
    
    # Simulate dataset loading progress
    for i in range(0, 100, 25):
        time.sleep(0.2)  # Simulate work being done
        progress_bar.progress(i + 25)
    
    # Load the selected dataset
    dataset = load_dataset(f"quasara-io/{dataset_name}")
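    # Streamlit re-executes this script on every widget interaction, so this
    # call repeats on each rerun; wrapping load_dataset in st.cache_resource is
    # a possible optimization (noted here as a suggestion, not applied).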

    # Complete progress when loading is done
    progress_bar.progress(100)
    loading_text.text("Dataset loaded successfully!")

    # Input search query
    query = st.text_input("Enter your search query")

    # Number of results to display
    limit = st.number_input("Number of results to display", min_value=1, max_value=10, value=10)
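    # NOTE: `limit` is collected but never forwarded to batch_search below, so
    # the number of results shown is whatever the helper returns.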
    # Toggle between whole-image search and small-object (crop) search
    search_in_small_objects = st.checkbox("Enable Small Object Search")
    st.text("Small Object Search Enabled" if search_in_small_objects else "Small Object Search Disabled")
    
    # Search button
    if st.button("Search"):
        # Validate input
        if not query:
            st.warning("Please enter a search query.")
        else:
            # Progress bar for search
            search_loading_text = st.empty()
            search_loading_text.text("Performing search...")
            search_progress_bar = st.progress(0)
            
            # Simulate search progress (e.g., in 4 steps)
            for i in range(0, 100, 25):
                time.sleep(0.3)  # Simulate work being done
                search_progress_bar.progress(i + 25)

            # Get dataset keys to speed up processing/search
            dataset_keys = dataset.keys()
            main_df, split_df = parallel_load_and_combine(dataset_keys, dataset)
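            # Judging from how each is used below, main_df appears to hold
            # whole-image entries and split_df the small-object (crop) entries.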

            # Small-object search: query the crop entries and draw bounding boxes
            if search_in_small_objects:
                results = batch_search(query, split_df)
                top_k_paths = get_file_paths(split_df, results)
                top_k_cordinates = get_cordinates(split_df, results)
                # Complete the search progress
                search_progress_bar.progress(100)
                search_loading_text.text("Search completed!")
                # Load images with bounding boxes overlaid from the returned coordinates
                if top_k_paths and top_k_cordinates:
                    get_images_with_bounding_boxes_from_s3(bucket_name, top_k_paths, top_k_cordinates, AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, folder_path)
                else:
                    st.write("No results found.")
            else:
                # Whole-image (normal) search
                results = batch_search(query, main_df)
                top_k_paths = get_file_paths(main_df, results)
                # Complete the search progress
                search_progress_bar.progress(100)
                search_loading_text.text("Search completed!")
                # Display matching images straight from S3
                if top_k_paths:
                    st.write(f"Displaying top {len(top_k_paths)} results for query '{query}':")
                    get_images_from_s3_to_display(bucket_name, top_k_paths, AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, folder_path)
                else:
                    st.write("No results found.")

    
if __name__ == "__main__":
    main()
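
# To run locally (assuming this file is saved as app.py and AWS credentials
# are exported in the shell):
#   streamlit run app.py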