File size: 5,810 Bytes
8ba7a1f
022e3b7
 
 
 
e382418
12e44e3
 
 
 
022e3b7
 
 
8ba7a1f
f6019ba
8ba7a1f
 
12e44e3
8ba7a1f
12e44e3
a3902aa
12e44e3
022e3b7
 
12e44e3
 
 
 
 
 
8ba7a1f
 
 
022e3b7
 
 
 
 
 
8ba7a1f
 
12e44e3
 
 
 
 
 
 
 
8ba7a1f
022e3b7
8ba7a1f
 
 
12e44e3
a3902aa
 
 
 
f6019ba
12e44e3
f6019ba
12e44e3
 
 
 
 
 
 
 
 
60b51d3
d936000
 
12e44e3
 
 
60b51d3
 
 
022e3b7
60b51d3
 
022e3b7
60b51d3
d936000
022e3b7
12e44e3
 
022e3b7
60b51d3
 
12e44e3
022e3b7
d936000
12e44e3
022e3b7
12e44e3
d936000
8ba7a1f
 
 
 
 
12e44e3
8ba7a1f
 
 
 
 
 
12e44e3
 
 
 
 
022e3b7
12e44e3
 
 
 
 
a3902aa
12e44e3
 
 
a3902aa
 
022e3b7
12e44e3
 
 
 
 
a3902aa
 
 
 
8ba7a1f
12e44e3
022e3b7
12e44e3
 
8ba7a1f
12e44e3
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
import streamlit as st
import logging
import os
import time
import psutil
from helper import (
    load_dataset, search, get_file_paths,
    get_cordinates, get_images_from_s3_to_display,
    get_images_with_bounding_boxes_from_s3, load_dataset_with_limit
)

# Configure logging
logging.basicConfig(level=logging.INFO)

# Load environment variables
# AWS credentials are read from the environment; either may be None if unset.
AWS_ACCESS_KEY_ID = os.getenv("AWS_ACCESS_KEY_ID")
AWS_SECRET_ACCESS_KEY = os.getenv("AWS_SECRET_ACCESS_KEY")

# Predefined list of datasets
# NOTE(review): "StopSign_test" has a description below but is absent from
# this list, so the StopSign_test branch in main() is currently unreachable.
datasets = ["WayveScenes", "MajorTom-Europe"]
description = {
    "StopSign_test": "A test dataset for me",
    "WayveScenes": "A large-scale dataset featuring diverse urban driving scenes.",
    "MajorTom-Europe": "A geospatial dataset containing satellite imagery from across Europe."
}
# Per-dataset subset counts. Based on how main() indexes this, index [0] is
# presumably the number of normal subsets and [1] the number of small-object
# subsets — TODO confirm against the helper module.
selection = {
    'WayveScenes': [1, 8],
    "MajorTom-Europe": [1, 18]
}

# AWS S3 bucket name
bucket_name = "datasets-quasara-io"

# Function to log CPU and memory usage
def log_resource_usage(stage):
    """Log a CPU/memory utilization snapshot for the given app stage.

    Args:
        stage: Human-readable label for where in the app flow this snapshot
            is taken (e.g. "Before Search", "After Loading Dataset").
    """
    # interval=None returns CPU usage since the previous call without
    # blocking. The original interval=1 slept for a full second on every
    # call; with several calls per interaction that added noticeable
    # latency to the Streamlit UI. (The very first call may report 0.0.)
    cpu_usage = psutil.cpu_percent(interval=None)
    memory_info = psutil.virtual_memory()
    # Lazy %-args: the message is only formatted if INFO logging is enabled.
    logging.info("%s - CPU Usage: %s%%, Memory Usage: %s%%",
                 stage, cpu_usage, memory_info.percent)

# Streamlit App
def main():
    """Render the semantic-search UI: dataset choice, loading, and search.

    Relies on module-level configuration (``datasets``, ``description``,
    ``selection``, ``bucket_name``) and AWS credentials from the environment.
    Search results are fetched from S3 and displayed, with bounding boxes
    when small-object search is enabled.
    """
    # Initialize session state variables if not already initialized
    if 'search_in_small_objects' not in st.session_state:
        st.session_state.search_in_small_objects = False
    if 'dataset_number' not in st.session_state:
        st.session_state.dataset_number = 1
    if 'df' not in st.session_state:
        st.session_state.df = None

    st.title("Semantic Search and Image Display")
    log_resource_usage("Initialization")

    # Select dataset from dropdown
    dataset_name = st.selectbox("Select Dataset", datasets)

    # NOTE(review): 'StopSign_test' is not in `datasets`, so this branch is
    # currently unreachable; kept in case the dataset list changes.
    if dataset_name == 'StopSign_test':
        folder_path = ""
    else:
        folder_path = f'{dataset_name}/'

    st.caption(description[dataset_name])

    # Pick the subset range matching the search mode: selection[name][1]
    # for small-object search, selection[name][0] otherwise.
    if st.checkbox("Enable Small Object Search", value=st.session_state.search_in_small_objects):
        st.session_state.search_in_small_objects = True
        st.text("Small Object Search Enabled")
        st.session_state.dataset_number = st.selectbox("Select Subset of Data", list(range(1, selection[dataset_name][1] + 1)))
    else:
        st.session_state.search_in_small_objects = False
        st.text("Small Object Search Disabled")
        st.session_state.dataset_number = st.selectbox("Select Subset of Data", list(range(1, selection[dataset_name][0] + 1)))

    dataset_limit = st.slider("Size of Dataset to be searched from", min_value=1000, max_value=20000, value=10000)
    st.text('The smaller the dataset, the faster the search will work.')

    # Load dataset with limit only if not already loaded
    if st.button("Load Dataset"):
        try:
            loading_dataset_text = st.empty()
            loading_dataset_text.text("Loading Dataset...")
            loading_dataset_bar = st.progress(0)

            # Simulated progress: the loader exposes no callbacks, so the
            # bar is advanced on a short timer purely for user feedback.
            for i in range(0, 100, 25):
                time.sleep(0.2)
                loading_dataset_bar.progress(i + 25)

            log_resource_usage("Before Loading Dataset")
            df, total_rows = load_dataset_with_limit(dataset_name, st.session_state.dataset_number, st.session_state.search_in_small_objects, limit=dataset_limit)
            # Persist across reruns so "Search" can use the loaded frame.
            st.session_state.df = df

            loading_dataset_bar.progress(100)
            loading_dataset_text.text("Dataset loaded successfully!")
            st.success(f"Dataset loaded successfully with {len(df)} rows.")
            log_resource_usage("After Loading Dataset")

        except Exception as e:
            logging.error(f"Failed to load dataset: {e}")
            st.error(f"Failed to load dataset: {e}")

    # Input search query
    query = st.text_input("Enter your search query")

    # Number of results to display
    limit = st.number_input("Number of results to display", min_value=1, max_value=10, value=10)

    # Search button
    if st.button("Search"):
        # Validate input before doing any work.
        if not query:
            st.warning("Please enter a search query.")
        elif st.session_state.df is None:
            # Guard: searching before "Load Dataset" previously passed
            # df=None into search() and surfaced an opaque exception.
            st.warning("Please load a dataset before searching.")
        else:
            try:
                search_loading_text = st.empty()
                search_loading_text.text("Searching...")
                search_progress_bar = st.progress(0)

                log_resource_usage("Before Search")
                df = st.session_state.df
                # The search itself is identical in both modes; only
                # small-object mode also needs bounding-box coordinates.
                results = search(query, df, limit)
                top_k_paths = get_file_paths(df, results)
                top_k_cordinates = (
                    get_cordinates(df, results)
                    if st.session_state.search_in_small_objects
                    else None
                )

                search_progress_bar.progress(100)
                search_loading_text.text("Search completed!")
                log_resource_usage("After Search")

                # Load Images with Bounding Boxes if applicable
                if st.session_state.search_in_small_objects and top_k_paths and top_k_cordinates:
                    get_images_with_bounding_boxes_from_s3(bucket_name, top_k_paths, top_k_cordinates, AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, folder_path)
                elif not st.session_state.search_in_small_objects and top_k_paths:
                    st.write(f"Displaying top {len(top_k_paths)} results for query '{query}':")
                    get_images_from_s3_to_display(bucket_name, top_k_paths, AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, folder_path)
                else:
                    st.write("No results found.")

            except Exception as e:
                logging.error(f"Search failed: {e}")
                st.error(f"Search failed: {e}")

# Script entry point: only run the app when executed directly, not on import.
if __name__ == "__main__":
    main()