KB-VQA

Sleeping

App Files Files Community

m7mdal7aj committed on May 14, 2024

Commit

f6a1c31

verified ·

1 Parent(s): 977f4fb

Update my_model/tabs/dataset_analysis.py

Browse files

Files changed (1) hide show

my_model/tabs/dataset_analysis.py +24 -20

my_model/tabs/dataset_analysis.py CHANGED Viewed

@@ -246,33 +246,37 @@ class OKVQADatasetAnalyzer:
-def run_dataset_analyzer():
     datasets_comparison_table = pd.read_excel(config.DATASET_ANALYSES_PATH, sheet_name="VQA Datasets Comparison")
     okvqa_dataset_characteristics = pd.read_excel(config.DATASET_ANALYSES_PATH, sheet_name="OK-VQA Dataset Characteristics")
-    val_data = process_okvqa_dataset(config.DATASET_VAL_QUESTIONS_PATH, config.DATASET_VAL_ANNOTATIONS_PATH,
-                                     save_to_csv=False)
-    train_data = process_okvqa_dataset(config.DATASET_TRAIN_QUESTIONS_PATH, config.DATASET_TRAIN_ANNOTATIONS_PATH ,
-                                       save_to_csv=False)
-    dataset_analyzer = OKVQADatasetAnalyzer(config.DATASET_TRAIN_QUESTIONS_PATH,
-                                            config.DATASET_VAL_QUESTIONS_PATH, 'train_test')
     with st.container():
         st.markdown("## Overview of KB-VQA Datasets")
         col1, col2 = st.columns([2, 1])
         with col1:
             st.write(" ")
             with st.expander("1 - Knowledge-Based VQA (KB-VQA)"):
-                st.markdown(""" [Knowledge-Based VQA (KB-VQA)](https://arxiv.org/abs/1511.02570): One of the earliest
-                                datasets in this domain, KB-VQA comprises 700 images and 2,402 questions, with each
-                                question associated with both an image and a knowledge base (KB). The KB encapsulates
-                                facts about the world, including object names, properties, and relationships, aiming to
-                                 foster models capable of answering questions through reasoning over both the image
-                                 and the KB.\n""")
             with st.expander("2 - Factual VQA (FVQA)"):
                 st.markdown(""" [Factual VQA (FVQA)](https://arxiv.org/abs/1606.05433): This dataset includes 2,190
                                 images and 5,826 questions, accompanied by a knowledge base containing 193,449 facts.
@@ -296,6 +300,8 @@ def run_dataset_analyzer():
             st.markdown("#### KB-VQA Datasets Comparison")
             st.write(datasets_comparison_table, use_column_width=True)
     st.write("-----------------------")
     with st.container():
         st.write("\n" * 10)
         st.markdown("## OK-VQA Dataset")
@@ -307,16 +313,14 @@ def run_dataset_analyzer():
         with st.expander("Questions Distribution over Knowledge Category"):
             df = pd.read_excel(config.DATASET_ANALYSES_PATH, sheet_name="Question Category Dist")
             st.markdown("#### Questions Distribution over Knowledge Category")
-            dataset_analyzer.plot_bar_chart(df, "Knowledge Category", "Percentage", "Questions Distribution over "
-                                                                                      "Knowledge Category")
         with st.expander("Distribution of Question Keywords"):
-            #with st.expander("Distribution of Question Keywords"):
             dataset_analyzer.categorize_questions()
             st.markdown("#### Distribution of Question Keywords")
             dataset_analyzer.plot_question_distribution()
     with st.container():
         with st.expander("Show Dataset Samples"):
             st.write(train_data[:10])

+def run_dataset_analyzer() -> None:
+    """
+    Executes the dataset analysis process and displays the results using Streamlit.
+    This function provides an overview of the dataset and utilizes the OKVQADatasetAnalyzer to visualize
+    the data.
+    """
+    # Load datasets from Excel
     datasets_comparison_table = pd.read_excel(config.DATASET_ANALYSES_PATH, sheet_name="VQA Datasets Comparison")
     okvqa_dataset_characteristics = pd.read_excel(config.DATASET_ANALYSES_PATH, sheet_name="OK-VQA Dataset Characteristics")
+    # Process OK-VQA datasets for validation and training
+    val_data = process_okvqa_dataset(config.DATASET_VAL_QUESTIONS_PATH, config.DATASET_VAL_ANNOTATIONS_PATH, save_to_csv=False)
+    train_data = process_okvqa_dataset(config.DATASET_TRAIN_QUESTIONS_PATH, config.DATASET_TRAIN_ANNOTATIONS_PATH, save_to_csv=False)
+    # Initialize the dataset analyzer
+    dataset_analyzer = OKVQADatasetAnalyzer(config.DATASET_TRAIN_QUESTIONS_PATH, config.DATASET_VAL_QUESTIONS_PATH, 'train_test')
+    # Display KB-VQA datasets overview
     with st.container():
         st.markdown("## Overview of KB-VQA Datasets")
         col1, col2 = st.columns([2, 1])
         with col1:
             st.write(" ")
             with st.expander("1 - Knowledge-Based VQA (KB-VQA)"):
+                st.markdown(""" [Knowledge-Based VQA (KB-VQA)](https://arxiv.org/abs/1511.02570): One of the earliest datasets in this domain, KB-VQA
+                                comprises 700 images and 2,402 questions, with each question associated with both an image
+                                and a knowledge base (KB). The KB encapsulates facts about the world, including object
+                                names, properties, and relationships, aiming to foster models capable of answering
+                                questions through reasoning over both the image and the KB.\n""")
             with st.expander("2 - Factual VQA (FVQA)"):
                 st.markdown(""" [Factual VQA (FVQA)](https://arxiv.org/abs/1606.05433): This dataset includes 2,190
                                 images and 5,826 questions, accompanied by a knowledge base containing 193,449 facts.
             st.markdown("#### KB-VQA Datasets Comparison")
             st.write(datasets_comparison_table, use_column_width=True)
     st.write("-----------------------")
+    # Display OK-VQA dataset details
     with st.container():
         st.write("\n" * 10)
         st.markdown("## OK-VQA Dataset")
         with st.expander("Questions Distribution over Knowledge Category"):
             df = pd.read_excel(config.DATASET_ANALYSES_PATH, sheet_name="Question Category Dist")
             st.markdown("#### Questions Distribution over Knowledge Category")
+            dataset_analyzer.plot_bar_chart(df, "Knowledge Category", "Percentage", "Questions Distribution over Knowledge Category")
         with st.expander("Distribution of Question Keywords"):
             dataset_analyzer.categorize_questions()
             st.markdown("#### Distribution of Question Keywords")
             dataset_analyzer.plot_question_distribution()
+    # Display sample data
     with st.container():
         with st.expander("Show Dataset Samples"):
             st.write(train_data[:10])