Michela committed on
Commit
ddbc294
·
1 Parent(s): ff8bdc8

Update app.py

Browse files

Added preview of annotations

Files changed (1) hide show
  1. app.py +3 -2
app.py CHANGED
@@ -13,6 +13,7 @@ import re
13
  results_clean = pd.read_csv("data/retrieval_results/sonnini_cleaned/i_onit-sonnini-DHd2025-clean-q_Pferd, Pferde.csv").head(100)
14
  results_prep = pd.read_csv("data/retrieval_results/sonnini_llm_corrected/i_onit-sonnini-DHd2025-prep-q_Pferd, Pferde.csv").head(100)
15
  results_orig = pd.read_csv("data/retrieval_results/sonnini_original_OCR/i_onit-test-index-sonnini-q_Pferd-Pferde.csv").head(100)
 
16
 
17
  # Drop 'text_prep' from results_clean
18
  results_clean.drop(columns=['text_prep'], inplace=True)
@@ -23,7 +24,7 @@ results_orig['document'] = results_orig['document'].str[:-12]
23
  # Modify the "page" column to extract the numeric part and remove leading zeroes
24
  results_orig['page'] = results_orig['page'].str.extract(r'(\d+)', expand=False).astype(int)
25
 
26
- data_sources = {"Results Cleaned OCR": results_clean, "Results LLM Preprocessed OCR": results_prep, "Results Original OCR": results_orig}
27
 
28
  # Pagination settings
29
  R = 5 # Number of preview rows per page
@@ -163,7 +164,7 @@ with gr.Blocks() as demo:
163
  ## 🔍 Preview Text Retrieval Results with Marqo Vector Database
164
  <div style="font-size: 18px;">
165
  <p><b>Instructions:</b> Browse through the retrieval results for the text prompt <i>"Pferd, Pferde"</i> by sliding the page slider (up to 100 first retrieval results can be inspected).
166
- Select the data source: Choose between <i>Results Cleaned OCR, Results LLM Preprocessed OCR, and Results Original OCR</i>.
167
  To visualise details about the retrieved text chunk, copy and paste the document name (e.g. <i>Z166069305_430</i>) in the search bar below and click on the <i>Inspect</i> button.
168
  Please note that pressing <i>Enter</i> does not work.
169
  To inspect the page in the full book, click on <i>Open ONB Viewer</i> in the document details below.</p>
 
13
  results_clean = pd.read_csv("data/retrieval_results/sonnini_cleaned/i_onit-sonnini-DHd2025-clean-q_Pferd, Pferde.csv").head(100)
14
  results_prep = pd.read_csv("data/retrieval_results/sonnini_llm_corrected/i_onit-sonnini-DHd2025-prep-q_Pferd, Pferde.csv").head(100)
15
  results_orig = pd.read_csv("data/retrieval_results/sonnini_original_OCR/i_onit-test-index-sonnini-q_Pferd-Pferde.csv").head(100)
16
+ annotations = pd.read_csv("data/annotations/DHd2025_referenceReports_annotations_preview_horses.csv")
17
 
18
  # Drop 'text_prep' from results_clean
19
  results_clean.drop(columns=['text_prep'], inplace=True)
 
24
  # Modify the "page" column to extract the numeric part and remove leading zeroes
25
  results_orig['page'] = results_orig['page'].str.extract(r'(\d+)', expand=False).astype(int)
26
 
27
+ data_sources = {"Results Cleaned OCR": results_clean, "Results LLM Preprocessed OCR": results_prep, "Results Original OCR": results_orig, "Annotations": annotations}
28
 
29
  # Pagination settings
30
  R = 5 # Number of preview rows per page
 
164
  ## 🔍 Preview Text Retrieval Results with Marqo Vector Database
165
  <div style="font-size: 18px;">
166
  <p><b>Instructions:</b> Browse through the retrieval results for the text prompt <i>"Pferd, Pferde"</i> by sliding the page slider (up to 100 first retrieval results can be inspected).
167
+ Select the data source: Choose between <i>Results Cleaned OCR, Results LLM Preprocessed OCR, Results Original OCR,</i> and our <i>Annotations</i> of text passages mentioning <i>horses</i> in the text.
168
  To visualise details about the retrieved text chunk, copy and paste the document name (e.g. <i>Z166069305_430</i>) in the search bar below and click on the <i>Inspect</i> button.
169
  Please note that pressing <i>Enter</i> does not work.
170
  To inspect the page in the full book, click on <i>Open ONB Viewer</i> in the document details below.</p>