Spaces:
Sleeping
Sleeping
Michela
committed on
Commit
·
ddbc294
1
Parent(s):
ff8bdc8
Update app.py
Browse files
Added preview of annotations
app.py
CHANGED
@@ -13,6 +13,7 @@ import re
|
|
13 |
results_clean = pd.read_csv("data/retrieval_results/sonnini_cleaned/i_onit-sonnini-DHd2025-clean-q_Pferd, Pferde.csv").head(100)
|
14 |
results_prep = pd.read_csv("data/retrieval_results/sonnini_llm_corrected/i_onit-sonnini-DHd2025-prep-q_Pferd, Pferde.csv").head(100)
|
15 |
results_orig = pd.read_csv("data/retrieval_results/sonnini_original_OCR/i_onit-test-index-sonnini-q_Pferd-Pferde.csv").head(100)
|
|
|
16 |
|
17 |
# Drop 'text_prep' from results_clean
|
18 |
results_clean.drop(columns=['text_prep'], inplace=True)
|
@@ -23,7 +24,7 @@ results_orig['document'] = results_orig['document'].str[:-12]
|
|
23 |
# Modify the "page" column to extract the numeric part and remove leading zeroes
|
24 |
results_orig['page'] = results_orig['page'].str.extract(r'(\d+)', expand=False).astype(int)
|
25 |
|
26 |
-
data_sources = {"Results Cleaned OCR": results_clean, "Results LLM Preprocessed OCR": results_prep, "Results Original OCR": results_orig}
|
27 |
|
28 |
# Pagination settings
|
29 |
R = 5 # Number of preview rows per page
|
@@ -163,7 +164,7 @@ with gr.Blocks() as demo:
|
|
163 |
## 🔍 Preview Text Retrieval Results with Marqo Vector Database
|
164 |
<div style="font-size: 18px;">
|
165 |
<p><b>Instructions:</b> Browse through the retrieval results for the text prompt <i>"Pferd, Pferde"</i> by sliding the page slider (up to 100 first retrieval results can be inspected).
|
166 |
-
Select the data source: Choose between <i>Results Cleaned OCR, Results LLM Preprocessed OCR,</i> and <i>Results Original OCR</i>.
|
167 |
To visualise details about the retrieved text chunk, copy and paste the document name (e.g. <i>Z166069305_430</i>) in the search bar below and click on the <i>Inspect</i> button.
|
168 |
Please note that pressing <i>Enter</i> does not work.
|
169 |
To inspect the page in the full book, click on <i>Open ONB Viewer</i> in the document details below.</p>
|
|
|
13 |
results_clean = pd.read_csv("data/retrieval_results/sonnini_cleaned/i_onit-sonnini-DHd2025-clean-q_Pferd, Pferde.csv").head(100)
|
14 |
results_prep = pd.read_csv("data/retrieval_results/sonnini_llm_corrected/i_onit-sonnini-DHd2025-prep-q_Pferd, Pferde.csv").head(100)
|
15 |
results_orig = pd.read_csv("data/retrieval_results/sonnini_original_OCR/i_onit-test-index-sonnini-q_Pferd-Pferde.csv").head(100)
|
16 |
+
annotations = pd.read_csv("data/annotations/DHd2025_referenceReports_annotations_preview_horses.csv")
|
17 |
|
18 |
# Drop 'text_prep' from results_clean
|
19 |
results_clean.drop(columns=['text_prep'], inplace=True)
|
|
|
24 |
# Modify the "page" column to extract the numeric part and remove leading zeroes
|
25 |
results_orig['page'] = results_orig['page'].str.extract(r'(\d+)', expand=False).astype(int)
|
26 |
|
27 |
+
data_sources = {"Results Cleaned OCR": results_clean, "Results LLM Preprocessed OCR": results_prep, "Results Original OCR": results_orig, "Annotations": annotations}
|
28 |
|
29 |
# Pagination settings
|
30 |
R = 5 # Number of preview rows per page
|
|
|
164 |
## 🔍 Preview Text Retrieval Results with Marqo Vector Database
|
165 |
<div style="font-size: 18px;">
|
166 |
<p><b>Instructions:</b> Browse through the retrieval results for the text prompt <i>"Pferd, Pferde"</i> by sliding the page slider (up to 100 first retrieval results can be inspected).
|
167 |
+
Select the data source: Choose between <i>Results Cleaned OCR, Results LLM Preprocessed OCR, Results Original OCR,</i> and our <i>Annotations</i> of text passages mentioning <i>horses</i> in the text.
|
168 |
To visualise details about the retrieved text chunk, copy and paste the document name (e.g. <i>Z166069305_430</i>) in the search bar below and click on the <i>Inspect</i> button.
|
169 |
Please note that pressing <i>Enter</i> does not work.
|
170 |
To inspect the page in the full book, click on <i>Open ONB Viewer</i> in the document details below.</p>
|