Spaces:

RockMi
/

onit-text-analysis

Running

App Files Community

Michela committed on Feb 19

Commit

c59454b

1 Parent(s): 099f9b1

Update app.py

Browse files

Files changed (1) hide show

app.py +10 -2

app.py CHANGED Viewed

@@ -2,6 +2,7 @@
 import gradio as gr
 import pandas as pd
 from difflib import SequenceMatcher
 # Import results
 results_clean = pd.read_csv("data/retrieval_results/sonnini_cleaned/i_onit-sonnini-DHd2025-clean-q_Pferd, Pferde.csv").head(100)
@@ -45,6 +46,8 @@ def preview_results(page, selected_data_source):
     row_elements = []
     for idx, (_, row) in enumerate(results.iterrows(), start=start_idx + 1):
         highlighted_text = row['unpacked_highlights']
         row_html = f"""
         <div style='border:1px solid #ddd; padding:10px; margin:5px 0; font-size: 18px;'>
             <b>{idx}. \'{row['document']}\'</b> - Score: {row['_score']} - Rank: {row['rank']}
@@ -85,7 +88,12 @@ def show_details(document_name, selected_data_source):
 # Gradio Interface
 with gr.Blocks() as demo:
-    gr.Markdown("## 🔍 Preview Text Retrieval Results with Marqo Vector Database")
     data_source_dropdown = gr.Dropdown(choices=list(data_sources.keys()), label="Select Data Source", value="Results Cleaned OCR")
     page_slider = gr.Slider(1, 1, step=1, label="Page", interactive=True)
@@ -129,7 +137,7 @@ with gr.Blocks() as demo:
     # Further information block at the end
     gr.Markdown("""
     ## 📚 Further Information
-    <div style="font-size: 18px;">
         <p>This demo lets you explore our preliminary results for retrieving <i>nature</i> representations in imperfect OCR data extracted from 17-19 century German texts.
         This research was done in the <a href="https://onit.oeaw.ac.at/">Ottoman Nature in Travelogues (ONiT)</a> project and funded by the Austrian Science Fund (FWF: P 35245).
         The text retrieval was done with hybrid vector/lexical search (BM25) by using a <a href="https://docs.marqo.ai/">Marqo</a>

 import gradio as gr
 import pandas as pd
 from difflib import SequenceMatcher
+import re
 # Import results
 results_clean = pd.read_csv("data/retrieval_results/sonnini_cleaned/i_onit-sonnini-DHd2025-clean-q_Pferd, Pferde.csv").head(100)
     row_elements = []
     for idx, (_, row) in enumerate(results.iterrows(), start=start_idx + 1):
         highlighted_text = row['unpacked_highlights']
+        # Highlight every word starting with "Pferd" (case-insensitive, e.g. "Pferd", "Pferde") using a bold span with a yellow background
+        highlighted_text = re.sub(r'\b(Pferd\w*)\b', r"<span style='background-color: yellow; font-weight: bold;'>\1</span>", highlighted_text, flags=re.IGNORECASE)
         row_html = f"""
         <div style='border:1px solid #ddd; padding:10px; margin:5px 0; font-size: 18px;'>
             <b>{idx}. \'{row['document']}\'</b> - Score: {row['_score']} - Rank: {row['rank']}
 # Gradio Interface
 with gr.Blocks() as demo:
+    gr.Markdown("""
+                ## 🔍 Preview Text Retrieval Results with Marqo Vector Database
+                <div style="font-size: 16px;">
+                <p><b>Instructions:</b> Browse through the retrieval results for the text prompt <i>"Pferd, Pferde"</i> by sliding the page slider (up to 100 first retrieval results can be inspected).
+                To visualise details about the retrieved text chunk, copy and paste the document name (e.g. <i>Z166069305_430</i>) in the search bar below and click on the <i>Inspect</i> button. Please note that pressing <i>Enter</i> does not work.</p>
+                </div>""")
     data_source_dropdown = gr.Dropdown(choices=list(data_sources.keys()), label="Select Data Source", value="Results Cleaned OCR")
     page_slider = gr.Slider(1, 1, step=1, label="Page", interactive=True)
     # Further information block at the end
     gr.Markdown("""
     ## 📚 Further Information
+    <div style="font-size: 16px;">
         <p>This demo lets you explore our preliminary results for retrieving <i>nature</i> representations in imperfect OCR data extracted from 17-19 century German texts.
         This research was done in the <a href="https://onit.oeaw.ac.at/">Ottoman Nature in Travelogues (ONiT)</a> project and funded by the Austrian Science Fund (FWF: P 35245).
         The text retrieval was done with hybrid vector/lexical search (BM25) by using a <a href="https://docs.marqo.ai/">Marqo</a>