Spaces:

grascii
/

search

Running

App Files Community

chanicpanic committed on Jul 16

Commit

b6b196e

1 Parent(s): f7fafc2

Introduce preanniversary-phrases and dictionary selection

Browse files

Files changed (5) hide show

README.md +11 -1
app.py +18 -1
report.py +12 -9
requirements.txt +2 -2
search.py +35 -7

README.md CHANGED Viewed

@@ -1,16 +1,26 @@
 ---
 title: Grascii Search
 emoji: 🔎
 colorFrom: gray
 colorTo: green
 sdk: streamlit
-sdk_version: 1.40.2
 app_file: app.py
 pinned: true
 models:
   - grascii/gregg-vision-v0.2.1
 datasets:
   - grascii/gregg-preanniversary-words
 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
 title: Grascii Search
+short_description: Search Grascii's Gregg Shorthand Dictionaries
 emoji: 🔎
 colorFrom: gray
 colorTo: green
 sdk: streamlit
+sdk_version: 1.46.1
 app_file: app.py
 pinned: true
 models:
   - grascii/gregg-vision-v0.2.1
 datasets:
   - grascii/gregg-preanniversary-words
+  - grascii/gregg-preanniversary-phrases
+preload_from_hub:
+  - grascii/gregg-vision-v0.2.1
+  - grascii/gregg-preanniversary-words
+  - grascii/gregg-preanniversary-phrases
+tags:
+  - gregg
+  - shorthand
+  - stenography
 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

app.py CHANGED Viewed

@@ -13,7 +13,7 @@ st.set_page_config(
 )
 import pandas as pd  # noqa E402
-from search import write_grascii_search, write_reverse_search  # noqa E402
 pd.options.mode.copy_on_write = True
@@ -33,6 +33,23 @@ if st.session_state["report_submitted"]:
     st.toast("Thanks for the report!")
     st.session_state["report_submitted"] = False
 tab1, tab2 = st.tabs(["Grascii", "Reverse"])
 with tab1:

 )
 import pandas as pd  # noqa E402
+from search import write_grascii_search, write_reverse_search, write_dictionaries  # noqa E402
 pd.options.mode.copy_on_write = True
     st.toast("Thanks for the report!")
     st.session_state["report_submitted"] = False
+with st.sidebar:
+    st.markdown(
+        """
+        # What's New
+        :blue-badge[2025-07-15]
+        - The preanniversary-phrases dictionary is now available!
+        - You may select which dictionaries you would like to search.
+        - To see which dictionary contained each result, hover over the result
+          table. Click the "eye" icon at the top right of the table and
+          select "Dictionary".
+        """
+    )
+write_dictionaries()
 tab1, tab2 = st.tabs(["Grascii", "Reverse"])
 with tab1:

report.py CHANGED Viewed

@@ -21,6 +21,7 @@ def write_header(writer):
             "date",
             "grascii",
             "longhand",
             "incorrect_grascii",
             "incorrect_longhand",
             "incorrect_shorthand",
@@ -47,11 +48,11 @@ def report_dialog(data):
     st.write("Please select one or more reasons for flagging each row:")
     report_df = data
-    report_df["3"] = False
     report_df["4"] = False
     report_df["5"] = False
     report_df["6"] = False
     report_df["7"] = False
     final_report = st.data_editor(
         report_df,
         hide_index=True,
@@ -59,17 +60,18 @@ def report_dialog(data):
             "0": "Grascii",
             "1": "Longhand",
             "2": st.column_config.ImageColumn("Shorthand", width="medium"),
-            "3": st.column_config.CheckboxColumn("Grascii is incorrect"),
-            "4": st.column_config.CheckboxColumn("Longhand is incorrect"),
-            "5": st.column_config.CheckboxColumn("Shorthand image is incorrect"),
-            "6": st.column_config.CheckboxColumn(
                 "Shorthand image is improperly cropped"
             ),
-            "7": st.column_config.CheckboxColumn(
                 "Shorthand image contains extraneous marks"
             ),
         },
-        disabled=["0", "1", "2"],
         use_container_width=True,
     )
@@ -83,11 +85,11 @@ def report_dialog(data):
                         write_header(writer)
                     if any(
                         [
-                            row.iloc[3],
                             row.iloc[4],
                             row.iloc[5],
                             row.iloc[6],
                             row.iloc[7],
                         ]
                     ):
                         writer.writerow(
@@ -95,11 +97,12 @@ def report_dialog(data):
                                 datetime.now(timezone.utc).date(),
                                 row.iloc[0],
                                 row.iloc[1],
-                                1 if row.iloc[3] else 0,
                                 1 if row.iloc[4] else 0,
                                 1 if row.iloc[5] else 0,
                                 1 if row.iloc[6] else 0,
                                 1 if row.iloc[7] else 0,
                             ]
                         )

             "date",
             "grascii",
             "longhand",
+            "dictionary",
             "incorrect_grascii",
             "incorrect_longhand",
             "incorrect_shorthand",
     st.write("Please select one or more reasons for flagging each row:")
     report_df = data
     report_df["4"] = False
     report_df["5"] = False
     report_df["6"] = False
     report_df["7"] = False
+    report_df["8"] = False
     final_report = st.data_editor(
         report_df,
         hide_index=True,
             "0": "Grascii",
             "1": "Longhand",
             "2": st.column_config.ImageColumn("Shorthand", width="medium"),
+            "3": "Dictionary",
+            "4": st.column_config.CheckboxColumn("Grascii is incorrect"),
+            "5": st.column_config.CheckboxColumn("Longhand is incorrect"),
+            "6": st.column_config.CheckboxColumn("Shorthand image is incorrect"),
+            "7": st.column_config.CheckboxColumn(
                 "Shorthand image is improperly cropped"
             ),
+            "8": st.column_config.CheckboxColumn(
                 "Shorthand image contains extraneous marks"
             ),
         },
+        disabled=["0", "1", "2", "3"],
         use_container_width=True,
     )
                         write_header(writer)
                     if any(
                         [
                             row.iloc[4],
                             row.iloc[5],
                             row.iloc[6],
                             row.iloc[7],
+                            row.iloc[8],
                         ]
                     ):
                         writer.writerow(
                                 datetime.now(timezone.utc).date(),
                                 row.iloc[0],
                                 row.iloc[1],
+                                row.iloc[3],
                                 1 if row.iloc[4] else 0,
                                 1 if row.iloc[5] else 0,
                                 1 if row.iloc[6] else 0,
                                 1 if row.iloc[7] else 0,
+                                1 if row.iloc[8] else 0,
                             ]
                         )

requirements.txt CHANGED Viewed

@@ -17,7 +17,7 @@ frozenlist==1.5.0
 fsspec==2024.9.0
 gitdb==4.0.11
 GitPython==3.1.43
-grascii==0.6.1
 huggingface-hub==0.26.5
 idna==3.10
 Jinja2==3.1.4
@@ -66,7 +66,7 @@ rpds-py==0.22.3
 safetensors==0.4.5
 six==1.17.0
 smmap==5.0.1
-streamlit==1.40.2
 sympy==1.13.1
 tenacity==9.0.0
 tokenizers==0.21.0

 fsspec==2024.9.0
 gitdb==4.0.11
 GitPython==3.1.43
+grascii==0.7.0
 huggingface-hub==0.26.5
 idna==3.10
 Jinja2==3.1.4
 safetensors==0.4.5
 six==1.17.0
 smmap==5.0.1
+streamlit==1.46.1
 sympy==1.13.1
 tenacity==9.0.0
 tokenizers==0.21.0

search.py CHANGED Viewed

@@ -16,9 +16,14 @@ MAX_GRASCII_LENGTH = 16
 @st.cache_data(show_spinner="Loading shorthand images")
 def load_images():
-    ds = load_dataset(
-        "grascii/gregg-preanniversary-words", split="train", token=st.secrets.HF_TOKEN
-    )
     image_map = {}
     for row in ds:
         buffered = BytesIO()
@@ -31,6 +36,21 @@ def load_images():
 image_map = load_images()
 def on_submit():
     if "grascii_text_box" in st.session_state:
         st.session_state["grascii"] = st.session_state["grascii_text_box"]
@@ -38,12 +58,12 @@ def on_submit():
 def write_grascii_search():
-    searcher = GrasciiSearcher()
     grascii_results = []
     search_by = st.radio("Search by", ["text", "image (beta)"], horizontal=True)
     with st.form("Grascii Search"):
         placeholder = st.empty()
         if search_by == "text":
             placeholder.text_input(
@@ -98,7 +118,7 @@ def write_grascii_search():
                 help="""
                     How to intepret ambiguous Grascii strings.
-                    - best: Only search using the best interpretation.
                     - all: Search using all possible interpretations.
                     """,
             )
@@ -181,6 +201,8 @@ def write_grascii_search():
         st.error(f"Grascii too long. Max: {MAX_GRASCII_LENGTH} characters")
         return
     try:
         grascii_results = searcher.sorted_search(
             grascii=grascii,
@@ -220,7 +242,8 @@ def write_results(results, term, key_prefix):
         lambda r: [
             r.entry.grascii,
             r.entry.translation,
-            image_map.get(r.entry.translation),
         ],
         results,
     )
@@ -229,13 +252,18 @@ def write_results(results, term, key_prefix):
     r = "Results" if len(data) != 1 else "Result"
     st.write(f'{len(data)} {r} for "{term}"')
     event = st.dataframe(
         data,
         use_container_width=True,
         column_config={
             "0": "Grascii",
             "1": "Longhand",
             "2": st.column_config.ImageColumn("Shorthand", width="medium"),
         },
         selection_mode="multi-row",
         on_select="rerun",
@@ -253,7 +281,6 @@ def write_results(results, term, key_prefix):
 def write_reverse_search():
-    searcher = ReverseSearcher()
     reverse_results = []
     with st.form("Reverse Search"):
@@ -262,6 +289,7 @@ def write_reverse_search():
         st.form_submit_button("Search")
         if word:
             reverse_results = searcher.sorted_search(
                 reverse=word,
             )

 @st.cache_data(show_spinner="Loading shorthand images")
 def load_images():
+    return {
+        ":preanniversary": load_dataset_images("grascii/gregg-preanniversary-words"),
+        ":preanniversary-phrases": load_dataset_images("grascii/gregg-preanniversary-phrases"),
+    }
+def load_dataset_images(dataset):
+    ds = load_dataset(dataset, split="train", token=st.secrets.HF_TOKEN)
     image_map = {}
     for row in ds:
         buffered = BytesIO()
 image_map = load_images()
+AVAILABLE_DICTIONARIES = [":preanniversary", ":preanniversary-phrases"]
+@st.fragment
+def write_dictionaries():
+    return st.pills(
+        "Dictionaries",
+        AVAILABLE_DICTIONARIES,
+        default=st.session_state.get("dictionaries", AVAILABLE_DICTIONARIES),
+        selection_mode="multi",
+        key="dictionaries",
+        format_func=lambda d: d[1:]
+    )
 def on_submit():
     if "grascii_text_box" in st.session_state:
         st.session_state["grascii"] = st.session_state["grascii_text_box"]
 def write_grascii_search():
     grascii_results = []
     search_by = st.radio("Search by", ["text", "image (beta)"], horizontal=True)
     with st.form("Grascii Search"):
         placeholder = st.empty()
         if search_by == "text":
             placeholder.text_input(
                 help="""
                     How to intepret ambiguous Grascii strings.
+                    - best: Only search using the [canonical interpretation](https://grascii.readthedocs.io/en/stable/interpretation.html#the-canonical-interpretation).
                     - all: Search using all possible interpretations.
                     """,
             )
         st.error(f"Grascii too long. Max: {MAX_GRASCII_LENGTH} characters")
         return
+    searcher = GrasciiSearcher(dictionaries=st.session_state["dictionaries"])
     try:
         grascii_results = searcher.sorted_search(
             grascii=grascii,
         lambda r: [
             r.entry.grascii,
             r.entry.translation,
+            image_map.get(r.dictionary.name, {}).get(r.entry.translation),
+            r.dictionary.name[1:],
         ],
         results,
     )
     r = "Results" if len(data) != 1 else "Result"
     st.write(f'{len(data)} {r} for "{term}"')
+    if data.empty:
+        return
     event = st.dataframe(
         data,
         use_container_width=True,
+        column_order=("0", "1", "2"),
         column_config={
             "0": "Grascii",
             "1": "Longhand",
             "2": st.column_config.ImageColumn("Shorthand", width="medium"),
+            "3": "Dictionary",
         },
         selection_mode="multi-row",
         on_select="rerun",
 def write_reverse_search():
     reverse_results = []
     with st.form("Reverse Search"):
         st.form_submit_button("Search")
         if word:
+            searcher = ReverseSearcher(dictionaries=st.session_state["dictionaries"])
             reverse_results = searcher.sorted_search(
                 reverse=word,
             )