Commit
·
b6b196e
1
Parent(s):
f7fafc2
Introduce preanniversary-phrases and dictionary selection
Browse files
- README.md +11 -1
- app.py +18 -1
- report.py +12 -9
- requirements.txt +2 -2
- search.py +35 -7
README.md
CHANGED
@@ -1,16 +1,26 @@
|
|
1 |
---
|
2 |
title: Grascii Search
|
|
|
3 |
emoji: π
|
4 |
colorFrom: gray
|
5 |
colorTo: green
|
6 |
sdk: streamlit
|
7 |
-
sdk_version: 1.
|
8 |
app_file: app.py
|
9 |
pinned: true
|
10 |
models:
|
11 |
- grascii/gregg-vision-v0.2.1
|
12 |
datasets:
|
13 |
- grascii/gregg-preanniversary-words
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
14 |
---
|
15 |
|
16 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
1 |
---
|
2 |
title: Grascii Search
|
3 |
+
short_description: Search Grascii's Gregg Shorthand Dictionaries
|
4 |
emoji: π
|
5 |
colorFrom: gray
|
6 |
colorTo: green
|
7 |
sdk: streamlit
|
8 |
+
sdk_version: 1.46.1
|
9 |
app_file: app.py
|
10 |
pinned: true
|
11 |
models:
|
12 |
- grascii/gregg-vision-v0.2.1
|
13 |
datasets:
|
14 |
- grascii/gregg-preanniversary-words
|
15 |
+
- grascii/gregg-preanniversary-phrases
|
16 |
+
preload_from_hub:
|
17 |
+
- grascii/gregg-vision-v0.2.1
|
18 |
+
- grascii/gregg-preanniversary-words
|
19 |
+
- grascii/gregg-preanniversary-phrases
|
20 |
+
tags:
|
21 |
+
- gregg
|
22 |
+
- shorthand
|
23 |
+
- stenography
|
24 |
---
|
25 |
|
26 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
app.py
CHANGED
@@ -13,7 +13,7 @@ st.set_page_config(
|
|
13 |
)
|
14 |
|
15 |
import pandas as pd # noqa E402
|
16 |
-
from search import write_grascii_search, write_reverse_search # noqa E402
|
17 |
|
18 |
pd.options.mode.copy_on_write = True
|
19 |
|
@@ -33,6 +33,23 @@ if st.session_state["report_submitted"]:
|
|
33 |
st.toast("Thanks for the report!")
|
34 |
st.session_state["report_submitted"] = False
|
35 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
36 |
tab1, tab2 = st.tabs(["Grascii", "Reverse"])
|
37 |
|
38 |
with tab1:
|
|
|
13 |
)
|
14 |
|
15 |
import pandas as pd # noqa E402
|
16 |
+
from search import write_grascii_search, write_reverse_search, write_dictionaries # noqa E402
|
17 |
|
18 |
pd.options.mode.copy_on_write = True
|
19 |
|
|
|
33 |
st.toast("Thanks for the report!")
|
34 |
st.session_state["report_submitted"] = False
|
35 |
|
36 |
+
with st.sidebar:
|
37 |
+
st.markdown(
|
38 |
+
"""
|
39 |
+
# What's New
|
40 |
+
|
41 |
+
:blue-badge[2025-07-15]
|
42 |
+
|
43 |
+
- The preanniversary-phrases dictionary is now available!
|
44 |
+
- You may select which dictionaries you would like to search.
|
45 |
+
- To see which dictionary contained each result, hover over the result
|
46 |
+
table. Click the "eye" icon at the top right of the table and
|
47 |
+
select "Dictionary".
|
48 |
+
"""
|
49 |
+
)
|
50 |
+
|
51 |
+
write_dictionaries()
|
52 |
+
|
53 |
tab1, tab2 = st.tabs(["Grascii", "Reverse"])
|
54 |
|
55 |
with tab1:
|
report.py
CHANGED
@@ -21,6 +21,7 @@ def write_header(writer):
|
|
21 |
"date",
|
22 |
"grascii",
|
23 |
"longhand",
|
|
|
24 |
"incorrect_grascii",
|
25 |
"incorrect_longhand",
|
26 |
"incorrect_shorthand",
|
@@ -47,11 +48,11 @@ def report_dialog(data):
|
|
47 |
st.write("Please select one or more reasons for flagging each row:")
|
48 |
|
49 |
report_df = data
|
50 |
-
report_df["3"] = False
|
51 |
report_df["4"] = False
|
52 |
report_df["5"] = False
|
53 |
report_df["6"] = False
|
54 |
report_df["7"] = False
|
|
|
55 |
final_report = st.data_editor(
|
56 |
report_df,
|
57 |
hide_index=True,
|
@@ -59,17 +60,18 @@ def report_dialog(data):
|
|
59 |
"0": "Grascii",
|
60 |
"1": "Longhand",
|
61 |
"2": st.column_config.ImageColumn("Shorthand", width="medium"),
|
62 |
-
"3":
|
63 |
-
"4": st.column_config.CheckboxColumn("
|
64 |
-
"5": st.column_config.CheckboxColumn("
|
65 |
-
"6": st.column_config.CheckboxColumn(
|
|
|
66 |
"Shorthand image is improperly cropped"
|
67 |
),
|
68 |
-
"
|
69 |
"Shorthand image contains extraneous marks"
|
70 |
),
|
71 |
},
|
72 |
-
disabled=["0", "1", "2"],
|
73 |
use_container_width=True,
|
74 |
)
|
75 |
|
@@ -83,11 +85,11 @@ def report_dialog(data):
|
|
83 |
write_header(writer)
|
84 |
if any(
|
85 |
[
|
86 |
-
row.iloc[3],
|
87 |
row.iloc[4],
|
88 |
row.iloc[5],
|
89 |
row.iloc[6],
|
90 |
row.iloc[7],
|
|
|
91 |
]
|
92 |
):
|
93 |
writer.writerow(
|
@@ -95,11 +97,12 @@ def report_dialog(data):
|
|
95 |
datetime.now(timezone.utc).date(),
|
96 |
row.iloc[0],
|
97 |
row.iloc[1],
|
98 |
-
|
99 |
1 if row.iloc[4] else 0,
|
100 |
1 if row.iloc[5] else 0,
|
101 |
1 if row.iloc[6] else 0,
|
102 |
1 if row.iloc[7] else 0,
|
|
|
103 |
]
|
104 |
)
|
105 |
|
|
|
21 |
"date",
|
22 |
"grascii",
|
23 |
"longhand",
|
24 |
+
"dictionary",
|
25 |
"incorrect_grascii",
|
26 |
"incorrect_longhand",
|
27 |
"incorrect_shorthand",
|
|
|
48 |
st.write("Please select one or more reasons for flagging each row:")
|
49 |
|
50 |
report_df = data
|
|
|
51 |
report_df["4"] = False
|
52 |
report_df["5"] = False
|
53 |
report_df["6"] = False
|
54 |
report_df["7"] = False
|
55 |
+
report_df["8"] = False
|
56 |
final_report = st.data_editor(
|
57 |
report_df,
|
58 |
hide_index=True,
|
|
|
60 |
"0": "Grascii",
|
61 |
"1": "Longhand",
|
62 |
"2": st.column_config.ImageColumn("Shorthand", width="medium"),
|
63 |
+
"3": "Dictionary",
|
64 |
+
"4": st.column_config.CheckboxColumn("Grascii is incorrect"),
|
65 |
+
"5": st.column_config.CheckboxColumn("Longhand is incorrect"),
|
66 |
+
"6": st.column_config.CheckboxColumn("Shorthand image is incorrect"),
|
67 |
+
"7": st.column_config.CheckboxColumn(
|
68 |
"Shorthand image is improperly cropped"
|
69 |
),
|
70 |
+
"8": st.column_config.CheckboxColumn(
|
71 |
"Shorthand image contains extraneous marks"
|
72 |
),
|
73 |
},
|
74 |
+
disabled=["0", "1", "2", "3"],
|
75 |
use_container_width=True,
|
76 |
)
|
77 |
|
|
|
85 |
write_header(writer)
|
86 |
if any(
|
87 |
[
|
|
|
88 |
row.iloc[4],
|
89 |
row.iloc[5],
|
90 |
row.iloc[6],
|
91 |
row.iloc[7],
|
92 |
+
row.iloc[8],
|
93 |
]
|
94 |
):
|
95 |
writer.writerow(
|
|
|
97 |
datetime.now(timezone.utc).date(),
|
98 |
row.iloc[0],
|
99 |
row.iloc[1],
|
100 |
+
row.iloc[3],
|
101 |
1 if row.iloc[4] else 0,
|
102 |
1 if row.iloc[5] else 0,
|
103 |
1 if row.iloc[6] else 0,
|
104 |
1 if row.iloc[7] else 0,
|
105 |
+
1 if row.iloc[8] else 0,
|
106 |
]
|
107 |
)
|
108 |
|
requirements.txt
CHANGED
@@ -17,7 +17,7 @@ frozenlist==1.5.0
|
|
17 |
fsspec==2024.9.0
|
18 |
gitdb==4.0.11
|
19 |
GitPython==3.1.43
|
20 |
-
grascii==0.
|
21 |
huggingface-hub==0.26.5
|
22 |
idna==3.10
|
23 |
Jinja2==3.1.4
|
@@ -66,7 +66,7 @@ rpds-py==0.22.3
|
|
66 |
safetensors==0.4.5
|
67 |
six==1.17.0
|
68 |
smmap==5.0.1
|
69 |
-
streamlit==1.
|
70 |
sympy==1.13.1
|
71 |
tenacity==9.0.0
|
72 |
tokenizers==0.21.0
|
|
|
17 |
fsspec==2024.9.0
|
18 |
gitdb==4.0.11
|
19 |
GitPython==3.1.43
|
20 |
+
grascii==0.7.0
|
21 |
huggingface-hub==0.26.5
|
22 |
idna==3.10
|
23 |
Jinja2==3.1.4
|
|
|
66 |
safetensors==0.4.5
|
67 |
six==1.17.0
|
68 |
smmap==5.0.1
|
69 |
+
streamlit==1.46.1
|
70 |
sympy==1.13.1
|
71 |
tenacity==9.0.0
|
72 |
tokenizers==0.21.0
|
search.py
CHANGED
@@ -16,9 +16,14 @@ MAX_GRASCII_LENGTH = 16
|
|
16 |
|
17 |
@st.cache_data(show_spinner="Loading shorthand images")
|
18 |
def load_images():
|
19 |
-
|
20 |
-
"grascii/gregg-preanniversary-words",
|
21 |
-
|
|
|
|
|
|
|
|
|
|
|
22 |
image_map = {}
|
23 |
for row in ds:
|
24 |
buffered = BytesIO()
|
@@ -31,6 +36,21 @@ def load_images():
|
|
31 |
image_map = load_images()
|
32 |
|
33 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
34 |
def on_submit():
|
35 |
if "grascii_text_box" in st.session_state:
|
36 |
st.session_state["grascii"] = st.session_state["grascii_text_box"]
|
@@ -38,12 +58,12 @@ def on_submit():
|
|
38 |
|
39 |
|
40 |
def write_grascii_search():
|
41 |
-
searcher = GrasciiSearcher()
|
42 |
grascii_results = []
|
43 |
|
44 |
search_by = st.radio("Search by", ["text", "image (beta)"], horizontal=True)
|
45 |
|
46 |
with st.form("Grascii Search"):
|
|
|
47 |
placeholder = st.empty()
|
48 |
if search_by == "text":
|
49 |
placeholder.text_input(
|
@@ -98,7 +118,7 @@ def write_grascii_search():
|
|
98 |
help="""
|
99 |
How to intepret ambiguous Grascii strings.
|
100 |
|
101 |
-
- best: Only search using the
|
102 |
- all: Search using all possible interpretations.
|
103 |
""",
|
104 |
)
|
@@ -181,6 +201,8 @@ def write_grascii_search():
|
|
181 |
st.error(f"Grascii too long. Max: {MAX_GRASCII_LENGTH} characters")
|
182 |
return
|
183 |
|
|
|
|
|
184 |
try:
|
185 |
grascii_results = searcher.sorted_search(
|
186 |
grascii=grascii,
|
@@ -220,7 +242,8 @@ def write_results(results, term, key_prefix):
|
|
220 |
lambda r: [
|
221 |
r.entry.grascii,
|
222 |
r.entry.translation,
|
223 |
-
image_map.get(r.entry.translation),
|
|
|
224 |
],
|
225 |
results,
|
226 |
)
|
@@ -229,13 +252,18 @@ def write_results(results, term, key_prefix):
|
|
229 |
r = "Results" if len(data) != 1 else "Result"
|
230 |
st.write(f'{len(data)} {r} for "{term}"')
|
231 |
|
|
|
|
|
|
|
232 |
event = st.dataframe(
|
233 |
data,
|
234 |
use_container_width=True,
|
|
|
235 |
column_config={
|
236 |
"0": "Grascii",
|
237 |
"1": "Longhand",
|
238 |
"2": st.column_config.ImageColumn("Shorthand", width="medium"),
|
|
|
239 |
},
|
240 |
selection_mode="multi-row",
|
241 |
on_select="rerun",
|
@@ -253,7 +281,6 @@ def write_results(results, term, key_prefix):
|
|
253 |
|
254 |
|
255 |
def write_reverse_search():
|
256 |
-
searcher = ReverseSearcher()
|
257 |
reverse_results = []
|
258 |
|
259 |
with st.form("Reverse Search"):
|
@@ -262,6 +289,7 @@ def write_reverse_search():
|
|
262 |
st.form_submit_button("Search")
|
263 |
|
264 |
if word:
|
|
|
265 |
reverse_results = searcher.sorted_search(
|
266 |
reverse=word,
|
267 |
)
|
|
|
16 |
|
17 |
@st.cache_data(show_spinner="Loading shorthand images")
|
18 |
def load_images():
|
19 |
+
return {
|
20 |
+
":preanniversary": load_dataset_images("grascii/gregg-preanniversary-words"),
|
21 |
+
":preanniversary-phrases": load_dataset_images("grascii/gregg-preanniversary-phrases"),
|
22 |
+
}
|
23 |
+
|
24 |
+
|
25 |
+
def load_dataset_images(dataset):
|
26 |
+
ds = load_dataset(dataset, split="train", token=st.secrets.HF_TOKEN)
|
27 |
image_map = {}
|
28 |
for row in ds:
|
29 |
buffered = BytesIO()
|
|
|
36 |
image_map = load_images()
|
37 |
|
38 |
|
39 |
+
AVAILABLE_DICTIONARIES = [":preanniversary", ":preanniversary-phrases"]
|
40 |
+
|
41 |
+
|
42 |
+
@st.fragment
|
43 |
+
def write_dictionaries():
|
44 |
+
return st.pills(
|
45 |
+
"Dictionaries",
|
46 |
+
AVAILABLE_DICTIONARIES,
|
47 |
+
default=st.session_state.get("dictionaries", AVAILABLE_DICTIONARIES),
|
48 |
+
selection_mode="multi",
|
49 |
+
key="dictionaries",
|
50 |
+
format_func=lambda d: d[1:]
|
51 |
+
)
|
52 |
+
|
53 |
+
|
54 |
def on_submit():
|
55 |
if "grascii_text_box" in st.session_state:
|
56 |
st.session_state["grascii"] = st.session_state["grascii_text_box"]
|
|
|
58 |
|
59 |
|
60 |
def write_grascii_search():
|
|
|
61 |
grascii_results = []
|
62 |
|
63 |
search_by = st.radio("Search by", ["text", "image (beta)"], horizontal=True)
|
64 |
|
65 |
with st.form("Grascii Search"):
|
66 |
+
|
67 |
placeholder = st.empty()
|
68 |
if search_by == "text":
|
69 |
placeholder.text_input(
|
|
|
118 |
help="""
|
119 |
How to intepret ambiguous Grascii strings.
|
120 |
|
121 |
+
- best: Only search using the [canonical interpretation](https://grascii.readthedocs.io/en/stable/interpretation.html#the-canonical-interpretation).
|
122 |
- all: Search using all possible interpretations.
|
123 |
""",
|
124 |
)
|
|
|
201 |
st.error(f"Grascii too long. Max: {MAX_GRASCII_LENGTH} characters")
|
202 |
return
|
203 |
|
204 |
+
searcher = GrasciiSearcher(dictionaries=st.session_state["dictionaries"])
|
205 |
+
|
206 |
try:
|
207 |
grascii_results = searcher.sorted_search(
|
208 |
grascii=grascii,
|
|
|
242 |
lambda r: [
|
243 |
r.entry.grascii,
|
244 |
r.entry.translation,
|
245 |
+
image_map.get(r.dictionary.name, {}).get(r.entry.translation),
|
246 |
+
r.dictionary.name[1:],
|
247 |
],
|
248 |
results,
|
249 |
)
|
|
|
252 |
r = "Results" if len(data) != 1 else "Result"
|
253 |
st.write(f'{len(data)} {r} for "{term}"')
|
254 |
|
255 |
+
if data.empty:
|
256 |
+
return
|
257 |
+
|
258 |
event = st.dataframe(
|
259 |
data,
|
260 |
use_container_width=True,
|
261 |
+
column_order=("0", "1", "2"),
|
262 |
column_config={
|
263 |
"0": "Grascii",
|
264 |
"1": "Longhand",
|
265 |
"2": st.column_config.ImageColumn("Shorthand", width="medium"),
|
266 |
+
"3": "Dictionary",
|
267 |
},
|
268 |
selection_mode="multi-row",
|
269 |
on_select="rerun",
|
|
|
281 |
|
282 |
|
283 |
def write_reverse_search():
|
|
|
284 |
reverse_results = []
|
285 |
|
286 |
with st.form("Reverse Search"):
|
|
|
289 |
st.form_submit_button("Search")
|
290 |
|
291 |
if word:
|
292 |
+
searcher = ReverseSearcher(dictionaries=st.session_state["dictionaries"])
|
293 |
reverse_results = searcher.sorted_search(
|
294 |
reverse=word,
|
295 |
)
|