chanicpanic committed on
Commit
b6b196e
·
1 Parent(s): f7fafc2

Introduce preanniversary-phrases and dictionary selection

Browse files
Files changed (5) hide show
  1. README.md +11 -1
  2. app.py +18 -1
  3. report.py +12 -9
  4. requirements.txt +2 -2
  5. search.py +35 -7
README.md CHANGED
@@ -1,16 +1,26 @@
1
  ---
2
  title: Grascii Search
 
3
  emoji: 🔎
4
  colorFrom: gray
5
  colorTo: green
6
  sdk: streamlit
7
- sdk_version: 1.40.2
8
  app_file: app.py
9
  pinned: true
10
  models:
11
  - grascii/gregg-vision-v0.2.1
12
  datasets:
13
  - grascii/gregg-preanniversary-words
 
 
 
 
 
 
 
 
 
14
  ---
15
 
16
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
  title: Grascii Search
3
+ short_description: Search Grascii's Gregg Shorthand Dictionaries
4
  emoji: 🔎
5
  colorFrom: gray
6
  colorTo: green
7
  sdk: streamlit
8
+ sdk_version: 1.46.1
9
  app_file: app.py
10
  pinned: true
11
  models:
12
  - grascii/gregg-vision-v0.2.1
13
  datasets:
14
  - grascii/gregg-preanniversary-words
15
+ - grascii/gregg-preanniversary-phrases
16
+ preload_from_hub:
17
+ - grascii/gregg-vision-v0.2.1
18
+ - grascii/gregg-preanniversary-words
19
+ - grascii/gregg-preanniversary-phrases
20
+ tags:
21
+ - gregg
22
+ - shorthand
23
+ - stenography
24
  ---
25
 
26
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py CHANGED
@@ -13,7 +13,7 @@ st.set_page_config(
13
  )
14
 
15
  import pandas as pd # noqa E402
16
- from search import write_grascii_search, write_reverse_search # noqa E402
17
 
18
  pd.options.mode.copy_on_write = True
19
 
@@ -33,6 +33,23 @@ if st.session_state["report_submitted"]:
33
  st.toast("Thanks for the report!")
34
  st.session_state["report_submitted"] = False
35
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
  tab1, tab2 = st.tabs(["Grascii", "Reverse"])
37
 
38
  with tab1:
 
13
  )
14
 
15
  import pandas as pd # noqa E402
16
+ from search import write_grascii_search, write_reverse_search, write_dictionaries # noqa E402
17
 
18
  pd.options.mode.copy_on_write = True
19
 
 
33
  st.toast("Thanks for the report!")
34
  st.session_state["report_submitted"] = False
35
 
36
+ with st.sidebar:
37
+ st.markdown(
38
+ """
39
+ # What's New
40
+
41
+ :blue-badge[2025-07-15]
42
+
43
+ - The preanniversary-phrases dictionary is now available!
44
+ - You may select which dictionaries you would like to search.
45
+ - To see which dictionary contained each result, hover over the result
46
+ table. Click the "eye" icon at the top right of the table and
47
+ select "Dictionary".
48
+ """
49
+ )
50
+
51
+ write_dictionaries()
52
+
53
  tab1, tab2 = st.tabs(["Grascii", "Reverse"])
54
 
55
  with tab1:
report.py CHANGED
@@ -21,6 +21,7 @@ def write_header(writer):
21
  "date",
22
  "grascii",
23
  "longhand",
 
24
  "incorrect_grascii",
25
  "incorrect_longhand",
26
  "incorrect_shorthand",
@@ -47,11 +48,11 @@ def report_dialog(data):
47
  st.write("Please select one or more reasons for flagging each row:")
48
 
49
  report_df = data
50
- report_df["3"] = False
51
  report_df["4"] = False
52
  report_df["5"] = False
53
  report_df["6"] = False
54
  report_df["7"] = False
 
55
  final_report = st.data_editor(
56
  report_df,
57
  hide_index=True,
@@ -59,17 +60,18 @@ def report_dialog(data):
59
  "0": "Grascii",
60
  "1": "Longhand",
61
  "2": st.column_config.ImageColumn("Shorthand", width="medium"),
62
- "3": st.column_config.CheckboxColumn("Grascii is incorrect"),
63
- "4": st.column_config.CheckboxColumn("Longhand is incorrect"),
64
- "5": st.column_config.CheckboxColumn("Shorthand image is incorrect"),
65
- "6": st.column_config.CheckboxColumn(
 
66
  "Shorthand image is improperly cropped"
67
  ),
68
- "7": st.column_config.CheckboxColumn(
69
  "Shorthand image contains extraneous marks"
70
  ),
71
  },
72
- disabled=["0", "1", "2"],
73
  use_container_width=True,
74
  )
75
 
@@ -83,11 +85,11 @@ def report_dialog(data):
83
  write_header(writer)
84
  if any(
85
  [
86
- row.iloc[3],
87
  row.iloc[4],
88
  row.iloc[5],
89
  row.iloc[6],
90
  row.iloc[7],
 
91
  ]
92
  ):
93
  writer.writerow(
@@ -95,11 +97,12 @@ def report_dialog(data):
95
  datetime.now(timezone.utc).date(),
96
  row.iloc[0],
97
  row.iloc[1],
98
- 1 if row.iloc[3] else 0,
99
  1 if row.iloc[4] else 0,
100
  1 if row.iloc[5] else 0,
101
  1 if row.iloc[6] else 0,
102
  1 if row.iloc[7] else 0,
 
103
  ]
104
  )
105
 
 
21
  "date",
22
  "grascii",
23
  "longhand",
24
+ "dictionary",
25
  "incorrect_grascii",
26
  "incorrect_longhand",
27
  "incorrect_shorthand",
 
48
  st.write("Please select one or more reasons for flagging each row:")
49
 
50
  report_df = data
 
51
  report_df["4"] = False
52
  report_df["5"] = False
53
  report_df["6"] = False
54
  report_df["7"] = False
55
+ report_df["8"] = False
56
  final_report = st.data_editor(
57
  report_df,
58
  hide_index=True,
 
60
  "0": "Grascii",
61
  "1": "Longhand",
62
  "2": st.column_config.ImageColumn("Shorthand", width="medium"),
63
+ "3": "Dictionary",
64
+ "4": st.column_config.CheckboxColumn("Grascii is incorrect"),
65
+ "5": st.column_config.CheckboxColumn("Longhand is incorrect"),
66
+ "6": st.column_config.CheckboxColumn("Shorthand image is incorrect"),
67
+ "7": st.column_config.CheckboxColumn(
68
  "Shorthand image is improperly cropped"
69
  ),
70
+ "8": st.column_config.CheckboxColumn(
71
  "Shorthand image contains extraneous marks"
72
  ),
73
  },
74
+ disabled=["0", "1", "2", "3"],
75
  use_container_width=True,
76
  )
77
 
 
85
  write_header(writer)
86
  if any(
87
  [
 
88
  row.iloc[4],
89
  row.iloc[5],
90
  row.iloc[6],
91
  row.iloc[7],
92
+ row.iloc[8],
93
  ]
94
  ):
95
  writer.writerow(
 
97
  datetime.now(timezone.utc).date(),
98
  row.iloc[0],
99
  row.iloc[1],
100
+ row.iloc[3],
101
  1 if row.iloc[4] else 0,
102
  1 if row.iloc[5] else 0,
103
  1 if row.iloc[6] else 0,
104
  1 if row.iloc[7] else 0,
105
+ 1 if row.iloc[8] else 0,
106
  ]
107
  )
108
 
requirements.txt CHANGED
@@ -17,7 +17,7 @@ frozenlist==1.5.0
17
  fsspec==2024.9.0
18
  gitdb==4.0.11
19
  GitPython==3.1.43
20
- grascii==0.6.1
21
  huggingface-hub==0.26.5
22
  idna==3.10
23
  Jinja2==3.1.4
@@ -66,7 +66,7 @@ rpds-py==0.22.3
66
  safetensors==0.4.5
67
  six==1.17.0
68
  smmap==5.0.1
69
- streamlit==1.40.2
70
  sympy==1.13.1
71
  tenacity==9.0.0
72
  tokenizers==0.21.0
 
17
  fsspec==2024.9.0
18
  gitdb==4.0.11
19
  GitPython==3.1.43
20
+ grascii==0.7.0
21
  huggingface-hub==0.26.5
22
  idna==3.10
23
  Jinja2==3.1.4
 
66
  safetensors==0.4.5
67
  six==1.17.0
68
  smmap==5.0.1
69
+ streamlit==1.46.1
70
  sympy==1.13.1
71
  tenacity==9.0.0
72
  tokenizers==0.21.0
search.py CHANGED
@@ -16,9 +16,14 @@ MAX_GRASCII_LENGTH = 16
16
 
17
  @st.cache_data(show_spinner="Loading shorthand images")
18
  def load_images():
19
- ds = load_dataset(
20
- "grascii/gregg-preanniversary-words", split="train", token=st.secrets.HF_TOKEN
21
- )
 
 
 
 
 
22
  image_map = {}
23
  for row in ds:
24
  buffered = BytesIO()
@@ -31,6 +36,21 @@ def load_images():
31
  image_map = load_images()
32
 
33
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
  def on_submit():
35
  if "grascii_text_box" in st.session_state:
36
  st.session_state["grascii"] = st.session_state["grascii_text_box"]
@@ -38,12 +58,12 @@ def on_submit():
38
 
39
 
40
  def write_grascii_search():
41
- searcher = GrasciiSearcher()
42
  grascii_results = []
43
 
44
  search_by = st.radio("Search by", ["text", "image (beta)"], horizontal=True)
45
 
46
  with st.form("Grascii Search"):
 
47
  placeholder = st.empty()
48
  if search_by == "text":
49
  placeholder.text_input(
@@ -98,7 +118,7 @@ def write_grascii_search():
98
  help="""
99
  How to intepret ambiguous Grascii strings.
100
 
101
- - best: Only search using the best interpretation.
102
  - all: Search using all possible interpretations.
103
  """,
104
  )
@@ -181,6 +201,8 @@ def write_grascii_search():
181
  st.error(f"Grascii too long. Max: {MAX_GRASCII_LENGTH} characters")
182
  return
183
 
 
 
184
  try:
185
  grascii_results = searcher.sorted_search(
186
  grascii=grascii,
@@ -220,7 +242,8 @@ def write_results(results, term, key_prefix):
220
  lambda r: [
221
  r.entry.grascii,
222
  r.entry.translation,
223
- image_map.get(r.entry.translation),
 
224
  ],
225
  results,
226
  )
@@ -229,13 +252,18 @@ def write_results(results, term, key_prefix):
229
  r = "Results" if len(data) != 1 else "Result"
230
  st.write(f'{len(data)} {r} for "{term}"')
231
 
 
 
 
232
  event = st.dataframe(
233
  data,
234
  use_container_width=True,
 
235
  column_config={
236
  "0": "Grascii",
237
  "1": "Longhand",
238
  "2": st.column_config.ImageColumn("Shorthand", width="medium"),
 
239
  },
240
  selection_mode="multi-row",
241
  on_select="rerun",
@@ -253,7 +281,6 @@ def write_results(results, term, key_prefix):
253
 
254
 
255
  def write_reverse_search():
256
- searcher = ReverseSearcher()
257
  reverse_results = []
258
 
259
  with st.form("Reverse Search"):
@@ -262,6 +289,7 @@ def write_reverse_search():
262
  st.form_submit_button("Search")
263
 
264
  if word:
 
265
  reverse_results = searcher.sorted_search(
266
  reverse=word,
267
  )
 
16
 
17
  @st.cache_data(show_spinner="Loading shorthand images")
18
  def load_images():
19
+ return {
20
+ ":preanniversary": load_dataset_images("grascii/gregg-preanniversary-words"),
21
+ ":preanniversary-phrases": load_dataset_images("grascii/gregg-preanniversary-phrases"),
22
+ }
23
+
24
+
25
+ def load_dataset_images(dataset):
26
+ ds = load_dataset(dataset, split="train", token=st.secrets.HF_TOKEN)
27
  image_map = {}
28
  for row in ds:
29
  buffered = BytesIO()
 
36
  image_map = load_images()
37
 
38
 
39
+ AVAILABLE_DICTIONARIES = [":preanniversary", ":preanniversary-phrases"]
40
+
41
+
42
+ @st.fragment
43
+ def write_dictionaries():
44
+ return st.pills(
45
+ "Dictionaries",
46
+ AVAILABLE_DICTIONARIES,
47
+ default=st.session_state.get("dictionaries", AVAILABLE_DICTIONARIES),
48
+ selection_mode="multi",
49
+ key="dictionaries",
50
+ format_func=lambda d: d[1:]
51
+ )
52
+
53
+
54
  def on_submit():
55
  if "grascii_text_box" in st.session_state:
56
  st.session_state["grascii"] = st.session_state["grascii_text_box"]
 
58
 
59
 
60
  def write_grascii_search():
 
61
  grascii_results = []
62
 
63
  search_by = st.radio("Search by", ["text", "image (beta)"], horizontal=True)
64
 
65
  with st.form("Grascii Search"):
66
+
67
  placeholder = st.empty()
68
  if search_by == "text":
69
  placeholder.text_input(
 
118
  help="""
119
  How to intepret ambiguous Grascii strings.
120
 
121
+ - best: Only search using the [canonical interpretation](https://grascii.readthedocs.io/en/stable/interpretation.html#the-canonical-interpretation).
122
  - all: Search using all possible interpretations.
123
  """,
124
  )
 
201
  st.error(f"Grascii too long. Max: {MAX_GRASCII_LENGTH} characters")
202
  return
203
 
204
+ searcher = GrasciiSearcher(dictionaries=st.session_state["dictionaries"])
205
+
206
  try:
207
  grascii_results = searcher.sorted_search(
208
  grascii=grascii,
 
242
  lambda r: [
243
  r.entry.grascii,
244
  r.entry.translation,
245
+ image_map.get(r.dictionary.name, {}).get(r.entry.translation),
246
+ r.dictionary.name[1:],
247
  ],
248
  results,
249
  )
 
252
  r = "Results" if len(data) != 1 else "Result"
253
  st.write(f'{len(data)} {r} for "{term}"')
254
 
255
+ if data.empty:
256
+ return
257
+
258
  event = st.dataframe(
259
  data,
260
  use_container_width=True,
261
+ column_order=("0", "1", "2"),
262
  column_config={
263
  "0": "Grascii",
264
  "1": "Longhand",
265
  "2": st.column_config.ImageColumn("Shorthand", width="medium"),
266
+ "3": "Dictionary",
267
  },
268
  selection_mode="multi-row",
269
  on_select="rerun",
 
281
 
282
 
283
  def write_reverse_search():
 
284
  reverse_results = []
285
 
286
  with st.form("Reverse Search"):
 
289
  st.form_submit_button("Search")
290
 
291
  if word:
292
+ searcher = ReverseSearcher(dictionaries=st.session_state["dictionaries"])
293
  reverse_results = searcher.sorted_search(
294
  reverse=word,
295
  )