titusz committed on
Commit
0f2b045
·
verified ·
1 Parent(s): a482560

Synced repo using 'sync_with_huggingface' Github Action

Browse files
Files changed (3) hide show
  1. iscc_sct/demo.py +80 -29
  2. poetry.lock +5 -5
  3. pyproject.toml +1 -1
iscc_sct/demo.py CHANGED
@@ -1,15 +1,5 @@
1
  """
2
  Gradio demo showcasing ISCC Semantic Text Code.
3
-
4
- The demo features:
5
-
6
- - two side by side text inputs.
7
- - One sample text per input (One sample in english and the other a german translation of it)
8
- - One slider to set global bitlength (32-256 bits in steps of 32 with 64 as default)
9
- - One result output per text input
10
-
11
- The user can select the samples or write or paste text into the inputs and generate ISCC Semantic
12
- Text Codes for the Texts. Below the result outputs we show the similarity of the two codes.
13
  """
14
 
15
  from loguru import logger as log
@@ -100,6 +90,34 @@ keine Informationen über die Registrierung von ISCCs.
100
  ]
101
  )
102
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
103
  custom_css = """
104
  #chunked-text span.label {
105
  text-transform: none !important;
@@ -157,37 +175,40 @@ with gr.Blocks(css=custom_css, theme=iscc_theme) as demo:
157
  with gr.Column(variant="panel"):
158
  in_text_a = gr.TextArea(
159
  label="Text A",
160
- placeholder="Click the sample text below or type or paste your text.",
161
  lines=12,
162
  max_lines=12,
163
  )
164
-
165
- gr.Examples(
166
- label="Click to use sample text",
167
- examples=[sample_text_en],
168
- inputs=[in_text_a],
169
- examples_per_page=1,
170
- example_labels=[truncate_text(sample_text_en)]
171
  )
172
  out_code_a = gr.Textbox(label="ISCC Code for Text A")
173
- gr.ClearButton(components=[in_text_a])
174
  with gr.Column(variant="panel"):
175
  in_text_b = gr.TextArea(
176
  label="Text B",
177
- placeholder="Click the sample text below or type or paste your text.",
178
  lines=12,
179
  max_lines=12,
180
  )
181
-
182
- gr.Examples(
183
- label="Click to use sample text",
184
- examples=[sample_text_de],
185
- inputs=[in_text_b],
186
- examples_per_page=1,
187
- example_labels=[truncate_text(sample_text_de)]
188
  )
189
  out_code_b = gr.Textbox(label="ISCC Code for Text B")
190
- gr.ClearButton(components=[in_text_b])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
191
 
192
  with gr.Row(variant="panel"):
193
  with gr.Column(variant="panel"):
@@ -241,10 +262,40 @@ with gr.Blocks(css=custom_css, theme=iscc_theme) as demo:
241
 
242
  out_code_a.change(compare_codes, inputs=[out_code_a, out_code_b, in_iscc_bits], outputs=[out_similarity])
243
  out_code_b.change(compare_codes, inputs=[out_code_a, out_code_b, in_iscc_bits], outputs=[out_similarity])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
244
  with gr.Row(variant="panel"):
245
  with gr.Column(variant="panel"):
246
  gr.Markdown(
247
- """
248
  ## Understanding ISCC Semantic Text-Codes
249
 
250
  ### What is an ISCC Semantic Text-Code?
 
1
  """
2
  Gradio demo showcasing ISCC Semantic Text Code.
 
 
 
 
 
 
 
 
 
 
3
  """
4
 
5
  from loguru import logger as log
 
90
  ]
91
  )
92
 
93
+ sample_text_bg = "\n\n".join(
94
+ [
95
+ " ".join(paragraph.split())
96
+ for paragraph in """
97
+ Този документ определя синтаксиса и структурата на Международния стандартен код на съдържанието (ISCC) като система за
98
+ идентификация на цифрови активи (включително кодиране на текст, изображения, аудио, видео или друго съдържание във
99
+ всички медийни сектори). Той описва също метаданните на ISCC и използването на ISCC във връзка с други схеми, като
100
+ DOI, ISAN, ISBN, ISRC, ISSN и ISWC.
101
+
102
+ ISCC се прилага за конкретен цифров актив и представлява детерминиран дескриптор на данни, конструиран от множество
103
+ хеш-разходи, като се използват алгоритмите и правилата в настоящия документ. Настоящият документ не предоставя
104
+ информация за регистрацията на ISCC.
105
+ """.strip().split("\n\n")
106
+ ]
107
+ )
108
+
109
+ sample_text_zh = "\n\n".join(
110
+ [
111
+ " ".join(paragraph.split())
112
+ for paragraph in """
113
+ 本文件规定了国际标准内容代码(ISCC)的语法和结构,作为数字资产(包括所有媒体领域的文本、图像、音频、视频或其他内容的编码)的标识系统。它还介绍了
114
+ ISCC 元数据以及 ISCC 与其他方案(如 DOI、ISAN、ISBN、ISRC、ISSN 和 ISWC)的结合使用。
115
+
116
+ ISCC 适用于特定的数字资产,是使用本文件中的算法和规则从多个哈希摘要中确定性地建立起来的数据描述符。本文件不提供有关 ISCC 注册的信息。
117
+ """.strip().split("\n\n")
118
+ ]
119
+ )
120
+
121
  custom_css = """
122
  #chunked-text span.label {
123
  text-transform: none !important;
 
175
  with gr.Column(variant="panel"):
176
  in_text_a = gr.TextArea(
177
  label="Text A",
178
+ placeholder="Choose sample text from the dropdown or type or paste your text.",
179
  lines=12,
180
  max_lines=12,
181
  )
182
+ sample_dropdown_a = gr.Dropdown(
183
+ choices=["None", "English", "Bulgarian"], label="Select sample for Text A", value="None"
 
 
 
 
 
184
  )
185
  out_code_a = gr.Textbox(label="ISCC Code for Text A")
 
186
  with gr.Column(variant="panel"):
187
  in_text_b = gr.TextArea(
188
  label="Text B",
189
+ placeholder="Choose sample text from the dropdown or type or paste your text.",
190
  lines=12,
191
  max_lines=12,
192
  )
193
+ sample_dropdown_b = gr.Dropdown(
194
+ choices=["None", "German", "Chinese"], label="Select sample for Text B", value="None"
 
 
 
 
 
195
  )
196
  out_code_b = gr.Textbox(label="ISCC Code for Text B")
197
+
198
+ def update_sample_text(choice, text_a_or_b):
199
+ if choice == "None":
200
+ return ""
201
+ if text_a_or_b == "A":
202
+ return sample_text_en if choice == "English" else sample_text_bg
203
+ else:
204
+ return sample_text_de if choice == "German" else sample_text_zh
205
+
206
+ sample_dropdown_a.change(
207
+ lambda choice: update_sample_text(choice, "A"), inputs=[sample_dropdown_a], outputs=[in_text_a]
208
+ )
209
+ sample_dropdown_b.change(
210
+ lambda choice: update_sample_text(choice, "B"), inputs=[sample_dropdown_b], outputs=[in_text_b]
211
+ )
212
 
213
  with gr.Row(variant="panel"):
214
  with gr.Column(variant="panel"):
 
262
 
263
  out_code_a.change(compare_codes, inputs=[out_code_a, out_code_b, in_iscc_bits], outputs=[out_similarity])
264
  out_code_b.change(compare_codes, inputs=[out_code_a, out_code_b, in_iscc_bits], outputs=[out_similarity])
265
+
266
+ def reset_all():
267
+ return (
268
+ gr.Slider(value=128), # Reset ISCC Bit-Length
269
+ gr.Dropdown(value="None"), # Reset sample dropdown A
270
+ gr.Dropdown(value="None"), # Reset sample dropdown B
271
+ gr.TextArea(value=""), # Reset Text A
272
+ gr.TextArea(value=""), # Reset Text B
273
+ gr.Textbox(value=""), # Reset ISCC Code for Text A
274
+ gr.Textbox(value=""), # Reset ISCC Code for Text B
275
+ gr.HTML(value=""), # Reset Similarity
276
+ )
277
+
278
+ with gr.Row(variant="panel"):
279
+ reset_button = gr.Button("Reset All")
280
+
281
+ reset_button.click(
282
+ reset_all,
283
+ outputs=[
284
+ in_iscc_bits,
285
+ sample_dropdown_a,
286
+ sample_dropdown_b,
287
+ in_text_a,
288
+ in_text_b,
289
+ out_code_a,
290
+ out_code_b,
291
+ out_similarity,
292
+ ],
293
+ )
294
+
295
  with gr.Row(variant="panel"):
296
  with gr.Column(variant="panel"):
297
  gr.Markdown(
298
+ """
299
  ## Understanding ISCC Semantic Text-Codes
300
 
301
  ### What is an ISCC Semantic Text-Code?
poetry.lock CHANGED
@@ -774,21 +774,21 @@ test = ["flufl.flake8", "importlib-resources (>=1.3)", "jaraco.test (>=5.4)", "p
774
 
775
  [[package]]
776
  name = "importlib-resources"
777
- version = "6.4.0"
778
  description = "Read resources from Python packages"
779
  optional = true
780
  python-versions = ">=3.8"
781
  files = [
782
- {file = "importlib_resources-6.4.0-py3-none-any.whl", hash = "sha256:50d10f043df931902d4194ea07ec57960f66a80449ff867bfe782b4c486ba78c"},
783
- {file = "importlib_resources-6.4.0.tar.gz", hash = "sha256:cdb2b453b8046ca4e3798eb1d84f3cce1446a0e8e7b5ef4efb600f19fc398145"},
784
  ]
785
 
786
  [package.dependencies]
787
  zipp = {version = ">=3.1.0", markers = "python_version < \"3.10\""}
788
 
789
  [package.extras]
790
- docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (<7.2.5)", "sphinx (>=3.5)", "sphinx-lint"]
791
- testing = ["jaraco.test (>=5.4)", "pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-mypy", "pytest-ruff (>=0.2.1)", "zipp (>=3.17)"]
792
 
793
  [[package]]
794
  name = "iniconfig"
 
774
 
775
  [[package]]
776
  name = "importlib-resources"
777
+ version = "6.4.2"
778
  description = "Read resources from Python packages"
779
  optional = true
780
  python-versions = ">=3.8"
781
  files = [
782
+ {file = "importlib_resources-6.4.2-py3-none-any.whl", hash = "sha256:8bba8c54a8a3afaa1419910845fa26ebd706dc716dd208d9b158b4b6966f5c5c"},
783
+ {file = "importlib_resources-6.4.2.tar.gz", hash = "sha256:6cbfbefc449cc6e2095dd184691b7a12a04f40bc75dd4c55d31c34f174cdf57a"},
784
  ]
785
 
786
  [package.dependencies]
787
  zipp = {version = ">=3.1.0", markers = "python_version < \"3.10\""}
788
 
789
  [package.extras]
790
+ doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"]
791
+ test = ["jaraco.test (>=5.4)", "pytest (>=6,!=8.1.*)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-mypy", "pytest-ruff (>=0.2.1)", "zipp (>=3.17)"]
792
 
793
  [[package]]
794
  name = "iniconfig"
pyproject.toml CHANGED
@@ -85,7 +85,7 @@ line-length = 119
85
  line-ending = "lf"
86
 
87
  [tool.coverage.run]
88
- omit = ["iscc_sct/dev.py", "tests/"]
89
 
90
  [tool.poe.tasks]
91
  format-code = { cmd = "ruff format", help = "Code style formating with ruff" }
 
85
  line-ending = "lf"
86
 
87
  [tool.coverage.run]
88
+ omit = ["iscc_sct/dev.py", "tests/", "iscc_sct/demo.py"]
89
 
90
  [tool.poe.tasks]
91
  format-code = { cmd = "ruff format", help = "Code style formating with ruff" }