Synced repo using 'sync_with_huggingface' Github Action
Browse files
- iscc_sct/demo.py +80 -29
- poetry.lock +5 -5
- pyproject.toml +1 -1
iscc_sct/demo.py
CHANGED
@@ -1,15 +1,5 @@
|
|
1 |
"""
|
2 |
Gradio demo showcasing ISCC Semantic Text Code.
|
3 |
-
|
4 |
-
The demo features:
|
5 |
-
|
6 |
-
- two side by side text inputs.
|
7 |
-
- One sample text per input (One sample in english and the other a german translation of it)
|
8 |
-
- One slider to set global bitlength (32-256 bits in steps of 32 with 64 as default)
|
9 |
-
- One result output per text input
|
10 |
-
|
11 |
-
The user can select the samples or write or paste text into the inputs and generate ISCC Semantic
|
12 |
-
Text Codes for the Texts. Below the result outputs we show the similarity of the two codes.
|
13 |
"""
|
14 |
|
15 |
from loguru import logger as log
|
@@ -100,6 +90,34 @@ keine Informationen über die Registrierung von ISCCs.
|
|
100 |
]
|
101 |
)
|
102 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
103 |
custom_css = """
|
104 |
#chunked-text span.label {
|
105 |
text-transform: none !important;
|
@@ -157,37 +175,40 @@ with gr.Blocks(css=custom_css, theme=iscc_theme) as demo:
|
|
157 |
with gr.Column(variant="panel"):
|
158 |
in_text_a = gr.TextArea(
|
159 |
label="Text A",
|
160 |
-
placeholder="
|
161 |
lines=12,
|
162 |
max_lines=12,
|
163 |
)
|
164 |
-
|
165 |
-
|
166 |
-
label="Click to use sample text",
|
167 |
-
examples=[sample_text_en],
|
168 |
-
inputs=[in_text_a],
|
169 |
-
examples_per_page=1,
|
170 |
-
example_labels=[truncate_text(sample_text_en)]
|
171 |
)
|
172 |
out_code_a = gr.Textbox(label="ISCC Code for Text A")
|
173 |
-
gr.ClearButton(components=[in_text_a])
|
174 |
with gr.Column(variant="panel"):
|
175 |
in_text_b = gr.TextArea(
|
176 |
label="Text B",
|
177 |
-
placeholder="
|
178 |
lines=12,
|
179 |
max_lines=12,
|
180 |
)
|
181 |
-
|
182 |
-
|
183 |
-
label="Click to use sample text",
|
184 |
-
examples=[sample_text_de],
|
185 |
-
inputs=[in_text_b],
|
186 |
-
examples_per_page=1,
|
187 |
-
example_labels=[truncate_text(sample_text_de)]
|
188 |
)
|
189 |
out_code_b = gr.Textbox(label="ISCC Code for Text B")
|
190 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
191 |
|
192 |
with gr.Row(variant="panel"):
|
193 |
with gr.Column(variant="panel"):
|
@@ -241,10 +262,40 @@ with gr.Blocks(css=custom_css, theme=iscc_theme) as demo:
|
|
241 |
|
242 |
out_code_a.change(compare_codes, inputs=[out_code_a, out_code_b, in_iscc_bits], outputs=[out_similarity])
|
243 |
out_code_b.change(compare_codes, inputs=[out_code_a, out_code_b, in_iscc_bits], outputs=[out_similarity])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
244 |
with gr.Row(variant="panel"):
|
245 |
with gr.Column(variant="panel"):
|
246 |
gr.Markdown(
|
247 |
-
"""
|
248 |
## Understanding ISCC Semantic Text-Codes
|
249 |
|
250 |
### What is an ISCC Semantic Text-Code?
|
|
|
1 |
"""
|
2 |
Gradio demo showcasing ISCC Semantic Text Code.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3 |
"""
|
4 |
|
5 |
from loguru import logger as log
|
|
|
90 |
]
|
91 |
)
|
92 |
|
93 |
+
sample_text_bg = "\n\n".join(
|
94 |
+
[
|
95 |
+
" ".join(paragraph.split())
|
96 |
+
for paragraph in """
|
97 |
+
Този документ определя синтаксиса и структурата на Международния стандартен код на съдържанието (ISCC) като система за
|
98 |
+
идентификация на цифрови активи (включително кодиране на текст, изображения, аудио, видео или друго съдържание във
|
99 |
+
всички медийни сектори). Той описва също метаданните на ISCC и използването на ISCC във връзка с други схеми, като
|
100 |
+
DOI, ISAN, ISBN, ISRC, ISSN и ISWC.
|
101 |
+
|
102 |
+
ISCC се прилага за конкретен цифров актив и представлява детерминиран дескриптор на данни, конструиран от множество
|
103 |
+
хеш-разходи, като се използват алгоритмите и правилата в настоящия документ. Настоящият документ не предоставя
|
104 |
+
информация за регистрацията на ISCC.
|
105 |
+
""".strip().split("\n\n")
|
106 |
+
]
|
107 |
+
)
|
108 |
+
|
109 |
+
sample_text_zh = "\n\n".join(
|
110 |
+
[
|
111 |
+
" ".join(paragraph.split())
|
112 |
+
for paragraph in """
|
113 |
+
本文件规定了国际标准内容代码(ISCC)的语法和结构,作为数字资产(包括所有媒 体领域的文本、图像、音频、视频或其他内容的编码)的标识系统。它还介绍了
|
114 |
+
ISCC 元数据以及 ISCC 与其他方案(如 DOI、ISAN、ISBN、ISRC、ISSN 和 ISWC)的结合使用。
|
115 |
+
|
116 |
+
ISCC 适用于特定的数字资产,是使用本文件中的算法和规则从多个哈希摘要中确定性地建 立起来的数据描述符。本文件不提供有关 ISCC 注册的信息。
|
117 |
+
""".strip().split("\n\n")
|
118 |
+
]
|
119 |
+
)
|
120 |
+
|
121 |
custom_css = """
|
122 |
#chunked-text span.label {
|
123 |
text-transform: none !important;
|
|
|
175 |
with gr.Column(variant="panel"):
|
176 |
in_text_a = gr.TextArea(
|
177 |
label="Text A",
|
178 |
+
placeholder="Choose sample text from the dropdown or type or paste your text.",
|
179 |
lines=12,
|
180 |
max_lines=12,
|
181 |
)
|
182 |
+
sample_dropdown_a = gr.Dropdown(
|
183 |
+
choices=["None", "English", "Bulgarian"], label="Select sample for Text A", value="None"
|
|
|
|
|
|
|
|
|
|
|
184 |
)
|
185 |
out_code_a = gr.Textbox(label="ISCC Code for Text A")
|
|
|
186 |
with gr.Column(variant="panel"):
|
187 |
in_text_b = gr.TextArea(
|
188 |
label="Text B",
|
189 |
+
placeholder="Choose sample text from the dropdown or type or paste your text.",
|
190 |
lines=12,
|
191 |
max_lines=12,
|
192 |
)
|
193 |
+
sample_dropdown_b = gr.Dropdown(
|
194 |
+
choices=["None", "German", "Chinese"], label="Select sample for Text B", value="None"
|
|
|
|
|
|
|
|
|
|
|
195 |
)
|
196 |
out_code_b = gr.Textbox(label="ISCC Code for Text B")
|
197 |
+
|
198 |
+
def update_sample_text(choice, text_a_or_b):
|
199 |
+
if choice == "None":
|
200 |
+
return ""
|
201 |
+
if text_a_or_b == "A":
|
202 |
+
return sample_text_en if choice == "English" else sample_text_bg
|
203 |
+
else:
|
204 |
+
return sample_text_de if choice == "German" else sample_text_zh
|
205 |
+
|
206 |
+
sample_dropdown_a.change(
|
207 |
+
lambda choice: update_sample_text(choice, "A"), inputs=[sample_dropdown_a], outputs=[in_text_a]
|
208 |
+
)
|
209 |
+
sample_dropdown_b.change(
|
210 |
+
lambda choice: update_sample_text(choice, "B"), inputs=[sample_dropdown_b], outputs=[in_text_b]
|
211 |
+
)
|
212 |
|
213 |
with gr.Row(variant="panel"):
|
214 |
with gr.Column(variant="panel"):
|
|
|
262 |
|
263 |
out_code_a.change(compare_codes, inputs=[out_code_a, out_code_b, in_iscc_bits], outputs=[out_similarity])
|
264 |
out_code_b.change(compare_codes, inputs=[out_code_a, out_code_b, in_iscc_bits], outputs=[out_similarity])
|
265 |
+
|
266 |
+
def reset_all():
|
267 |
+
return (
|
268 |
+
gr.Slider(value=128), # Reset ISCC Bit-Length
|
269 |
+
gr.Dropdown(value="None"), # Reset sample dropdown A
|
270 |
+
gr.Dropdown(value="None"), # Reset sample dropdown B
|
271 |
+
gr.TextArea(value=""), # Reset Text A
|
272 |
+
gr.TextArea(value=""), # Reset Text B
|
273 |
+
gr.Textbox(value=""), # Reset ISCC Code for Text A
|
274 |
+
gr.Textbox(value=""), # Reset ISCC Code for Text B
|
275 |
+
gr.HTML(value=""), # Reset Similarity
|
276 |
+
)
|
277 |
+
|
278 |
+
with gr.Row(variant="panel"):
|
279 |
+
reset_button = gr.Button("Reset All")
|
280 |
+
|
281 |
+
reset_button.click(
|
282 |
+
reset_all,
|
283 |
+
outputs=[
|
284 |
+
in_iscc_bits,
|
285 |
+
sample_dropdown_a,
|
286 |
+
sample_dropdown_b,
|
287 |
+
in_text_a,
|
288 |
+
in_text_b,
|
289 |
+
out_code_a,
|
290 |
+
out_code_b,
|
291 |
+
out_similarity,
|
292 |
+
],
|
293 |
+
)
|
294 |
+
|
295 |
with gr.Row(variant="panel"):
|
296 |
with gr.Column(variant="panel"):
|
297 |
gr.Markdown(
|
298 |
+
"""
|
299 |
## Understanding ISCC Semantic Text-Codes
|
300 |
|
301 |
### What is an ISCC Semantic Text-Code?
|
poetry.lock
CHANGED
@@ -774,21 +774,21 @@ test = ["flufl.flake8", "importlib-resources (>=1.3)", "jaraco.test (>=5.4)", "p
|
|
774 |
|
775 |
[[package]]
|
776 |
name = "importlib-resources"
|
777 |
-
version = "6.4.
|
778 |
description = "Read resources from Python packages"
|
779 |
optional = true
|
780 |
python-versions = ">=3.8"
|
781 |
files = [
|
782 |
-
{file = "importlib_resources-6.4.
|
783 |
-
{file = "importlib_resources-6.4.
|
784 |
]
|
785 |
|
786 |
[package.dependencies]
|
787 |
zipp = {version = ">=3.1.0", markers = "python_version < \"3.10\""}
|
788 |
|
789 |
[package.extras]
|
790 |
-
|
791 |
-
|
792 |
|
793 |
[[package]]
|
794 |
name = "iniconfig"
|
|
|
774 |
|
775 |
[[package]]
|
776 |
name = "importlib-resources"
|
777 |
+
version = "6.4.2"
|
778 |
description = "Read resources from Python packages"
|
779 |
optional = true
|
780 |
python-versions = ">=3.8"
|
781 |
files = [
|
782 |
+
{file = "importlib_resources-6.4.2-py3-none-any.whl", hash = "sha256:8bba8c54a8a3afaa1419910845fa26ebd706dc716dd208d9b158b4b6966f5c5c"},
|
783 |
+
{file = "importlib_resources-6.4.2.tar.gz", hash = "sha256:6cbfbefc449cc6e2095dd184691b7a12a04f40bc75dd4c55d31c34f174cdf57a"},
|
784 |
]
|
785 |
|
786 |
[package.dependencies]
|
787 |
zipp = {version = ">=3.1.0", markers = "python_version < \"3.10\""}
|
788 |
|
789 |
[package.extras]
|
790 |
+
doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"]
|
791 |
+
test = ["jaraco.test (>=5.4)", "pytest (>=6,!=8.1.*)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-mypy", "pytest-ruff (>=0.2.1)", "zipp (>=3.17)"]
|
792 |
|
793 |
[[package]]
|
794 |
name = "iniconfig"
|
pyproject.toml
CHANGED
@@ -85,7 +85,7 @@ line-length = 119
|
|
85 |
line-ending = "lf"
|
86 |
|
87 |
[tool.coverage.run]
|
88 |
-
omit = ["iscc_sct/dev.py", "tests/"]
|
89 |
|
90 |
[tool.poe.tasks]
|
91 |
format-code = { cmd = "ruff format", help = "Code style formating with ruff" }
|
|
|
85 |
line-ending = "lf"
|
86 |
|
87 |
[tool.coverage.run]
|
88 |
+
omit = ["iscc_sct/dev.py", "tests/", "iscc_sct/demo.py"]
|
89 |
|
90 |
[tool.poe.tasks]
|
91 |
format-code = { cmd = "ruff format", help = "Code style formating with ruff" }
|