Synced repo using 'sync_with_huggingface' GitHub Action
Browse files
- iscc_sct/demo.py +80 -29
- poetry.lock +5 -5
- pyproject.toml +1 -1
iscc_sct/demo.py
CHANGED
|
@@ -1,15 +1,5 @@
|
|
| 1 |
"""
|
| 2 |
Gradio demo showcasing ISCC Semantic Text Code.
|
| 3 |
-
|
| 4 |
-
The demo features:
|
| 5 |
-
|
| 6 |
-
- two side by side text inputs.
|
| 7 |
-
- One sample text per input (One sample in english and the other a german translation of it)
|
| 8 |
-
- One slider to set global bitlength (32-256 bits in steps of 32 with 64 as default)
|
| 9 |
-
- One result output per text input
|
| 10 |
-
|
| 11 |
-
The user can select the samples or write or paste text into the inputs and generate ISCC Semantic
|
| 12 |
-
Text Codes for the Texts. Below the result outputs we show the similarity of the two codes.
|
| 13 |
"""
|
| 14 |
|
| 15 |
from loguru import logger as log
|
|
@@ -100,6 +90,34 @@ keine Informationen über die Registrierung von ISCCs.
|
|
| 100 |
]
|
| 101 |
)
|
| 102 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 103 |
custom_css = """
|
| 104 |
#chunked-text span.label {
|
| 105 |
text-transform: none !important;
|
|
@@ -157,37 +175,40 @@ with gr.Blocks(css=custom_css, theme=iscc_theme) as demo:
|
|
| 157 |
with gr.Column(variant="panel"):
|
| 158 |
in_text_a = gr.TextArea(
|
| 159 |
label="Text A",
|
| 160 |
-
placeholder="
|
| 161 |
lines=12,
|
| 162 |
max_lines=12,
|
| 163 |
)
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
label="Click to use sample text",
|
| 167 |
-
examples=[sample_text_en],
|
| 168 |
-
inputs=[in_text_a],
|
| 169 |
-
examples_per_page=1,
|
| 170 |
-
example_labels=[truncate_text(sample_text_en)]
|
| 171 |
)
|
| 172 |
out_code_a = gr.Textbox(label="ISCC Code for Text A")
|
| 173 |
-
gr.ClearButton(components=[in_text_a])
|
| 174 |
with gr.Column(variant="panel"):
|
| 175 |
in_text_b = gr.TextArea(
|
| 176 |
label="Text B",
|
| 177 |
-
placeholder="
|
| 178 |
lines=12,
|
| 179 |
max_lines=12,
|
| 180 |
)
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
label="Click to use sample text",
|
| 184 |
-
examples=[sample_text_de],
|
| 185 |
-
inputs=[in_text_b],
|
| 186 |
-
examples_per_page=1,
|
| 187 |
-
example_labels=[truncate_text(sample_text_de)]
|
| 188 |
)
|
| 189 |
out_code_b = gr.Textbox(label="ISCC Code for Text B")
|
| 190 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 191 |
|
| 192 |
with gr.Row(variant="panel"):
|
| 193 |
with gr.Column(variant="panel"):
|
|
@@ -241,10 +262,40 @@ with gr.Blocks(css=custom_css, theme=iscc_theme) as demo:
|
|
| 241 |
|
| 242 |
out_code_a.change(compare_codes, inputs=[out_code_a, out_code_b, in_iscc_bits], outputs=[out_similarity])
|
| 243 |
out_code_b.change(compare_codes, inputs=[out_code_a, out_code_b, in_iscc_bits], outputs=[out_similarity])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 244 |
with gr.Row(variant="panel"):
|
| 245 |
with gr.Column(variant="panel"):
|
| 246 |
gr.Markdown(
|
| 247 |
-
"""
|
| 248 |
## Understanding ISCC Semantic Text-Codes
|
| 249 |
|
| 250 |
### What is an ISCC Semantic Text-Code?
|
|
|
|
| 1 |
"""
|
| 2 |
Gradio demo showcasing ISCC Semantic Text Code.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
"""
|
| 4 |
|
| 5 |
from loguru import logger as log
|
|
|
|
| 90 |
]
|
| 91 |
)
|
| 92 |
|
| 93 |
+
sample_text_bg = "\n\n".join(
|
| 94 |
+
[
|
| 95 |
+
" ".join(paragraph.split())
|
| 96 |
+
for paragraph in """
|
| 97 |
+
Този документ определя синтаксиса и структурата на Международния стандартен код на съдържанието (ISCC) като система за
|
| 98 |
+
идентификация на цифрови активи (включително кодиране на текст, изображения, аудио, видео или друго съдържание във
|
| 99 |
+
всички медийни сектори). Той описва също метаданните на ISCC и използването на ISCC във връзка с други схеми, като
|
| 100 |
+
DOI, ISAN, ISBN, ISRC, ISSN и ISWC.
|
| 101 |
+
|
| 102 |
+
ISCC се прилага за конкретен цифров актив и представлява детерминиран дескриптор на данни, конструиран от множество
|
| 103 |
+
хеш-разходи, като се използват алгоритмите и правилата в настоящия документ. Настоящият документ не предоставя
|
| 104 |
+
информация за регистрацията на ISCC.
|
| 105 |
+
""".strip().split("\n\n")
|
| 106 |
+
]
|
| 107 |
+
)
|
| 108 |
+
|
| 109 |
+
sample_text_zh = "\n\n".join(
|
| 110 |
+
[
|
| 111 |
+
" ".join(paragraph.split())
|
| 112 |
+
for paragraph in """
|
| 113 |
+
本文件规定了国际标准内容代码(ISCC)的语法和结构,作为数字资产(包括所有媒 体领域的文本、图像、音频、视频或其他内容的编码)的标识系统。它还介绍了
|
| 114 |
+
ISCC 元数据以及 ISCC 与其他方案(如 DOI、ISAN、ISBN、ISRC、ISSN 和 ISWC)的结合使用。
|
| 115 |
+
|
| 116 |
+
ISCC 适用于特定的数字资产,是使用本文件中的算法和规则从多个哈希摘要中确定性地建 立起来的数据描述符。本文件不提供有关 ISCC 注册的信息。
|
| 117 |
+
""".strip().split("\n\n")
|
| 118 |
+
]
|
| 119 |
+
)
|
| 120 |
+
|
| 121 |
custom_css = """
|
| 122 |
#chunked-text span.label {
|
| 123 |
text-transform: none !important;
|
|
|
|
| 175 |
with gr.Column(variant="panel"):
|
| 176 |
in_text_a = gr.TextArea(
|
| 177 |
label="Text A",
|
| 178 |
+
placeholder="Choose sample text from the dropdown or type or paste your text.",
|
| 179 |
lines=12,
|
| 180 |
max_lines=12,
|
| 181 |
)
|
| 182 |
+
sample_dropdown_a = gr.Dropdown(
|
| 183 |
+
choices=["None", "English", "Bulgarian"], label="Select sample for Text A", value="None"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 184 |
)
|
| 185 |
out_code_a = gr.Textbox(label="ISCC Code for Text A")
|
|
|
|
| 186 |
with gr.Column(variant="panel"):
|
| 187 |
in_text_b = gr.TextArea(
|
| 188 |
label="Text B",
|
| 189 |
+
placeholder="Choose sample text from the dropdown or type or paste your text.",
|
| 190 |
lines=12,
|
| 191 |
max_lines=12,
|
| 192 |
)
|
| 193 |
+
sample_dropdown_b = gr.Dropdown(
|
| 194 |
+
choices=["None", "German", "Chinese"], label="Select sample for Text B", value="None"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 195 |
)
|
| 196 |
out_code_b = gr.Textbox(label="ISCC Code for Text B")
|
| 197 |
+
|
| 198 |
+
def update_sample_text(choice, text_a_or_b):
|
| 199 |
+
if choice == "None":
|
| 200 |
+
return ""
|
| 201 |
+
if text_a_or_b == "A":
|
| 202 |
+
return sample_text_en if choice == "English" else sample_text_bg
|
| 203 |
+
else:
|
| 204 |
+
return sample_text_de if choice == "German" else sample_text_zh
|
| 205 |
+
|
| 206 |
+
sample_dropdown_a.change(
|
| 207 |
+
lambda choice: update_sample_text(choice, "A"), inputs=[sample_dropdown_a], outputs=[in_text_a]
|
| 208 |
+
)
|
| 209 |
+
sample_dropdown_b.change(
|
| 210 |
+
lambda choice: update_sample_text(choice, "B"), inputs=[sample_dropdown_b], outputs=[in_text_b]
|
| 211 |
+
)
|
| 212 |
|
| 213 |
with gr.Row(variant="panel"):
|
| 214 |
with gr.Column(variant="panel"):
|
|
|
|
| 262 |
|
| 263 |
out_code_a.change(compare_codes, inputs=[out_code_a, out_code_b, in_iscc_bits], outputs=[out_similarity])
|
| 264 |
out_code_b.change(compare_codes, inputs=[out_code_a, out_code_b, in_iscc_bits], outputs=[out_similarity])
|
| 265 |
+
|
| 266 |
+
def reset_all():
|
| 267 |
+
return (
|
| 268 |
+
gr.Slider(value=128), # Reset ISCC Bit-Length
|
| 269 |
+
gr.Dropdown(value="None"), # Reset sample dropdown A
|
| 270 |
+
gr.Dropdown(value="None"), # Reset sample dropdown B
|
| 271 |
+
gr.TextArea(value=""), # Reset Text A
|
| 272 |
+
gr.TextArea(value=""), # Reset Text B
|
| 273 |
+
gr.Textbox(value=""), # Reset ISCC Code for Text A
|
| 274 |
+
gr.Textbox(value=""), # Reset ISCC Code for Text B
|
| 275 |
+
gr.HTML(value=""), # Reset Similarity
|
| 276 |
+
)
|
| 277 |
+
|
| 278 |
+
with gr.Row(variant="panel"):
|
| 279 |
+
reset_button = gr.Button("Reset All")
|
| 280 |
+
|
| 281 |
+
reset_button.click(
|
| 282 |
+
reset_all,
|
| 283 |
+
outputs=[
|
| 284 |
+
in_iscc_bits,
|
| 285 |
+
sample_dropdown_a,
|
| 286 |
+
sample_dropdown_b,
|
| 287 |
+
in_text_a,
|
| 288 |
+
in_text_b,
|
| 289 |
+
out_code_a,
|
| 290 |
+
out_code_b,
|
| 291 |
+
out_similarity,
|
| 292 |
+
],
|
| 293 |
+
)
|
| 294 |
+
|
| 295 |
with gr.Row(variant="panel"):
|
| 296 |
with gr.Column(variant="panel"):
|
| 297 |
gr.Markdown(
|
| 298 |
+
"""
|
| 299 |
## Understanding ISCC Semantic Text-Codes
|
| 300 |
|
| 301 |
### What is an ISCC Semantic Text-Code?
|
poetry.lock
CHANGED
|
@@ -774,21 +774,21 @@ test = ["flufl.flake8", "importlib-resources (>=1.3)", "jaraco.test (>=5.4)", "p
|
|
| 774 |
|
| 775 |
[[package]]
|
| 776 |
name = "importlib-resources"
|
| 777 |
-
version = "6.4.
|
| 778 |
description = "Read resources from Python packages"
|
| 779 |
optional = true
|
| 780 |
python-versions = ">=3.8"
|
| 781 |
files = [
|
| 782 |
-
{file = "importlib_resources-6.4.
|
| 783 |
-
{file = "importlib_resources-6.4.
|
| 784 |
]
|
| 785 |
|
| 786 |
[package.dependencies]
|
| 787 |
zipp = {version = ">=3.1.0", markers = "python_version < \"3.10\""}
|
| 788 |
|
| 789 |
[package.extras]
|
| 790 |
-
|
| 791 |
-
|
| 792 |
|
| 793 |
[[package]]
|
| 794 |
name = "iniconfig"
|
|
|
|
| 774 |
|
| 775 |
[[package]]
|
| 776 |
name = "importlib-resources"
|
| 777 |
+
version = "6.4.2"
|
| 778 |
description = "Read resources from Python packages"
|
| 779 |
optional = true
|
| 780 |
python-versions = ">=3.8"
|
| 781 |
files = [
|
| 782 |
+
{file = "importlib_resources-6.4.2-py3-none-any.whl", hash = "sha256:8bba8c54a8a3afaa1419910845fa26ebd706dc716dd208d9b158b4b6966f5c5c"},
|
| 783 |
+
{file = "importlib_resources-6.4.2.tar.gz", hash = "sha256:6cbfbefc449cc6e2095dd184691b7a12a04f40bc75dd4c55d31c34f174cdf57a"},
|
| 784 |
]
|
| 785 |
|
| 786 |
[package.dependencies]
|
| 787 |
zipp = {version = ">=3.1.0", markers = "python_version < \"3.10\""}
|
| 788 |
|
| 789 |
[package.extras]
|
| 790 |
+
doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"]
|
| 791 |
+
test = ["jaraco.test (>=5.4)", "pytest (>=6,!=8.1.*)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-mypy", "pytest-ruff (>=0.2.1)", "zipp (>=3.17)"]
|
| 792 |
|
| 793 |
[[package]]
|
| 794 |
name = "iniconfig"
|
pyproject.toml
CHANGED
|
@@ -85,7 +85,7 @@ line-length = 119
|
|
| 85 |
line-ending = "lf"
|
| 86 |
|
| 87 |
[tool.coverage.run]
|
| 88 |
-
omit = ["iscc_sct/dev.py", "tests/"]
|
| 89 |
|
| 90 |
[tool.poe.tasks]
|
| 91 |
format-code = { cmd = "ruff format", help = "Code style formating with ruff" }
|
|
|
|
| 85 |
line-ending = "lf"
|
| 86 |
|
| 87 |
[tool.coverage.run]
|
| 88 |
+
omit = ["iscc_sct/dev.py", "tests/", "iscc_sct/demo.py"]
|
| 89 |
|
| 90 |
[tool.poe.tasks]
|
| 91 |
format-code = { cmd = "ruff format", help = "Code style formating with ruff" }
|