import base64
import io
from loguru import logger as log
from pathlib import Path
import gradio as gr
from PIL import Image
import iscc_core as ic
import iscc_sdk as idk
import iscc_schema as iss
import iscc_sci as sci
import plotly.graph_objects as go
import pandas as pd
idk.sdk_opts.image_thumbnail_size = 265
idk.sdk_opts.image_thumbnail_quality = 80
HERE = Path(__file__).parent.absolute()
IMAGES1 = HERE / "images1"
IMAGES2 = HERE / "images2"
custom_css = """
.fixed-height {
height: 240px; /* Fixed height */
object-fit: contain; /* Scale the image to fit within the element */
}
.small-height {
display: flex; /* Use flexbox layout */
flex-direction: column; /* Arrange children vertically */
justify-content: flex-end; /* Align children to the end (bottom) */
height: 85px; /* Fixed height */
object-fit: contain; /* Scale the content to fit within the element */
}
.bit-matrix-big {
display: flex;
flex-direction: column;
justify-content: flex-end;
height: 120px; /* Fixed height */
object-fit: contain; /* Scale the content to fit within the element */
}
.iscc-unit-sim {
display: flex;
flex-direction: column;
justify-content: flex-end;
height: 120px; /* Fixed height */
object-fit: contain; /* Scale the content to fit within the element */
}
.modebar-btn {
display: none !important;
}
#examples-a, #examples-b {
height: 140px; /* Fixed height */
object-fit: contain; /* Scale the image to fit within the element */
}
"""
def iscc_semantic(filepath: str) -> idk.IsccMeta:
"""Generate ISCC-CODE extended with Semantic-Code for supported modalities (Image)"""
imeta = idk.code_iscc(filepath)
if imeta.mode == "image":
# Inject Semantic-Code
sci_code = sci.code_image_semantic(filepath, bits=64)["iscc"]
units = ic.iscc_decompose(imeta.iscc)
units.append(sci_code)
iscc_code_s = ic.gen_iscc_code(units)["iscc"]
imeta.iscc = iscc_code_s
return imeta
def dist_to_sim(data, dim=64):
result = {}
for k, v in data.items():
if k == "instance_match":
result[k.split("_")[0].title()] = 1.0 if v is True else -1.0
else:
result[k.split("_")[0].title()] = hamming_to_cosine(v, dim)
return result
def hamming_to_cosine(hamming_distance: int, dim: int) -> float:
"""Aproximate the cosine similarity for a given hamming distance and dimension"""
result = 1 - (2 * hamming_distance) / dim
log.debug(f"Hamming distance: {hamming_distance} - Dim: {dim} - Result: {result}")
return result
def similarity_plot(sim_data):
# type: (dict) -> go.Figure
# Convert input dictionary to DataFrame, sort by value for visual consistency
data_df = pd.DataFrame(reversed(sim_data.items()), columns=["Category", "Value"])
data_df["Percentage"] = data_df["Value"] * 100 # Convert to percentage
# Define color for bars based on value
data_df["Color"] = ["#f56169" if x < 0 else "#a6db50" for x in data_df["Value"]]
# Create Plotly Figure
fig = go.Figure()
fig.add_trace(
go.Bar(
x=data_df["Value"],
y=data_df["Category"],
orientation="h",
marker_color=data_df["Color"],
marker_line={"width": 0},
text=data_df["Percentage"].apply(lambda x: f"{x:.2f}%"),
textposition="inside",
textfont={
"size": 14,
"family": "JetBrains Mono",
"color": "white",
},
hoverinfo=None,
hovertemplate="ISCC-UNIT: %{y}
SIMILARITY: %{x}",
hoverlabel={
"font": {"family": "JetBrains Mono", "color": "#FFFFFF"},
"bgcolor": "#444444",
},
)
)
# Update layout for aesthetics
fig.update_layout(
height=len(sim_data) * 40,
autosize=True,
xaxis=dict(
title="",
tickformat=",.0%",
showticklabels=False,
),
yaxis=dict(
title="",
showticklabels=False,
),
paper_bgcolor="rgba(0,0,0,0)",
plot_bgcolor="rgba(0,0,0,0)",
showlegend=False,
modebar_remove=[
"toImage",
"zoom",
"pan",
"zoomIn",
"zoomOut",
"autoScale",
"resetScale",
],
)
# Adjust the x-axis to accommodate percentage labels
fig.update_xaxes(
range=[-1.1, 1.1],
fixedrange=False,
showline=False,
zeroline=False,
showgrid=False,
gridcolor="rgba(0,0,0,0)",
)
return fig
def bit_matrix_plot(iscc_code):
# type: (ic.Code) -> go.Figure
"""
Create a bit matrix plot for an ISCC-CODE
"""
# Decode ISCC-CODE
data = {}
for unit in ic.iscc_decompose(iscc_code.code):
unit = ic.Code(unit)
data[unit.type_id.split("-")[0]] = unit.hash_bits
# Prepare data for heatmap
z = []
for key, value in data.items():
z.append([int(bit) for bit in value])
# Define colors for 0 and 1 bits
colorscale = [[0, "#7ac2f7"], [1, "#0054b2"]]
# Build Plotly Visualization
fig = go.Figure(
data=go.Heatmap(
z=z,
xgap=2,
ygap=2,
showscale=False,
colorscale=colorscale,
hoverinfo="x+y",
hovertemplate="ISCC-UNIT: %{y}
BIT-NUMBR: %{x}
BIT-VALUE: %{z}",
hoverlabel={
"font": {"family": "JetBrains Mono"},
},
)
)
fig.update_layout(
height=60,
autosize=True,
xaxis=dict(
ticks="",
side="top",
scaleanchor="y",
constrain="domain",
showticklabels=False,
),
yaxis=dict(
ticks="",
tickvals=list(range(len(data))),
ticktext=list(data.keys()),
side="left",
autorange="reversed",
showticklabels=False,
),
paper_bgcolor="rgba(0,0,0,0)",
plot_bgcolor="rgba(0,0,0,0)",
margin=dict(l=10, r=10, t=0, b=10),
modebar_remove=[
"toImage",
"zoom",
"pan",
"zoomIn",
"zoomOut",
"autoScale",
"resetScale",
],
)
fig.update_xaxes(
fixedrange=False,
showline=False,
zeroline=False,
showgrid=False,
gridcolor="rgba(0,0,0,0)",
)
fig.update_yaxes(
fixedrange=False,
showline=False,
zeroline=False,
showgrid=False,
gridcolor="rgba(0,0,0,0)",
)
return fig
def bit_comparison(iscc_code1, iscc_code2):
"""
Create a comparison bit matrix plot for two ISCC-CODES
"""
# Decode ISCC-CODEs
data1, data2 = {}, {}
for unit in ic.iscc_decompose(iscc_code1):
unit = ic.Code(unit)
data1[unit.type_id.split("-")[0]] = unit.hash_bits
for unit in ic.iscc_decompose(iscc_code2):
unit = ic.Code(unit)
data2[unit.type_id.split("-")[0]] = unit.hash_bits
# Prepare data for heatmap comparison
z = []
text = []
for key in data1.keys():
z_row = []
text_row = []
for bit1, bit2 in zip(data1[key], data2.get(key, "")):
if bit1 == bit2:
z_row.append(int(bit1))
text_row.append(bit1)
else:
z_row.append(2)
text_row.append("x")
z.append(z_row)
text.append(text_row)
# Define colors for 0, 1, and non-matching bits
colorscale = [[0, "#a6db50"], [0.5, "#a6db50"], [1, "#f56169"]]
fig = go.Figure(
data=go.Heatmap(
z=z,
text=text,
xgap=2,
ygap=2,
showscale=False,
colorscale=colorscale,
hoverinfo="text",
hovertemplate="ISCC-UNIT: %{y}
BIT-NUMBR: %{x}
BIT-VALUE: %{z}",
hoverlabel={
"font": {"family": "JetBrains Mono"},
},
texttemplate="%{text}", # Use "%{text}" for showing bits
textfont={
"size": 14,
"color": "#FFFFFF",
"family": "JetBrains Mono",
},
)
)
fig.update_layout(
height=120,
autosize=True,
xaxis=dict(
ticks="",
side="top",
scaleanchor="y",
constrain="domain",
showticklabels=False,
),
yaxis=dict(
ticks="",
tickvals=list(range(len(data1))),
ticktext=list(data1.keys()),
side="left",
autorange="reversed",
showticklabels=False,
),
paper_bgcolor="rgba(0,0,0,0)",
plot_bgcolor="rgba(0,0,0,0)",
margin=dict(l=0, r=0, t=0, b=0),
modebar_remove=[
"toImage",
"zoom",
"pan",
"zoomIn",
"zoomOut",
"autoScale",
"resetScale",
],
)
fig.update_xaxes(
fixedrange=False,
showline=False,
zeroline=False,
showgrid=False,
gridcolor="rgba(0,0,0,0)",
)
fig.update_yaxes(
fixedrange=False,
showline=False,
zeroline=False,
showgrid=False,
gridcolor="rgba(0,0,0,0)",
)
return fig
with gr.Blocks(css=custom_css) as demo:
gr.Markdown("## ⚙️ ISCC Similarity Comparison")
with gr.Row(variant="default", equal_height=True):
with gr.Column(variant="compact"):
in_file_a = gr.File(
label="Media File A", type="filepath", elem_classes=["fixed-height"]
)
out_thumb_a = gr.Image(
label="Extracted Thumbnail",
visible=False,
height=240,
elem_classes=["fixed-height"],
interactive=True,
show_download_button=False,
sources=["upload"],
)
# Proxy component to patch image example selection -> gr.File
dumy_image_a = gr.Image(visible=False, type="filepath", height=240)
gr.Examples(
examples=IMAGES1.as_posix(),
cache_examples=False,
inputs=[dumy_image_a],
elem_id="examples-a",
)
out_iscc_a = gr.Text(label="ISCC", show_copy_button=True)
with gr.Accordion(label="Details", open=False):
out_dna_a = gr.Plot(
label="BIT-MATRIX",
container=True,
elem_classes=["small-height"],
)
out_meta_a = gr.Code(language="json", label="ISCC Metadata")
with gr.Column(variant="compact"):
in_file_b = gr.File(
label="Media File B", type="filepath", elem_classes=["fixed-height"]
)
out_thumb_b = gr.Image(
label="Extracted Thumbnail",
visible=False,
height=240,
elem_classes=["fixed-height"],
interactive=True,
show_download_button=False,
sources=["upload"],
)
# Proxy component to patch image example selection -> gr.File
dumy_image_b = gr.Image(visible=False, type="filepath", height=240)
gr.Examples(
examples=IMAGES2.as_posix(),
cache_examples=False,
inputs=[dumy_image_b],
elem_id="examples-b",
)
out_iscc_b = gr.Text(label="ISCC", show_copy_button=True)
with gr.Accordion(
label="Details",
open=False,
):
out_dna_b = gr.Plot(
label="BIT-MATRIX",
container=True,
elem_classes=["small-height"],
)
out_meta_b = gr.Code(language="json", label="ISCC Metadata")
with gr.Row(variant="default", equal_height=True):
with gr.Column(variant="compact"):
out_bitcompare = gr.Plot(
label="BIT-MATRIX Comparison",
container=True,
elem_classes=["bit-matrix-big"],
)
with gr.Row(variant="default", equal_height=True):
with gr.Column(variant="compact"):
out_compare = gr.Plot(
label="ISCC-UNIT Similarities",
container=True,
elem_classes=["iscc-unit-sim"],
)
# Custom footer
footer = (
"https://github.com/iscc"
f" | iscc-core v{ic.__version__}"
f" | iscc-sdk v{idk.__version__}"
f" | iscc-sci v{sci.__version__}"
f" | iscc-schema v{iss.__version__}"
)
gr.Markdown(
footer,
)
def rewrite_uri(filepath, sample_set):
# type: (str, str) -> str
"""Rewrites temporary image URI to original sample URI"""
if filepath:
inpath = Path(filepath)
outpath = HERE / f"{sample_set}/{inpath.name.replace('jpeg', 'jpg')}"
log.info(filepath)
return outpath.as_posix()
def process_upload(filepath, suffix):
# type: (str, str) -> dict
"""Generate extended ISCC with experimental Semantic Code (for images)"""
# Map to active component group
in_file_func = globals().get(f"in_file_{suffix}")
out_thumb_func = globals().get(f"out_thumb_{suffix}")
out_iscc_func = globals().get(f"out_iscc_{suffix}")
out_dna_func = globals().get(f"out_dna_{suffix}")
out_meta_func = globals().get(f"out_meta_{suffix}")
# Handle emtpy filepath
if not filepath:
return {
in_file_func: None,
}
imeta: idk.IsccMeta = iscc_semantic(filepath)
# Create Bit-Matrix Plot
matrix_plot = bit_matrix_plot(imeta.iscc_obj)
# Pop Thumbnail for Preview
thumbnail = None
if imeta.thumbnail:
header, encoded = imeta.thumbnail.split(",", 1)
data = base64.b64decode(encoded)
thumbnail = Image.open(io.BytesIO(data))
imeta.thumbnail = None
result = {
in_file_func: gr.File(visible=False, value=None),
out_thumb_func: gr.Image(visible=True, value=thumbnail),
out_iscc_func: imeta.iscc,
out_dna_func: matrix_plot,
out_meta_func: imeta.json(exclude_unset=False, by_alias=True, indent=2),
}
return result
def iscc_compare(iscc_a, iscc_b):
# type: (str, str) -> dict | None
"""Compare two ISCCs"""
if not all([iscc_a, iscc_b]):
return None, None
dist_data = ic.iscc_compare(iscc_a, iscc_b)
sim_data = dist_to_sim(dist_data, dim=64)
sim_plot = similarity_plot(sim_data)
bit_plot = bit_comparison(iscc_a, iscc_b)
return sim_plot, bit_plot
# Events
in_file_a.change(
lambda file: process_upload(file, "a"),
inputs=[in_file_a],
outputs=[in_file_a, out_thumb_a, out_iscc_a, out_dna_a, out_meta_a],
show_progress="full",
)
in_file_b.change(
lambda file: process_upload(file, "b"),
inputs=[in_file_b],
outputs=[in_file_b, out_thumb_b, out_iscc_b, out_dna_b, out_meta_b],
show_progress="full",
)
out_thumb_a.clear(
lambda: (
gr.File(visible=True),
gr.Image(visible=False),
"",
gr.Plot(value=None),
"",
),
inputs=[],
outputs=[in_file_a, out_thumb_a, out_iscc_a, out_dna_a, out_meta_a],
show_progress="hidden",
)
out_thumb_b.clear(
lambda: (
gr.File(visible=True),
gr.Image(visible=False),
"",
gr.Plot(value=None),
"",
),
inputs=[],
outputs=[in_file_b, out_thumb_b, out_iscc_b, out_dna_b, out_meta_b],
show_progress="hidden",
)
out_iscc_a.change(
iscc_compare,
inputs=[out_iscc_a, out_iscc_b],
outputs=[out_compare, out_bitcompare],
show_progress="hidden",
)
out_iscc_b.change(
iscc_compare,
inputs=[out_iscc_a, out_iscc_b],
outputs=[out_compare, out_bitcompare],
show_progress="hidden",
)
dumy_image_a.change(
lambda file: rewrite_uri(file, "images1"),
inputs=[dumy_image_a],
outputs=[in_file_a],
show_progress="hidden",
)
dumy_image_b.change(
lambda file: rewrite_uri(file, "images2"),
inputs=[dumy_image_b],
outputs=[in_file_b],
show_progress="hidden",
)
if __name__ == "__main__":
demo.launch(debug=True)