iscc-playground / demos /compare.py
titusz's picture
Synced repo using 'sync_with_huggingface' Github Action
b4d6c91 verified
raw
history blame
17.1 kB
import base64
import io
from loguru import logger as log
from pathlib import Path
import gradio as gr
from PIL import Image
import iscc_core as ic
import iscc_sdk as idk
import iscc_schema as iss
import iscc_sci as sci
import plotly.graph_objects as go
import pandas as pd
# Configure the ISCC SDK thumbnail options (size and quality) before any code is generated.
idk.sdk_opts.image_thumbnail_size = 265
idk.sdk_opts.image_thumbnail_quality = 80
# Directory containing this module; the example image folders are resolved relative to it.
HERE = Path(__file__).parent.absolute()
# Example images offered for "Media File A" and "Media File B" respectively.
IMAGES1 = HERE / "images1"
IMAGES2 = HERE / "images2"
# Stylesheet handed to `gr.Blocks(css=...)`. The class selectors correspond to the
# `elem_classes` values and the id selectors to the `elem_id` values assigned to the
# components of the demo. `.modebar-btn` is not assigned by this module (presumably the
# Plotly toolbar buttons -- TODO confirm).
custom_css = """
.fixed-height {
height: 240px; /* Fixed height */
object-fit: contain; /* Scale the image to fit within the element */
}
.small-height {
display: flex; /* Use flexbox layout */
flex-direction: column; /* Arrange children vertically */
justify-content: flex-end; /* Align children to the end (bottom) */
height: 85px; /* Fixed height */
object-fit: contain; /* Scale the content to fit within the element */
}
.bit-matrix-big {
display: flex;
flex-direction: column;
justify-content: flex-end;
height: 120px; /* Fixed height */
object-fit: contain; /* Scale the content to fit within the element */
}
.iscc-unit-sim {
display: flex;
flex-direction: column;
justify-content: flex-end;
height: 120px; /* Fixed height */
object-fit: contain; /* Scale the content to fit within the element */
}
.modebar-btn {
display: none !important;
}
#examples-a, #examples-b {
height: 140px; /* Fixed height */
object-fit: contain; /* Scale the image to fit within the element */
}
"""
def iscc_semantic(filepath: str) -> idk.IsccMeta:
    """
    Generate ISCC metadata for a file, adding a Semantic-Code unit for images.

    :param filepath: Path of the media file to process.
    :return: The metadata object from `idk.code_iscc`. When its mode is "image", the
        `iscc` field is replaced by a code regenerated from the original units plus a
        64-bit semantic image code.
    """
    meta = idk.code_iscc(filepath)
    if meta.mode != "image":
        # Only images get the semantic extension; everything else is returned untouched
        return meta

    semantic_unit = sci.code_image_semantic(filepath, bits=64)["iscc"]
    unit_codes = ic.iscc_decompose(meta.iscc)
    unit_codes.append(semantic_unit)
    # Rebuild the composite code from the extended unit list
    meta.iscc = ic.gen_iscc_code(unit_codes)["iscc"]
    return meta
def dist_to_sim(data, dim=64):
    """
    Convert a mapping of per-unit comparison results into similarity values.

    Each output key is the part of the input key before the first underscore,
    title-cased. The "instance_match" entry becomes 1.0 when its value is exactly
    `True` and -1.0 otherwise; every other value is passed to `hamming_to_cosine`
    together with `dim`.

    :param data: Mapping of comparison keys to distances (or the instance match flag).
    :param dim: Dimension (number of bits) handed to `hamming_to_cosine`.
    :return: Dict of label -> similarity.
    """
    similarities = {}
    for name, value in data.items():
        label = name.split("_")[0].title()
        if name != "instance_match":
            similarities[label] = hamming_to_cosine(value, dim)
        elif value is True:
            similarities[label] = 1.0
        else:
            similarities[label] = -1.0
    return similarities
def hamming_to_cosine(hamming_distance: int, dim: int) -> float:
    """
    Approximate the cosine similarity for a given hamming distance and dimension.

    Computes `1 - 2 * hamming_distance / dim`, so a distance of 0 yields 1.0 and a
    distance equal to `dim` yields -1.0. The inputs and the result are logged at
    debug level.

    :param hamming_distance: Number of differing bits.
    :param dim: Total number of bits compared.
    :return: The approximated similarity.
    :raises ZeroDivisionError: If `dim` is 0.
    """
    doubled_ratio = (2 * hamming_distance) / dim
    result = 1 - doubled_ratio
    log.debug(f"Hamming distance: {hamming_distance} - Dim: {dim} - Result: {result}")
    return result
def similarity_plot(sim_data):
    # type: (dict) -> go.Figure
    """
    Render similarity values as a horizontal bar chart.

    :param sim_data: Mapping of label -> similarity value. Values are drawn on an
        x-axis fixed to the range [-1.1, 1.1] and labelled as percentages.
    :return: Plotly figure with one bar per entry, 40 units of height per bar.
    """

    def as_percent(value):
        return f"{value:.2f}%"

    # Rows follow the reverse insertion order of the mapping (no sorting by value)
    frame = pd.DataFrame(reversed(sim_data.items()), columns=["Category", "Value"])
    frame["Percentage"] = frame["Value"] * 100
    # Negative similarities are drawn red, all others green
    frame["Color"] = [
        "#f56169" if value < 0 else "#a6db50" for value in frame["Value"]
    ]

    bars = go.Bar(
        x=frame["Value"],
        y=frame["Category"],
        orientation="h",
        marker_color=frame["Color"],
        marker_line=dict(width=0),
        text=frame["Percentage"].apply(as_percent),
        textposition="inside",
        textfont=dict(size=14, family="JetBrains Mono", color="white"),
        hoverinfo=None,
        hovertemplate="ISCC-UNIT: %{y}<br>SIMILARITY: %{x}<extra></extra>",
        hoverlabel=dict(
            font=dict(family="JetBrains Mono", color="#FFFFFF"),
            bgcolor="#444444",
        ),
    )

    fig = go.Figure()
    fig.add_trace(bars)

    # Transparent background, hidden axis labels and a reduced mode bar
    hidden_buttons = [
        "toImage",
        "zoom",
        "pan",
        "zoomIn",
        "zoomOut",
        "autoScale",
        "resetScale",
    ]
    fig.update_layout(
        height=40 * len(sim_data),
        autosize=True,
        xaxis={"title": "", "tickformat": ",.0%", "showticklabels": False},
        yaxis={"title": "", "showticklabels": False},
        paper_bgcolor="rgba(0,0,0,0)",
        plot_bgcolor="rgba(0,0,0,0)",
        showlegend=False,
        modebar_remove=hidden_buttons,
    )

    # Slightly wider than [-1, 1] so full-length bars do not touch the plot border
    fig.update_xaxes(
        range=[-1.1, 1.1],
        fixedrange=False,
        showline=False,
        zeroline=False,
        showgrid=False,
        gridcolor="rgba(0,0,0,0)",
    )
    return fig
def bit_matrix_plot(iscc_code):
    # type: (ic.Code) -> go.Figure
    """
    Draw the hash bits of an ISCC-CODE as a heatmap with one row per ISCC-UNIT.

    :param iscc_code: Code object whose `code` attribute is decomposed into units.
    :return: Plotly heatmap figure (light blue for 0 bits, dark blue for 1 bits).
    """
    # Collect the hash bits of every unit, keyed by the first part of its type id
    bits_by_unit = {}
    for raw_unit in ic.iscc_decompose(iscc_code.code):
        decoded = ic.Code(raw_unit)
        unit_name = decoded.type_id.split("-")[0]
        bits_by_unit[unit_name] = decoded.hash_bits

    # One heatmap row of integer bits per unit
    rows = [[int(bit) for bit in bits] for bits in bits_by_unit.values()]

    # Two-step colorscale: 0 bits light, 1 bits dark
    bit_colors = [[0, "#7ac2f7"], [1, "#0054b2"]]

    heatmap = go.Heatmap(
        z=rows,
        xgap=2,
        ygap=2,
        showscale=False,
        colorscale=bit_colors,
        hoverinfo="x+y",
        hovertemplate="ISCC-UNIT: %{y}<br>BIT-NUMBR: %{x}<br>BIT-VALUE: %{z}<extra></extra>",
        hoverlabel=dict(font=dict(family="JetBrains Mono")),
    )
    fig = go.Figure(data=heatmap)

    hidden_buttons = [
        "toImage",
        "zoom",
        "pan",
        "zoomIn",
        "zoomOut",
        "autoScale",
        "resetScale",
    ]
    fig.update_layout(
        height=60,
        autosize=True,
        xaxis={
            "ticks": "",
            "side": "top",
            "scaleanchor": "y",
            "constrain": "domain",
            "showticklabels": False,
        },
        yaxis={
            "ticks": "",
            "tickvals": list(range(len(bits_by_unit))),
            "ticktext": list(bits_by_unit.keys()),
            "side": "left",
            "autorange": "reversed",
            "showticklabels": False,
        },
        paper_bgcolor="rgba(0,0,0,0)",
        plot_bgcolor="rgba(0,0,0,0)",
        margin={"l": 10, "r": 10, "t": 0, "b": 10},
        modebar_remove=hidden_buttons,
    )

    # Same axis cosmetics for both axes: no lines, no grid
    plain_axis = dict(
        fixedrange=False,
        showline=False,
        zeroline=False,
        showgrid=False,
        gridcolor="rgba(0,0,0,0)",
    )
    fig.update_xaxes(**plain_axis)
    fig.update_yaxes(**plain_axis)
    return fig
def _unit_hash_bits(iscc_code):
    # type: (str) -> dict
    """Map the name of every ISCC-UNIT in `iscc_code` to that unit's `hash_bits`."""
    bits_by_unit = {}
    for unit in ic.iscc_decompose(iscc_code):
        unit = ic.Code(unit)
        bits_by_unit[unit.type_id.split("-")[0]] = unit.hash_bits
    return bits_by_unit


def bit_comparison(iscc_code1, iscc_code2):
    # type: (str, str) -> go.Figure
    """
    Create a comparison bit matrix plot for two ISCC-CODES

    Each row corresponds to one ISCC-UNIT of `iscc_code1`. Matching bits are drawn
    green and labelled with the bit value, differing bits are drawn red and labelled
    with "x". Units missing from `iscc_code2` produce an empty row.

    :param iscc_code1: First ISCC-CODE; defines the rows of the matrix.
    :param iscc_code2: Second ISCC-CODE to compare against.
    :return: Plotly heatmap figure.
    """
    # Decode ISCC-CODEs
    data1 = _unit_hash_bits(iscc_code1)
    data2 = _unit_hash_bits(iscc_code2)

    # Prepare data for heatmap comparison: z is 0/1 for matching bits, 2 for mismatches
    z = []
    text = []
    for key, bits1 in data1.items():
        z_row = []
        text_row = []
        # zip() stops at the shorter sequence, so a missing unit yields an empty row
        for bit1, bit2 in zip(bits1, data2.get(key, "")):
            if bit1 == bit2:
                z_row.append(int(bit1))
                text_row.append(bit1)
            else:
                z_row.append(2)
                text_row.append("x")
        z.append(z_row)
        text.append(text_row)

    # Define colors for 0, 1, and non-matching bits
    colorscale = [[0, "#a6db50"], [0.5, "#a6db50"], [1, "#f56169"]]

    fig = go.Figure(
        data=go.Heatmap(
            z=z,
            text=text,
            # Pin the color range to the full 0..2 value domain. With the automatic
            # range, a matrix without mismatches (only 0 and 1) would be normalized
            # so that matching 1 bits hit the top of the scale and render red.
            zmin=0,
            zmax=2,
            xgap=2,
            ygap=2,
            showscale=False,
            colorscale=colorscale,
            hoverinfo="text",
            hovertemplate="ISCC-UNIT: %{y}<br>BIT-NUMBR: %{x}<br>BIT-VALUE: %{z}<extra></extra>",
            hoverlabel={
                "font": {"family": "JetBrains Mono"},
            },
            texttemplate="%{text}",  # Use "%{text}" for showing bits
            textfont={
                "size": 14,
                "color": "#FFFFFF",
                "family": "JetBrains Mono",
            },
        )
    )

    fig.update_layout(
        height=120,
        autosize=True,
        xaxis=dict(
            ticks="",
            side="top",
            scaleanchor="y",
            constrain="domain",
            showticklabels=False,
        ),
        yaxis=dict(
            ticks="",
            tickvals=list(range(len(data1))),
            ticktext=list(data1.keys()),
            side="left",
            autorange="reversed",
            showticklabels=False,
        ),
        paper_bgcolor="rgba(0,0,0,0)",
        plot_bgcolor="rgba(0,0,0,0)",
        margin=dict(l=0, r=0, t=0, b=0),
        modebar_remove=[
            "toImage",
            "zoom",
            "pan",
            "zoomIn",
            "zoomOut",
            "autoScale",
            "resetScale",
        ],
    )

    fig.update_xaxes(
        fixedrange=False,
        showline=False,
        zeroline=False,
        showgrid=False,
        gridcolor="rgba(0,0,0,0)",
    )
    fig.update_yaxes(
        fixedrange=False,
        showline=False,
        zeroline=False,
        showgrid=False,
        gridcolor="rgba(0,0,0,0)",
    )
    return fig
# Demo UI: two upload columns (A and B), a bit-matrix comparison row and a per-unit
# similarity row. Components are created in layout order; the handlers and event wiring
# are declared inside the same Blocks context.
with gr.Blocks(css=custom_css) as demo:
    gr.Markdown("## ⚙️ ISCC Similarity Comparison")
    with gr.Row(variant="default", equal_height=True):
        # Column A: file input, thumbnail preview, examples, ISCC text and details
        with gr.Column(variant="compact"):
            in_file_a = gr.File(
                label="Media File A", type="filepath", elem_classes=["fixed-height"]
            )
            # Hidden until a file was processed; replaces the file input visually
            out_thumb_a = gr.Image(
                label="Extracted Thumbnail",
                visible=False,
                height=240,
                elem_classes=["fixed-height"],
                interactive=True,
                show_download_button=False,
                sources=["upload"],
            )
            # Proxy component to patch image example selection -> gr.File
            dumy_image_a = gr.Image(visible=False, type="filepath", height=240)
            gr.Examples(
                examples=IMAGES1.as_posix(),
                cache_examples=False,
                inputs=[dumy_image_a],
                elem_id="examples-a",
            )
            out_iscc_a = gr.Text(label="ISCC", show_copy_button=True)
            with gr.Accordion(label="Details", open=False):
                out_dna_a = gr.Plot(
                    label="BIT-MATRIX",
                    container=True,
                    elem_classes=["small-height"],
                )
                out_meta_a = gr.Code(language="json", label="ISCC Metadata")
        # Column B: mirrors column A for the second media file
        with gr.Column(variant="compact"):
            in_file_b = gr.File(
                label="Media File B", type="filepath", elem_classes=["fixed-height"]
            )
            # Hidden until a file was processed; replaces the file input visually
            out_thumb_b = gr.Image(
                label="Extracted Thumbnail",
                visible=False,
                height=240,
                elem_classes=["fixed-height"],
                interactive=True,
                show_download_button=False,
                sources=["upload"],
            )
            # Proxy component to patch image example selection -> gr.File
            dumy_image_b = gr.Image(visible=False, type="filepath", height=240)
            gr.Examples(
                examples=IMAGES2.as_posix(),
                cache_examples=False,
                inputs=[dumy_image_b],
                elem_id="examples-b",
            )
            out_iscc_b = gr.Text(label="ISCC", show_copy_button=True)
            with gr.Accordion(
                label="Details",
                open=False,
            ):
                out_dna_b = gr.Plot(
                    label="BIT-MATRIX",
                    container=True,
                    elem_classes=["small-height"],
                )
                out_meta_b = gr.Code(language="json", label="ISCC Metadata")
    # Bit-by-bit comparison of both codes (filled by `iscc_compare`)
    with gr.Row(variant="default", equal_height=True):
        with gr.Column(variant="compact"):
            out_bitcompare = gr.Plot(
                label="BIT-MATRIX Comparison",
                container=True,
                elem_classes=["bit-matrix-big"],
            )
    # Per-unit similarity bars (filled by `iscc_compare`)
    with gr.Row(variant="default", equal_height=True):
        with gr.Column(variant="compact"):
            out_compare = gr.Plot(
                label="ISCC-UNIT Similarities",
                container=True,
                elem_classes=["iscc-unit-sim"],
            )
    # Custom footer
    footer = (
        "https://github.com/iscc"
        f" | iscc-core v{ic.__version__}"
        f" | iscc-sdk v{idk.__version__}"
        f" | iscc-sci v{sci.__version__}"
        f" | iscc-schema v{iss.__version__}"
    )
    gr.Markdown(
        footer,
    )

    def rewrite_uri(filepath, sample_set):
        # type: (str, str) -> str
        """
        Rewrites temporary image URI to original sample URI

        :param filepath: Path delivered by the hidden example image component.
        :param sample_set: Name of the sample folder below `HERE`.
        :return: Posix path `HERE/<sample_set>/<file name>` with "jpeg" replaced by "jpg"
            in the file name. Returns None implicitly when `filepath` is empty.
        """
        if filepath:
            inpath = Path(filepath)
            outpath = HERE / f"{sample_set}/{inpath.name.replace('jpeg', 'jpg')}"
            log.info(filepath)
            return outpath.as_posix()

    def process_upload(filepath, suffix):
        # type: (str, str) -> dict
        """
        Generate extended ISCC with experimental Semantic Code (for images)

        :param filepath: Path of the uploaded file; may be empty.
        :param suffix: Component group selector, used to look up `in_file_<suffix>`,
            `out_thumb_<suffix>`, `out_iscc_<suffix>`, `out_dna_<suffix>` and
            `out_meta_<suffix>`.
        :return: Dict mapping the components of the selected group to their new values.
        """
        # Map to active component group
        # The components are resolved by name from the module globals
        in_file_func = globals().get(f"in_file_{suffix}")
        out_thumb_func = globals().get(f"out_thumb_{suffix}")
        out_iscc_func = globals().get(f"out_iscc_{suffix}")
        out_dna_func = globals().get(f"out_dna_{suffix}")
        out_meta_func = globals().get(f"out_meta_{suffix}")
        # Handle empty filepath
        if not filepath:
            return {
                in_file_func: None,
            }
        imeta: idk.IsccMeta = iscc_semantic(filepath)
        # Create Bit-Matrix Plot
        matrix_plot = bit_matrix_plot(imeta.iscc_obj)
        # Pop Thumbnail for Preview
        thumbnail = None
        if imeta.thumbnail:
            # The thumbnail is split at the first comma and the part after it is
            # base64-decoded into an image
            header, encoded = imeta.thumbnail.split(",", 1)
            data = base64.b64decode(encoded)
            thumbnail = Image.open(io.BytesIO(data))
            # Removed from the metadata so it is not part of the JSON output below
            imeta.thumbnail = None
        # Hide the file input, show the thumbnail and fill the outputs of the group
        result = {
            in_file_func: gr.File(visible=False, value=None),
            out_thumb_func: gr.Image(visible=True, value=thumbnail),
            out_iscc_func: imeta.iscc,
            out_dna_func: matrix_plot,
            out_meta_func: imeta.json(exclude_unset=False, by_alias=True, indent=2),
        }
        return result

    def iscc_compare(iscc_a, iscc_b):
        # type: (str, str) -> tuple
        """
        Compare two ISCCs

        :param iscc_a: ISCC text of column A.
        :param iscc_b: ISCC text of column B.
        :return: Tuple `(similarity plot, bit comparison plot)`, or `(None, None)` when
            either ISCC is empty.
        """
        if not all([iscc_a, iscc_b]):
            return None, None
        dist_data = ic.iscc_compare(iscc_a, iscc_b)
        sim_data = dist_to_sim(dist_data, dim=64)
        sim_plot = similarity_plot(sim_data)
        bit_plot = bit_comparison(iscc_a, iscc_b)
        return sim_plot, bit_plot

    # Events
    # A changed file input triggers ISCC generation for the respective column
    in_file_a.change(
        lambda file: process_upload(file, "a"),
        inputs=[in_file_a],
        outputs=[in_file_a, out_thumb_a, out_iscc_a, out_dna_a, out_meta_a],
        show_progress="full",
    )
    in_file_b.change(
        lambda file: process_upload(file, "b"),
        inputs=[in_file_b],
        outputs=[in_file_b, out_thumb_b, out_iscc_b, out_dna_b, out_meta_b],
        show_progress="full",
    )
    # Clearing the thumbnail resets the column: file input visible again, thumbnail
    # hidden, ISCC text, bit-matrix plot and metadata emptied
    out_thumb_a.clear(
        lambda: (
            gr.File(visible=True),
            gr.Image(visible=False),
            "",
            gr.Plot(value=None),
            "",
        ),
        inputs=[],
        outputs=[in_file_a, out_thumb_a, out_iscc_a, out_dna_a, out_meta_a],
        show_progress="hidden",
    )
    out_thumb_b.clear(
        lambda: (
            gr.File(visible=True),
            gr.Image(visible=False),
            "",
            gr.Plot(value=None),
            "",
        ),
        inputs=[],
        outputs=[in_file_b, out_thumb_b, out_iscc_b, out_dna_b, out_meta_b],
        show_progress="hidden",
    )
    # Any change of either ISCC text refreshes both comparison plots
    out_iscc_a.change(
        iscc_compare,
        inputs=[out_iscc_a, out_iscc_b],
        outputs=[out_compare, out_bitcompare],
        show_progress="hidden",
    )
    out_iscc_b.change(
        iscc_compare,
        inputs=[out_iscc_a, out_iscc_b],
        outputs=[out_compare, out_bitcompare],
        show_progress="hidden",
    )
    # A change of the hidden proxy image forwards the mapped sample path to the file input
    dumy_image_a.change(
        lambda file: rewrite_uri(file, "images1"),
        inputs=[dumy_image_a],
        outputs=[in_file_a],
        show_progress="hidden",
    )
    dumy_image_b.change(
        lambda file: rewrite_uri(file, "images2"),
        inputs=[dumy_image_b],
        outputs=[in_file_b],
        show_progress="hidden",
    )
# Launch the demo only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    demo.launch(debug=True)