"""Gradio demo: compare two media files by their ISCC codes.

For every uploaded file an ISCC-CODE is generated (extended with an
experimental Semantic-Code for images). The app shows a bit matrix per file,
a bit-by-bit comparison of both codes and per-unit similarity bars.
"""

import base64
import io
from pathlib import Path

import gradio as gr
import iscc_core as ic
import iscc_schema as iss
import iscc_sci as sci
import iscc_sdk as idk
import pandas as pd
import plotly.graph_objects as go
from loguru import logger as log
from PIL import Image

idk.sdk_opts.image_thumbnail_size = 265
idk.sdk_opts.image_thumbnail_quality = 80

HERE = Path(__file__).parent.absolute()
IMAGES1 = HERE / "images1"
IMAGES2 = HERE / "images2"

# Plotly modebar buttons hidden on every figure of this app.
HIDDEN_MODEBAR_BUTTONS = [
    "toImage",
    "zoom",
    "pan",
    "zoomIn",
    "zoomOut",
    "autoScale",
    "resetScale",
]

custom_css = """
.fixed-height {
    height: 240px; /* Fixed height */
    object-fit: contain; /* Scale the image to fit within the element */
}
.small-height {
    display: flex; /* Use flexbox layout */
    flex-direction: column; /* Arrange children vertically */
    justify-content: flex-end; /* Align children to the end (bottom) */
    height: 85px; /* Fixed height */
    object-fit: contain; /* Scale the content to fit within the element */
}
.bit-matrix-big {
    display: flex;
    flex-direction: column;
    justify-content: flex-end;
    height: 120px; /* Fixed height */
    object-fit: contain; /* Scale the content to fit within the element */
}
.iscc-unit-sim {
    display: flex;
    flex-direction: column;
    justify-content: flex-end;
    height: 120px; /* Fixed height */
    object-fit: contain; /* Scale the content to fit within the element */
}
.modebar-btn {
    display: none !important;
}
#examples-a, #examples-b {
    height: 140px; /* Fixed height */
    object-fit: contain; /* Scale the image to fit within the element */
}
"""


def iscc_semantic(filepath: str) -> idk.IsccMeta:
    """Generate ISCC-CODE extended with Semantic-Code for supported modalities (Image)"""
    imeta = idk.code_iscc(filepath)
    if imeta.mode == "image":
        # Inject Semantic-Code: decompose the ISCC-CODE into its units, add the
        # semantic unit and re-assemble everything into a new ISCC-CODE.
        sci_code = sci.code_image_semantic(filepath, bits=64)["iscc"]
        units = ic.iscc_decompose(imeta.iscc)
        units.append(sci_code)
        imeta.iscc = ic.gen_iscc_code(units)["iscc"]
    return imeta


def dist_to_sim(data, dim=64):
    # type: (dict, int) -> dict
    """
    Convert a mapping of per-unit distances into a mapping of similarities.

    Keys are shortened to the title-cased part before the first underscore.
    The `instance_match` entry becomes 1.0 when its value is `True` and -1.0
    otherwise; every other value is treated as a hamming distance and passed to
    `hamming_to_cosine`.

    :param data: Mapping of result keys to hamming distances / match flag.
    :param dim: Number of bits the hamming distances refer to.
    :return: Mapping of unit label to similarity.
    """
    result = {}
    for key, value in data.items():
        label = key.split("_")[0].title()
        if key == "instance_match":
            result[label] = 1.0 if value is True else -1.0
        else:
            result[label] = hamming_to_cosine(value, dim)
    return result


def hamming_to_cosine(hamming_distance: int, dim: int) -> float:
    """Approximate the cosine similarity for a given hamming distance and dimension"""
    result = 1 - (2 * hamming_distance) / dim
    log.debug(f"Hamming distance: {hamming_distance} - Dim: {dim} - Result: {result}")
    return result


def similarity_plot(sim_data):
    # type: (dict) -> go.Figure
    """
    Create a horizontal bar chart of per-unit similarities.

    :param sim_data: Mapping of unit label to similarity (expected in -1.0 .. 1.0).
    :return: Plotly figure with one bar per entry, red for negative values.
    """
    # Rows are taken in reversed insertion order (no sorting by value), so the
    # first unit ends up at the top of the horizontal bar chart.
    data_df = pd.DataFrame(reversed(sim_data.items()), columns=["Category", "Value"])
    data_df["Percentage"] = data_df["Value"] * 100  # Convert to percentage

    # Define color for bars based on value
    data_df["Color"] = ["#f56169" if x < 0 else "#a6db50" for x in data_df["Value"]]

    # Create Plotly Figure
    fig = go.Figure()
    fig.add_trace(
        go.Bar(
            x=data_df["Value"],
            y=data_df["Category"],
            orientation="h",
            marker_color=data_df["Color"],
            marker_line={"width": 0},
            text=data_df["Percentage"].apply(lambda x: f"{x:.2f}%"),
            textposition="inside",
            textfont={
                "size": 14,
                "family": "JetBrains Mono",
                "color": "white",
            },
            hoverinfo=None,
            # Plotly needs <br> for line breaks in hover text.
            hovertemplate="ISCC-UNIT: %{y}<br>SIMILARITY: %{x}",
            hoverlabel={
                "font": {"family": "JetBrains Mono", "color": "#FFFFFF"},
                "bgcolor": "#444444",
            },
        )
    )

    # Update layout for aesthetics
    fig.update_layout(
        height=len(sim_data) * 40,
        autosize=True,
        xaxis=dict(
            title="",
            tickformat=",.0%",
            showticklabels=False,
        ),
        yaxis=dict(
            title="",
            showticklabels=False,
        ),
        paper_bgcolor="rgba(0,0,0,0)",
        plot_bgcolor="rgba(0,0,0,0)",
        showlegend=False,
        modebar_remove=HIDDEN_MODEBAR_BUTTONS,
    )

    # Adjust the x-axis to accommodate percentage labels
    fig.update_xaxes(
        range=[-1.1, 1.1],
        fixedrange=False,
        showline=False,
        zeroline=False,
        showgrid=False,
        gridcolor="rgba(0,0,0,0)",
    )
    return fig


def _decode_unit_bits(iscc):
    # type: (str) -> dict
    """Map the type label of every unit of an ISCC to that unit's bit string."""
    unit_bits = {}
    for unit in ic.iscc_decompose(iscc):
        code = ic.Code(unit)
        unit_bits[code.type_id.split("-")[0]] = code.hash_bits
    return unit_bits


def bit_matrix_plot(iscc_code):
    # type: (ic.Code) -> go.Figure
    """
    Create a bit matrix plot for an ISCC-CODE

    :param iscc_code: ISCC-CODE object; its `.code` string is decomposed into units.
    :return: Plotly heatmap with one row per unit and one cell per bit.
    """
    # Decode ISCC-CODE
    data = _decode_unit_bits(iscc_code.code)

    # Prepare data for heatmap
    z = [[int(bit) for bit in bits] for bits in data.values()]

    # Define colors for 0 and 1 bits
    colorscale = [[0, "#7ac2f7"], [1, "#0054b2"]]

    # Build Plotly Visualization
    fig = go.Figure(
        data=go.Heatmap(
            z=z,
            xgap=2,
            ygap=2,
            showscale=False,
            colorscale=colorscale,
            hoverinfo="x+y",
            hovertemplate="ISCC-UNIT: %{y}<br>BIT-NUMBR: %{x}<br>BIT-VALUE: %{z}",
            hoverlabel={
                "font": {"family": "JetBrains Mono"},
            },
        )
    )

    fig.update_layout(
        height=60,
        autosize=True,
        xaxis=dict(
            ticks="",
            side="top",
            scaleanchor="y",
            constrain="domain",
            showticklabels=False,
        ),
        yaxis=dict(
            ticks="",
            tickvals=list(range(len(data))),
            ticktext=list(data.keys()),
            side="left",
            autorange="reversed",
            showticklabels=False,
        ),
        paper_bgcolor="rgba(0,0,0,0)",
        plot_bgcolor="rgba(0,0,0,0)",
        margin=dict(l=10, r=10, t=0, b=10),
        modebar_remove=HIDDEN_MODEBAR_BUTTONS,
    )

    fig.update_xaxes(
        fixedrange=False,
        showline=False,
        zeroline=False,
        showgrid=False,
        gridcolor="rgba(0,0,0,0)",
    )

    fig.update_yaxes(
        fixedrange=False,
        showline=False,
        zeroline=False,
        showgrid=False,
        gridcolor="rgba(0,0,0,0)",
    )
    return fig


def bit_comparison(iscc_code1, iscc_code2):
    # type: (str, str) -> go.Figure
    """
    Create a comparison bit matrix plot for two ISCC-CODES

    Cells hold the shared bit value where both codes agree and are marked with
    "x" (z value 2) where they differ. Rows follow the units of the first code;
    a unit missing from the second code yields an empty row.

    :param iscc_code1: First ISCC-CODE string.
    :param iscc_code2: Second ISCC-CODE string.
    :return: Plotly heatmap, green for matching bits and red for differing bits.
    """
    # Decode ISCC-CODEs
    data1 = _decode_unit_bits(iscc_code1)
    data2 = _decode_unit_bits(iscc_code2)

    # Prepare data for heatmap comparison
    z = []
    text = []
    for key, bits1 in data1.items():
        z_row = []
        text_row = []
        for bit1, bit2 in zip(bits1, data2.get(key, "")):
            if bit1 == bit2:
                z_row.append(int(bit1))
                text_row.append(bit1)
            else:
                z_row.append(2)
                text_row.append("x")
        z.append(z_row)
        text.append(text_row)

    # Define colors for 0, 1, and non-matching bits
    colorscale = [[0, "#a6db50"], [0.5, "#a6db50"], [1, "#f56169"]]

    fig = go.Figure(
        data=go.Heatmap(
            z=z,
            # Pin the color range to the three possible cell values (0, 1, 2).
            # With auto-ranging, a comparison without any mismatch spans only
            # 0..1 and every matching 1-bit would be drawn in the mismatch color.
            zmin=0,
            zmax=2,
            text=text,
            xgap=2,
            ygap=2,
            showscale=False,
            colorscale=colorscale,
            hoverinfo="text",
            hovertemplate="ISCC-UNIT: %{y}<br>BIT-NUMBR: %{x}<br>BIT-VALUE: %{z}",
            hoverlabel={
                "font": {"family": "JetBrains Mono"},
            },
            texttemplate="%{text}",  # Use "%{text}" for showing bits
            textfont={
                "size": 14,
                "color": "#FFFFFF",
                "family": "JetBrains Mono",
            },
        )
    )

    fig.update_layout(
        height=120,
        autosize=True,
        xaxis=dict(
            ticks="",
            side="top",
            scaleanchor="y",
            constrain="domain",
            showticklabels=False,
        ),
        yaxis=dict(
            ticks="",
            tickvals=list(range(len(data1))),
            ticktext=list(data1.keys()),
            side="left",
            autorange="reversed",
            showticklabels=False,
        ),
        paper_bgcolor="rgba(0,0,0,0)",
        plot_bgcolor="rgba(0,0,0,0)",
        margin=dict(l=0, r=0, t=0, b=0),
        modebar_remove=HIDDEN_MODEBAR_BUTTONS,
    )

    fig.update_xaxes(
        fixedrange=False,
        showline=False,
        zeroline=False,
        showgrid=False,
        gridcolor="rgba(0,0,0,0)",
    )

    fig.update_yaxes(
        fixedrange=False,
        showline=False,
        zeroline=False,
        showgrid=False,
        gridcolor="rgba(0,0,0,0)",
    )
    return fig


with gr.Blocks(css=custom_css) as demo:
    gr.Markdown("## ⚙️ ISCC Similarity Comparison")

    with gr.Row(variant="default", equal_height=True):
        with gr.Column(variant="compact"):
            in_file_a = gr.File(
                label="Media File A", type="filepath", elem_classes=["fixed-height"]
            )
            out_thumb_a = gr.Image(
                label="Extracted Thumbnail",
                visible=False,
                height=240,
                elem_classes=["fixed-height"],
                interactive=True,
                show_download_button=False,
                sources=["upload"],
            )

            # Proxy component to patch image example selection -> gr.File
            dumy_image_a = gr.Image(visible=False, type="filepath", height=240)

            gr.Examples(
                examples=IMAGES1.as_posix(),
                cache_examples=False,
                inputs=[dumy_image_a],
                elem_id="examples-a",
            )

            out_iscc_a = gr.Text(label="ISCC", show_copy_button=True)

            with gr.Accordion(label="Details", open=False):
                out_dna_a = gr.Plot(
                    label="BIT-MATRIX",
                    container=True,
                    elem_classes=["small-height"],
                )
                out_meta_a = gr.Code(language="json", label="ISCC Metadata")

        with gr.Column(variant="compact"):
            in_file_b = gr.File(
                label="Media File B", type="filepath", elem_classes=["fixed-height"]
            )
            out_thumb_b = gr.Image(
                label="Extracted Thumbnail",
                visible=False,
                height=240,
                elem_classes=["fixed-height"],
                interactive=True,
                show_download_button=False,
                sources=["upload"],
            )

            # Proxy component to patch image example selection -> gr.File
            dumy_image_b = gr.Image(visible=False, type="filepath", height=240)

            gr.Examples(
                examples=IMAGES2.as_posix(),
                cache_examples=False,
                inputs=[dumy_image_b],
                elem_id="examples-b",
            )

            out_iscc_b = gr.Text(label="ISCC", show_copy_button=True)

            with gr.Accordion(
                label="Details",
                open=False,
            ):
                out_dna_b = gr.Plot(
                    label="BIT-MATRIX",
                    container=True,
                    elem_classes=["small-height"],
                )
                out_meta_b = gr.Code(language="json", label="ISCC Metadata")

    with gr.Row(variant="default", equal_height=True):
        with gr.Column(variant="compact"):
            out_bitcompare = gr.Plot(
                label="BIT-MATRIX Comparison",
                container=True,
                elem_classes=["bit-matrix-big"],
            )

    with gr.Row(variant="default", equal_height=True):
        with gr.Column(variant="compact"):
            out_compare = gr.Plot(
                label="ISCC-UNIT Similarities",
                container=True,
                elem_classes=["iscc-unit-sim"],
            )

    # Custom footer
    footer = (
        "https://github.com/iscc"
        f" | iscc-core v{ic.__version__}"
        f" | iscc-sdk v{idk.__version__}"
        f" | iscc-sci v{sci.__version__}"
        f" | iscc-schema v{iss.__version__}"
    )
    gr.Markdown(
        footer,
    )

    def rewrite_uri(filepath, sample_set):
        # type: (str, str) -> str | None
        """
        Rewrites temporary image URI to original sample URI

        :param filepath: Path of the temporary image file (may be empty).
        :param sample_set: Name of the sample folder next to this file.
        :return: Posix path inside the sample folder or None for an empty filepath.
        """
        if not filepath:
            return None
        inpath = Path(filepath)
        outpath = HERE / f"{sample_set}/{inpath.name.replace('jpeg', 'jpg')}"
        log.info(filepath)
        return outpath.as_posix()

    def process_upload(filepath, suffix):
        # type: (str, str) -> dict
        """
        Generate extended ISCC with experimental Semantic Code (for images)

        :param filepath: Path of the uploaded file (empty when the input was cleared).
        :param suffix: Component group to update ("a" or "b").
        :return: Mapping of output components to their new values.
        """
        # Map to active component group. The components are module globals
        # named `<prefix>_<suffix>`.
        in_file_comp = globals().get(f"in_file_{suffix}")
        out_thumb_comp = globals().get(f"out_thumb_{suffix}")
        out_iscc_comp = globals().get(f"out_iscc_{suffix}")
        out_dna_comp = globals().get(f"out_dna_{suffix}")
        out_meta_comp = globals().get(f"out_meta_{suffix}")

        # Handle empty filepath
        if not filepath:
            return {
                in_file_comp: None,
            }

        imeta: idk.IsccMeta = iscc_semantic(filepath)

        # Create Bit-Matrix Plot
        matrix_plot = bit_matrix_plot(imeta.iscc_obj)

        # Pop Thumbnail for Preview: decode the part after the first comma of
        # the thumbnail string and drop it from the metadata shown as JSON.
        thumbnail = None
        if imeta.thumbnail:
            _, encoded = imeta.thumbnail.split(",", 1)
            thumbnail = Image.open(io.BytesIO(base64.b64decode(encoded)))
            imeta.thumbnail = None

        return {
            in_file_comp: gr.File(visible=False, value=None),
            out_thumb_comp: gr.Image(visible=True, value=thumbnail),
            out_iscc_comp: imeta.iscc,
            out_dna_comp: matrix_plot,
            out_meta_comp: imeta.json(exclude_unset=False, by_alias=True, indent=2),
        }

    def iscc_compare(iscc_a, iscc_b):
        # type: (str, str) -> tuple
        """
        Compare two ISCCs

        :param iscc_a: First ISCC string (may be empty).
        :param iscc_b: Second ISCC string (may be empty).
        :return: Tuple (similarity plot, bit comparison plot); (None, None) while
            one of the two ISCCs is missing.
        """
        if not all([iscc_a, iscc_b]):
            return None, None
        dist_data = ic.iscc_compare(iscc_a, iscc_b)
        sim_data = dist_to_sim(dist_data, dim=64)
        sim_plot = similarity_plot(sim_data)
        bit_plot = bit_comparison(iscc_a, iscc_b)
        return sim_plot, bit_plot

    def _reset_outputs():
        # type: () -> tuple
        """Values that reset one component group after its thumbnail was cleared."""
        return (
            gr.File(visible=True),
            gr.Image(visible=False),
            "",
            gr.Plot(value=None),
            "",
        )

    # Events
    in_file_a.change(
        lambda file: process_upload(file, "a"),
        inputs=[in_file_a],
        outputs=[in_file_a, out_thumb_a, out_iscc_a, out_dna_a, out_meta_a],
        show_progress="full",
    )
    in_file_b.change(
        lambda file: process_upload(file, "b"),
        inputs=[in_file_b],
        outputs=[in_file_b, out_thumb_b, out_iscc_b, out_dna_b, out_meta_b],
        show_progress="full",
    )
    out_thumb_a.clear(
        _reset_outputs,
        inputs=[],
        outputs=[in_file_a, out_thumb_a, out_iscc_a, out_dna_a, out_meta_a],
        show_progress="hidden",
    )
    out_thumb_b.clear(
        _reset_outputs,
        inputs=[],
        outputs=[in_file_b, out_thumb_b, out_iscc_b, out_dna_b, out_meta_b],
        show_progress="hidden",
    )
    out_iscc_a.change(
        iscc_compare,
        inputs=[out_iscc_a, out_iscc_b],
        outputs=[out_compare, out_bitcompare],
        show_progress="hidden",
    )
    out_iscc_b.change(
        iscc_compare,
        inputs=[out_iscc_a, out_iscc_b],
        outputs=[out_compare, out_bitcompare],
        show_progress="hidden",
    )
    dumy_image_a.change(
        lambda file: rewrite_uri(file, "images1"),
        inputs=[dumy_image_a],
        outputs=[in_file_a],
        show_progress="hidden",
    )
    dumy_image_b.change(
        lambda file: rewrite_uri(file, "images2"),
        inputs=[dumy_image_b],
        outputs=[in_file_b],
        show_progress="hidden",
    )

if __name__ == "__main__":
    demo.launch(debug=True)