Spaces:

ARCQUB
/

BPMN-entity-extractor

Sleeping

File size: 6,441 Bytes

import gradio as gr
import importlib
from PIL import Image
import json
import os
import tempfile
import spaces

# === Model Mapping ===
# Display name (offered in the UI dropdown) -> dotted path of the module that
# load_model_runner() imports for it. Every backend lives under "models.".
# Entries currently switched off, kept here for reference:
#   "Qwen"       -> "models.qwen"
#   "Pixtral"    -> "models.pixtral"
#   "Aya Vision" -> "models.aya_vision"
MODEL_MAP = {
    label: f"models.{module}"
    for label, module in (
        ("GPT-4o", "gpt4o"),
        ("GPT-4.1", "gpt_4_1"),
        ("GPT-4.1-mini", "gpt_4_1_mini"),
    )
}

# === Load Model
def load_model_runner(model_name):
    """Import the backend module registered for *model_name* and return its runner.

    Args:
        model_name: A key of ``MODEL_MAP``.

    Returns:
        The ``run_model`` attribute of the imported module.

    Raises:
        KeyError: If *model_name* is not a key of ``MODEL_MAP`` (this includes
            ``None``, e.g. when nothing was selected). The message lists the
            valid names.
        ImportError: If the mapped module cannot be imported.
        AttributeError: If the module does not define ``run_model``.
    """
    try:
        module_path = MODEL_MAP[model_name]
    except KeyError:
        available = ", ".join(MODEL_MAP)
        # Same exception type as the plain dict lookup, but with a message
        # that tells the reader what went wrong and what is accepted.
        raise KeyError(
            f"Unknown model {model_name!r}; available models: {available}"
        ) from None
    return importlib.import_module(module_path).run_model

# === Format Raw JSON Output
def format_result_json(output):
    """Return a text rendering of *output*.

    A dict is serialised as JSON with a 2-space indent; any other value is
    converted with ``str`` and stripped of surrounding whitespace.
    """
    if not isinstance(output, dict):
        as_text = str(output)
        return as_text.strip()
    return json.dumps(output, indent=2)

# === Prettified Output View
def _entries(value):
    """Normalise a section value to a list of non-null entries."""
    if value is None:
        return []
    if not isinstance(value, (list, tuple)):
        # A single object where a list was expected is treated as one entry.
        value = [value]
    return [item for item in value if item is not None]


def _entity_line(entity, prefix, *, with_label=False):
    """Render an event/task/gateway as ``<prefix><name> (<type>) - <description>``."""
    if not isinstance(entity, dict):
        # A bare value (e.g. just a name string) is shown as-is instead of
        # failing on ``.get``.
        return f"{prefix}{entity}"
    name = entity.get("name", "")
    line = f"{prefix}{name}"
    type_ = entity.get("type", "")
    if type_:
        line += f" ({type_})"
    if with_label:
        label = entity.get("label", "")
        if label:
            line += f" | Label: {label}"
    desc = entity.get("description", "")
    if desc:
        line += f" - {desc}"
    return line


def _flow_line(flow):
    """Render a sequence flow / connection as `` - <source> → <target>``."""
    if not isinstance(flow, dict):
        return f" - {flow}"
    src = flow.get("sourceTask") or flow.get("sourceEvent") or "Unknown"
    tgt = flow.get("targetTask") or flow.get("targetEvent") or "Unknown"
    line = f" - {src} → {tgt}"
    condition = flow.get("condition", "")
    if condition:
        line += f" [Condition: {condition}]"
    return line


def _relationship_endpoint(value):
    """Return the display text for one end of a relationship."""
    if isinstance(value, dict):
        return value.get("ref", "Unknown")
    if value is None:
        # A missing endpoint reads "Unknown", matching the flow sections,
        # rather than the literal text "None".
        return "Unknown"
    return str(value)


def _relationship_line(rel):
    """Render a relationship as `` - <source> → <target> | <description>``."""
    if not isinstance(rel, dict):
        return f" - {rel}"
    src = _relationship_endpoint(rel.get("source"))
    tgt = _relationship_endpoint(rel.get("target"))
    line = f" - {src} → {tgt}"
    desc = rel.get("description", "")
    if desc:
        line += f" | {desc}"
    return line


def format_pretty_view(output):
    """Build a human-readable, multi-line summary of an extracted process.

    Args:
        output: The parsed model output. When it is a dict, the process is
            read from its ``"process"`` key if that holds a dict, otherwise
            from *output* itself.

    Returns:
        A string with one section per key that is present, in this order:
        name, startEvent, endEvent, tasks, events, gateways, sequenceFlows,
        connections, relationships. If *output* is not a dict, a
        "No structured JSON found." notice followed by ``str(output)``.

    Sections whose value is null, a single object, or contains bare strings
    are rendered on a best-effort basis instead of raising.
    """
    if not isinstance(output, dict):
        return "No structured JSON found.\n\n" + str(output)

    process = output.get("process", output)
    if not isinstance(process, dict):
        # e.g. {"process": null}: fall back to the top-level object.
        process = output

    lines = []

    if "name" in process:
        lines.append(f"📦 Process Name: {process['name']}\n")

    for key, prefix in (("startEvent", "▶️ Start: "), ("endEvent", "⏹ End: ")):
        if process.get(key) is not None:
            lines.append(_entity_line(process[key], prefix))

    for key, header in (("tasks", "\n🔹 Tasks:"), ("events", "\n📨 Events:")):
        if key in process:
            lines.append(header)
            lines.extend(_entity_line(item, " - ") for item in _entries(process[key]))

    if "gateways" in process:
        lines.append("\n🔀 Gateways:")
        lines.extend(
            _entity_line(item, " - ", with_label=True)
            for item in _entries(process["gateways"])
        )

    for key, header in (
        ("sequenceFlows", "\n➡️ Sequence Flows:"),
        ("connections", "\n🔗 Connections:"),
    ):
        if key in process:
            lines.append(header)
            lines.extend(_flow_line(item) for item in _entries(process[key]))

    if "relationships" in process:
        lines.append("\n🔗 Relationships:")
        lines.extend(
            _relationship_line(item) for item in _entries(process["relationships"])
        )

    return "\n".join(lines).strip()

# === Main Inference Handler
def process_single_image(model_name, image_file):
    """Run the selected model on an uploaded image and build the UI outputs.

    Args:
        model_name: Key of ``MODEL_MAP`` chosen in the dropdown.
        image_file: The uploaded file. Either an object exposing its path as
            ``.name`` or a plain path string is accepted.

    Returns:
        A 4-tuple ``(image, json_output, pretty_output, tmp_path)``: the RGB
        image, the JSON text (or a placeholder), the prettified text, and the
        path of a file holding the JSON (``.json``) or the raw model text
        (``.txt``) when no JSON was extracted.

    Raises:
        gr.Error: If no known model was selected or no image was uploaded.
    """
    # Surface missing inputs as a readable UI error instead of a traceback.
    if model_name not in MODEL_MAP:
        raise gr.Error("Please select a vision model.")
    if image_file is None:
        raise gr.Error("Please upload a BPMN image.")

    runner = load_model_runner(model_name)

    image_path = getattr(image_file, "name", image_file)
    # convert() returns a new image, so the source file can be closed at once.
    with Image.open(image_path) as source:
        image = source.convert("RGB")
    base_name = os.path.splitext(os.path.basename(image_path))[0]

    result = runner(image)
    parsed_json = result.get("json")
    # "raw" may be present but null; normalise so concatenation/write are safe.
    raw_text = result.get("raw") or ""
    if not isinstance(raw_text, str):
        raw_text = str(raw_text)

    # A fresh directory per call: keeps the friendly file name while avoiding
    # collisions between requests that upload equally named images.
    out_dir = tempfile.mkdtemp(prefix="bpmn_extract_")

    if parsed_json:
        json_output = json.dumps(parsed_json, indent=2)
        pretty_output = format_pretty_view(parsed_json)

        tmp_path = os.path.join(out_dir, f"{base_name}_output.json")
        with open(tmp_path, "w", encoding="utf-8") as f:
            json.dump(parsed_json, f, indent=2)
    else:
        json_output = "(No valid JSON extracted)"
        pretty_output = "(No structured content extracted)\n\n⚠️ Raw Model Output:\n" + raw_text

        tmp_path = os.path.join(out_dir, f"{base_name}_output.txt")
        with open(tmp_path, "w", encoding="utf-8") as f:
            f.write(raw_text)

    return image, json_output, pretty_output, tmp_path

# === Gradio UI
# Single-page UI: (model choice, uploaded image) -> process_single_image ->
# (image preview, raw JSON text, prettified text, downloadable result file).
iface = gr.Interface(
    fn=process_single_image,
    inputs=[
        gr.Dropdown(choices=list(MODEL_MAP.keys()), label="Select Vision Model"),
        gr.File(file_types=["image"], label="Upload a BPMN Image")
    ],
    outputs=[
        gr.Image(label="Input Image"),
        gr.Textbox(label="Raw JSON Output (Technical)", lines=20),
        gr.Textbox(label="Prettified View (User-Friendly)", lines=25),
        gr.File(label="📥 Download JSON", visible=True)
    ],
    title="🖼️ Vision Model Extractor - JSON + Pretty View",
    # The supported-model list is derived from MODEL_MAP so the text cannot
    # drift from what the dropdown actually offers.
    description=(
        "Upload a BPMN image and select a vision model to extract structured output. "
        f"Currently supports: {', '.join(MODEL_MAP)}."
    ),
    flagging_mode="never"
)

# === Launch
# NOTE: the `spaces.GPU` decorator below is commented out, so main() runs
# undecorated.
#@spaces.GPU
def main():
    """Launch the module-level ``iface`` interface."""
    iface.launch()

# Launch only when executed as a script, not when this module is imported.
if __name__ == "__main__":
    main()