# Hugging Face Spaces page residue ("Spaces: Sleeping") — not part of the program.
# Standard library
import importlib
import json

# Third-party
import gradio as gr
from PIL import Image
# === Model Mapping ===
# UI display name -> dotted module path; each mapped module is expected to
# expose a `run_model` callable (see load_model_runner).
MODEL_MAP = {
    "Qwen": "models.qwen",
    "Pixtral": "models.pixtral",
    "Aya Vision": "models.aya_vision",
    "GPT-4o": "models.gpt4o",
}
# === Load Model
def load_model_runner(model_name):
    """Resolve *model_name* via MODEL_MAP, import the backing module, and
    return its ``run_model`` callable.

    Raises:
        KeyError: if *model_name* is not a key of MODEL_MAP.
        ImportError: if the mapped module cannot be imported.
    """
    module_path = MODEL_MAP[model_name]
    backend = importlib.import_module(module_path)
    return backend.run_model
# === Format Raw JSON Output
def format_result_json(output):
    """Render *output* for the technical JSON textbox.

    Dicts are pretty-printed as 2-space-indented JSON; anything else is
    stringified and stripped of surrounding whitespace.
    """
    if not isinstance(output, dict):
        return str(output).strip()
    return json.dumps(output, indent=2)
# === Prettified Output View
# NOTE(review): the emoji prefixes below appear mojibake'd (e.g. "๐ฆ"); they
# are preserved byte-for-byte so rendered output does not change — confirm the
# intended glyphs upstream.

def _entry_line(prefix, entry, with_label=False):
    """Format one task/event/gateway dict as '<prefix><name> (<type>) - <desc>'.

    *with_label* additionally inserts ' | Label: <label>' between type and
    description (used for gateways only, matching the per-section behavior
    of the original long-form implementation).
    """
    line = f"{prefix}{entry.get('name', '')}"
    type_ = entry.get("type", "")
    if type_:
        line += f" ({type_})"
    if with_label:
        label = entry.get("label", "")  # some outputs may use 'label'
        if label:
            line += f" | Label: {label}"
    desc = entry.get("description", "")
    if desc:
        line += f" - {desc}"
    return line


def _flow_line(flow):
    """Format one sequenceFlow/connection dict as ' - <src> โ <tgt>',
    appending ' [Condition: ...]' when a condition is present."""
    src = flow.get("sourceTask") or flow.get("sourceEvent") or "Unknown"
    tgt = flow.get("targetTask") or flow.get("targetEvent") or "Unknown"
    line = f" - {src} โ {tgt}"
    condition = flow.get("condition", "")
    if condition:
        line += f" [Condition: {condition}]"
    return line


def _ref_name(endpoint):
    """Resolve a relationship endpoint: dicts use their 'ref' key (default
    'Unknown'); anything else — including None — is stringified."""
    return endpoint.get("ref", "Unknown") if isinstance(endpoint, dict) else str(endpoint)


def format_pretty_view(output):
    """Render extracted BPMN JSON as a human-readable, sectioned summary.

    Accepts the parsed model output: a dict, optionally wrapped under a
    'process' key. Non-dict input yields a plain-text fallback notice.
    Refactored from one long function into per-shape helpers; the produced
    text is unchanged.
    """
    if not isinstance(output, dict):
        return "No structured JSON found.\n\n" + str(output)

    process = output.get("process", output)
    lines = []

    if "name" in process:
        lines.append(f"๐ฆ Process Name: {process['name']}\n")
    if "startEvent" in process:
        lines.append(_entry_line("โถ๏ธ Start: ", process["startEvent"]))
    if "endEvent" in process:
        lines.append(_entry_line("โน End: ", process["endEvent"]))

    if "tasks" in process:
        lines.append("\n๐น Tasks:")
        lines.extend(_entry_line(" - ", t) for t in process["tasks"])
    if "events" in process:
        lines.append("\n๐จ Events:")
        lines.extend(_entry_line(" - ", e) for e in process["events"])
    if "gateways" in process:
        lines.append("\n๐ Gateways:")
        lines.extend(_entry_line(" - ", g, with_label=True) for g in process["gateways"])

    if "sequenceFlows" in process:
        lines.append("\nโก๏ธ Sequence Flows:")
        lines.extend(_flow_line(f) for f in process["sequenceFlows"])
    if "connections" in process:
        lines.append("\n๐ Connections:")
        lines.extend(_flow_line(c) for c in process["connections"])

    if "relationships" in process:
        lines.append("\n๐ Relationships:")
        for r in process["relationships"]:
            line = f" - {_ref_name(r.get('source'))} โ {_ref_name(r.get('target'))}"
            desc = r.get("description", "")
            if desc:
                line += f" | {desc}"
            lines.append(line)

    return "\n".join(lines).strip()
# === Main Inference Handler
def process_single_image(model_name, image_file, api_key_file=None):
    """Run the selected vision model on one uploaded BPMN image.

    Parameters
    ----------
    model_name : str
        Key into MODEL_MAP selecting the backend runner.
    image_file : file-like
        Gradio upload object; only its ``.name`` (a temp-file path) is used.
    api_key_file : file-like or None
        Optional text file holding an OpenAI API key; read only for GPT-4o.

    Returns
    -------
    tuple
        (loaded image, raw-JSON view string, prettified view string).
    """
    runner = load_model_runner(model_name)
    image = Image.open(image_file.name).convert("RGB")

    api_key = None
    if model_name == "GPT-4o" and api_key_file is not None:
        try:
            # FIX: use a context manager so the key file is always closed
            # (the original `open(...).read()` leaked the handle).
            with open(api_key_file.name, "r") as fh:
                api_key = fh.read().strip()
        except Exception as e:
            # Best-effort boundary: surface the error in the UI instead of crashing.
            return image, "(API key file could not be read)", f"(Error: {e})"

    # Only the GPT-4o runner accepts an api_key keyword.
    if model_name == "GPT-4o":
        result = runner(image, api_key=api_key)
    else:
        result = runner(image)

    # Runners return a dict with optional "json" (parsed) and "raw" (text) keys.
    parsed_json = result.get("json")
    raw_text = result.get("raw", "")
    if parsed_json:
        json_output = format_result_json(parsed_json)
        pretty_output = format_pretty_view(parsed_json)
    else:
        json_output = "(No valid JSON extracted)"
        pretty_output = "(No structured content extracted)\n\nโ ๏ธ Raw Model Output:\n" + raw_text
    return image, json_output, pretty_output
# === Gradio Interface (Simple)
# Build the input components up front so the Interface call stays readable.
model_selector = gr.Dropdown(choices=list(MODEL_MAP.keys()), label="Select Vision Model")
bpmn_upload = gr.File(file_types=["image"], label="Upload a BPMN Image")
key_upload = gr.File(file_types=[".txt"], label="๐ Upload OpenAI API Key File (only for GPT-4o)")

iface = gr.Interface(
    fn=process_single_image,
    inputs=[model_selector, bpmn_upload, key_upload],
    outputs=[
        gr.Image(label="Input Image"),
        gr.Textbox(label="Raw JSON Output (Technical)", lines=20),
        gr.Textbox(label="Prettified View (User-Friendly)", lines=25),
    ],
    title="๐ผ๏ธ Vision Model Extractor - JSON + Pretty View",
    description="Upload a BPMN image and select a vision model to extract structured output. API key file is required only for GPT-4o.",
    allow_flagging="never",
)

iface.launch(share=True)