import gradio as gr
import importlib
from PIL import Image
import json
# === Model Mapping ===
MODEL_MAP = {
"Qwen": "models.qwen",
"Pixtral": "models.pixtral",
"Aya Vision": "models.aya_vision",
"GPT-4o": "models.gpt4o"
}
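# Each value in MODEL_MAP is an importable module path; every listed module is
# expected to expose a `run_model(image, ...)` callable (the GPT-4o runner also
# accepts an `api_key` keyword argument), as used in load_model_runner below.
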
# === Load Model
def load_model_runner(model_name):
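    """Dynamically import the module mapped to `model_name` and return its `run_model` callable."""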
    module = importlib.import_module(MODEL_MAP[model_name])
    return module.run_model

# === Format Raw JSON Output
def format_result_json(output):
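    """Return dict output as pretty-printed JSON; anything else as stripped plain text."""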
    if isinstance(output, dict):
        return json.dumps(output, indent=2)
    else:
        return str(output).strip()

# === Prettified Output View
def format_pretty_view(output):
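    """Render the extracted BPMN JSON as a human-readable text summary.

    Expects a dict that either is, or contains under "process", an object with
    optional keys such as "name", "startEvent", "endEvent", "tasks", "events",
    "gateways", "sequenceFlows", "connections" and "relationships".
    """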
    if not isinstance(output, dict):
        return "No structured JSON found.\n\n" + str(output)

    lines = []
    process = output.get("process", output)

    if "name" in process:
        lines.append(f"📦 Process Name: {process['name']}\n")

    if "startEvent" in process:
        start = process["startEvent"]
        name = start.get("name", "")
        type_ = start.get("type", "")
        desc = start.get("description", "")
        line = f"▶️ Start: {name}"
        if type_:
            line += f" ({type_})"
        if desc:
            line += f" - {desc}"
        lines.append(line)

    if "endEvent" in process:
        end = process["endEvent"]
        name = end.get("name", "")
        type_ = end.get("type", "")
        desc = end.get("description", "")
        line = f"⏹ End: {name}"
        if type_:
            line += f" ({type_})"
        if desc:
            line += f" - {desc}"
        lines.append(line)

    if "tasks" in process:
        lines.append("\n🔹 Tasks:")
        for t in process["tasks"]:
            name = t.get("name", "")
            type_ = t.get("type", "")
            desc = t.get("description", "")
            line = f" - {name}"
            if type_:
                line += f" ({type_})"
            if desc:
                line += f" - {desc}"
            lines.append(line)

    if "events" in process:
        lines.append("\n📨 Events:")
        for e in process["events"]:
            name = e.get("name", "")
            type_ = e.get("type", "")
            desc = e.get("description", "")
            line = f" - {name}"
            if type_:
                line += f" ({type_})"
            if desc:
                line += f" - {desc}"
            lines.append(line)

    if "gateways" in process:
        lines.append("\n🔀 Gateways:")
        for g in process["gateways"]:
            name = g.get("name", "")
            type_ = g.get("type", "")
            label = g.get("label", "")  # some outputs may use 'label'
            desc = g.get("description", "")
            line = f" - {name}"
            if type_:
                line += f" ({type_})"
            if label:
                line += f" | Label: {label}"
            if desc:
                line += f" - {desc}"
            lines.append(line)

    if "sequenceFlows" in process:
        lines.append("\n➡️ Sequence Flows:")
        for f in process["sequenceFlows"]:
            src = f.get("sourceTask") or f.get("sourceEvent") or "Unknown"
            tgt = f.get("targetTask") or f.get("targetEvent") or "Unknown"
            condition = f.get("condition", "")
            line = f" - {src} → {tgt}"
            if condition:
                line += f" [Condition: {condition}]"
            lines.append(line)

    if "connections" in process:
        lines.append("\n🔗 Connections:")
        for c in process["connections"]:
            src = c.get("sourceTask") or c.get("sourceEvent") or "Unknown"
            tgt = c.get("targetTask") or c.get("targetEvent") or "Unknown"
            condition = c.get("condition", "")
            line = f" - {src} → {tgt}"
            if condition:
                line += f" [Condition: {condition}]"
            lines.append(line)

    if "relationships" in process:
        lines.append("\n🔗 Relationships:")
        for r in process["relationships"]:
            source = r.get("source")
            target = r.get("target")
            src = source.get("ref", "Unknown") if isinstance(source, dict) else str(source)
            tgt = target.get("ref", "Unknown") if isinstance(target, dict) else str(target)
            desc = r.get("description", "")
            line = f" - {src} → {tgt}"
            if desc:
                line += f" | {desc}"
            lines.append(line)

    return "\n".join(lines).strip()

# === Main Inference Handler
def process_single_image(model_name, image_file, api_key_file=None):
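    """Run the selected vision model on the uploaded image.

    Returns the input image, the raw JSON output (as a string) and a prettified
    text view. For GPT-4o, the API key is read from the uploaded key file.
    """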
    runner = load_model_runner(model_name)
    image = Image.open(image_file.name).convert("RGB")

    api_key = None
    if model_name == "GPT-4o" and api_key_file is not None:
        try:
            with open(api_key_file.name, "r") as key_file:
                api_key = key_file.read().strip()
        except Exception as e:
            return image, "(API key file could not be read)", f"(Error: {e})"

    if model_name == "GPT-4o":
        result = runner(image, api_key=api_key)
    else:
        result = runner(image)

    parsed_json = result.get("json")
    raw_text = result.get("raw", "")

    if parsed_json:
        json_output = format_result_json(parsed_json)
        pretty_output = format_pretty_view(parsed_json)
    else:
        json_output = "(No valid JSON extracted)"
        pretty_output = "(No structured content extracted)\n\n⚠️ Raw Model Output:\n" + raw_text

    return image, json_output, pretty_output

# === Gradio Interface (Simple)
iface = gr.Interface(
    fn=process_single_image,
    inputs=[
        gr.Dropdown(choices=list(MODEL_MAP.keys()), label="Select Vision Model"),
        gr.File(file_types=["image"], label="Upload a BPMN Image"),
        gr.File(file_types=[".txt"], label="🔐 Upload OpenAI API Key File (only for GPT-4o)")
    ],
    outputs=[
        gr.Image(label="Input Image"),
        gr.Textbox(label="Raw JSON Output (Technical)", lines=20),
        gr.Textbox(label="Prettified View (User-Friendly)", lines=25)
    ],
    title="🖼️ Vision Model Extractor - JSON + Pretty View",
    description="Upload a BPMN image and select a vision model to extract structured output. API key file is required only for GPT-4o.",
    allow_flagging="never"
)
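
# share=True serves the app locally and also creates a temporary public Gradio link.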
iface.launch(share=True)