"""Gradio app: run a vision model on a BPMN diagram image and show the
extracted process structure as raw JSON, a prettified text view, and a
downloadable file."""

import importlib
import json
import os
import tempfile

import gradio as gr
import spaces  # noqa: F401 -- kept for the (currently disabled) @spaces.GPU decorator
from PIL import Image

# === Model Mapping ===
# UI-facing model name -> module path exposing a `run_model(image)` callable.
MODEL_MAP = {
    #"Qwen": "models.qwen",
    #"Pixtral": "models.pixtral",
    #"Aya Vision": "models.aya_vision",
    "GPT-4o": "models.gpt4o",
}


# === Load Model
def load_model_runner(model_name):
    """Import the module mapped to ``model_name`` and return its ``run_model``.

    Raises KeyError for unknown model names and ImportError if the mapped
    module is missing.
    """
    module = importlib.import_module(MODEL_MAP[model_name])
    return module.run_model


# === Format Raw JSON Output
def format_result_json(output):
    """Return ``output`` pretty-printed as JSON if it is a dict, else as stripped text."""
    if isinstance(output, dict):
        return json.dumps(output, indent=2)
    return str(output).strip()


def _annotated_line(base, type_, desc, label=""):
    """Append optional ``(type)``, ``| Label: ...`` and ``- description`` suffixes to ``base``."""
    if type_:
        base += f" ({type_})"
    if label:
        base += f" | Label: {label}"
    if desc:
        base += f" - {desc}"
    return base


def _element_line(item, label_key=None):
    """Format one task/event/gateway dict as a ' - name (type) - desc' bullet line."""
    label = item.get(label_key, "") if label_key else ""
    return _annotated_line(
        f" - {item.get('name', '')}", item.get("type", ""), item.get("description", ""), label
    )


def _flow_line(flow):
    """Format one sequence-flow/connection dict as ' - src ā†’ tgt [Condition: ...]'."""
    src = flow.get("sourceTask") or flow.get("sourceEvent") or "Unknown"
    tgt = flow.get("targetTask") or flow.get("targetEvent") or "Unknown"
    line = f" - {src} ā†’ {tgt}"
    condition = flow.get("condition", "")
    if condition:
        line += f" [Condition: {condition}]"
    return line


def _ref_name(endpoint):
    """Resolve a relationship endpoint: dicts use their 'ref' key, else str()."""
    return endpoint.get("ref", "Unknown") if isinstance(endpoint, dict) else str(endpoint)


# === Prettified Output View
def format_pretty_view(output):
    """Render extracted BPMN JSON as a human-readable, emoji-labelled summary.

    Accepts either the full payload (with a top-level "process" key) or a bare
    process dict. Non-dict input is echoed back with a notice.
    """
    if not isinstance(output, dict):
        return "No structured JSON found.\n\n" + str(output)

    lines = []
    process = output.get("process", output)

    if "name" in process:
        lines.append(f"šŸ“¦ Process Name: {process['name']}\n")

    if "startEvent" in process:
        start = process["startEvent"]
        lines.append(
            _annotated_line(
                f"ā–¶ļø Start: {start.get('name', '')}",
                start.get("type", ""),
                start.get("description", ""),
            )
        )

    if "endEvent" in process:
        end = process["endEvent"]
        lines.append(
            _annotated_line(
                f"ā¹ End: {end.get('name', '')}",
                end.get("type", ""),
                end.get("description", ""),
            )
        )

    if "tasks" in process:
        lines.append("\nšŸ”¹ Tasks:")
        lines.extend(_element_line(t) for t in process["tasks"])

    if "events" in process:
        lines.append("\nšŸ“Ø Events:")
        lines.extend(_element_line(e) for e in process["events"])

    if "gateways" in process:
        lines.append("\nšŸ”€ Gateways:")
        # Gateways additionally carry an optional 'label' between type and description.
        lines.extend(_element_line(g, label_key="label") for g in process["gateways"])

    if "sequenceFlows" in process:
        lines.append("\nāž”ļø Sequence Flows:")
        lines.extend(_flow_line(flow) for flow in process["sequenceFlows"])

    if "connections" in process:
        lines.append("\nšŸ”— Connections:")
        lines.extend(_flow_line(conn) for conn in process["connections"])

    if "relationships" in process:
        lines.append("\nšŸ”— Relationships:")
        for r in process["relationships"]:
            line = f" - {_ref_name(r.get('source'))} ā†’ {_ref_name(r.get('target'))}"
            desc = r.get("description", "")
            if desc:
                line += f" | {desc}"
            lines.append(line)

    return "\n".join(lines).strip()


# === Main Inference Handler
def process_single_image(model_name, image_file):
    """Run the selected vision model on one uploaded BPMN image.

    Returns a 4-tuple for the Gradio outputs:
    (PIL image, raw JSON text, prettified text, path to downloadable file).
    """
    runner = load_model_runner(model_name)

    # gr.File may deliver a tempfile-like wrapper (with .name) or a plain path string.
    path = getattr(image_file, "name", image_file)
    image = Image.open(path).convert("RGB")
    base_name = os.path.splitext(os.path.basename(path))[0]

    result = runner(image)
    parsed_json = result.get("json")
    raw_text = result.get("raw", "")

    if parsed_json:
        json_output = json.dumps(parsed_json, indent=2)
        pretty_output = format_pretty_view(parsed_json)
        # Persist the structured result so the UI can offer it as a download.
        tmp_path = os.path.join(tempfile.gettempdir(), f"{base_name}_output.json")
        with open(tmp_path, "w", encoding="utf-8") as f:
            json.dump(parsed_json, f, indent=2)
    else:
        json_output = "(No valid JSON extracted)"
        pretty_output = "(No structured content extracted)\n\nāš ļø Raw Model Output:\n" + raw_text
        # Fall back to saving the raw model text so the user still gets something.
        tmp_path = os.path.join(tempfile.gettempdir(), f"{base_name}_output.txt")
        with open(tmp_path, "w", encoding="utf-8") as f:
            f.write(raw_text)

    return image, json_output, pretty_output, tmp_path


# === Gradio UI
iface = gr.Interface(
    fn=process_single_image,
    inputs=[
        gr.Dropdown(choices=list(MODEL_MAP.keys()), label="Select Vision Model"),
        gr.File(file_types=["image"], label="Upload a BPMN Image"),
    ],
    outputs=[
        gr.Image(label="Input Image"),
        gr.Textbox(label="Raw JSON Output (Technical)", lines=20),
        gr.Textbox(label="Prettified View (User-Friendly)", lines=25),
        gr.File(label="šŸ“„ Download JSON", visible=True),
    ],
    title="šŸ–¼ļø Vision Model Extractor - JSON + Pretty View",
    description=(
        "Upload a BPMN image and select a vision model to extract structured output. "
        "Currently supports only GPT-4o."
    ),
    flagging_mode="never",
)


# === Enable GPU mode and launch
#@spaces.GPU
def main():
    """Launch the Gradio interface."""
    iface.launch()


if __name__ == "__main__":
    main()