Upload folder using huggingface_hub
- app.py +187 -0
- models/aya_vision.py +128 -0
- models/gpt4o.py +111 -0
- models/pixtral.py +113 -0
- models/qwen.py +121 -0
- prompts/prompt.txt +145 -0
- requirements.txt +10 -0
app.py
ADDED
@@ -0,0 +1,187 @@
import gradio as gr
import importlib
from PIL import Image
import json

# === Model Mapping ===
MODEL_MAP = {
    "Qwen": "models.qwen",
    "Pixtral": "models.pixtral",
    "Aya Vision": "models.aya_vision",
    "GPT-4o": "models.gpt4o"
}

# === Load Model
def load_model_runner(model_name):
    module = importlib.import_module(MODEL_MAP[model_name])
    return module.run_model

# === Format Raw JSON Output
def format_result_json(output):
    if isinstance(output, dict):
        return json.dumps(output, indent=2)
    else:
        return str(output).strip()

# === Prettified Output View
def format_pretty_view(output):
    if not isinstance(output, dict):
        return "No structured JSON found.\n\n" + str(output)

    lines = []
    process = output.get("process", output)

    if "name" in process:
        lines.append(f"📦 Process Name: {process['name']}\n")

    if "startEvent" in process:
        start = process["startEvent"]
        name = start.get("name", "")
        type_ = start.get("type", "")
        desc = start.get("description", "")
        line = f"▶️ Start: {name}"
        if type_:
            line += f" ({type_})"
        if desc:
            line += f" - {desc}"
        lines.append(line)

    if "endEvent" in process:
        end = process["endEvent"]
        name = end.get("name", "")
        type_ = end.get("type", "")
        desc = end.get("description", "")
        line = f"⏹ End: {name}"
        if type_:
            line += f" ({type_})"
        if desc:
            line += f" - {desc}"
        lines.append(line)

    if "tasks" in process:
        lines.append("\n🔹 Tasks:")
        for t in process["tasks"]:
            name = t.get("name", "")
            type_ = t.get("type", "")
            desc = t.get("description", "")
            line = f" - {name}"
            if type_:
                line += f" ({type_})"
            if desc:
                line += f" - {desc}"
            lines.append(line)

    if "events" in process:
        lines.append("\n📨 Events:")
        for e in process["events"]:
            name = e.get("name", "")
            type_ = e.get("type", "")
            desc = e.get("description", "")
            line = f" - {name}"
            if type_:
                line += f" ({type_})"
            if desc:
                line += f" - {desc}"
            lines.append(line)

    if "gateways" in process:
        lines.append("\n🔀 Gateways:")
        for g in process["gateways"]:
            name = g.get("name", "")
            type_ = g.get("type", "")
            label = g.get("label", "")  # some outputs may use 'label'
            desc = g.get("description", "")
            line = f" - {name}"
            if type_:
                line += f" ({type_})"
            if label:
                line += f" | Label: {label}"
            if desc:
                line += f" - {desc}"
            lines.append(line)

    if "sequenceFlows" in process:
        lines.append("\n➡️ Sequence Flows:")
        for f in process["sequenceFlows"]:
            src = f.get("sourceTask") or f.get("sourceEvent") or "Unknown"
            tgt = f.get("targetTask") or f.get("targetEvent") or "Unknown"
            condition = f.get("condition", "")
            line = f" - {src} → {tgt}"
            if condition:
                line += f" [Condition: {condition}]"
            lines.append(line)

    if "connections" in process:
        lines.append("\n🔗 Connections:")
        for c in process["connections"]:
            src = c.get("sourceTask") or c.get("sourceEvent") or "Unknown"
            tgt = c.get("targetTask") or c.get("targetEvent") or "Unknown"
            condition = c.get("condition", "")
            line = f" - {src} → {tgt}"
            if condition:
                line += f" [Condition: {condition}]"
            lines.append(line)

    if "relationships" in process:
        lines.append("\n🔗 Relationships:")
        for r in process["relationships"]:
            source = r.get("source")
            target = r.get("target")
            src = source.get("ref", "Unknown") if isinstance(source, dict) else str(source)
            tgt = target.get("ref", "Unknown") if isinstance(target, dict) else str(target)
            desc = r.get("description", "")
            line = f" - {src} → {tgt}"
            if desc:
                line += f" | {desc}"
            lines.append(line)

    return "\n".join(lines).strip()

# === Main Inference Handler
def process_single_image(model_name, image_file, api_key_file=None):
    runner = load_model_runner(model_name)
    image = Image.open(image_file.name).convert("RGB")

    api_key = None
    if model_name == "GPT-4o" and api_key_file is not None:
        try:
            api_key = open(api_key_file.name, "r").read().strip()
        except Exception as e:
            return image, "(API key file could not be read)", f"(Error: {e})"

    if model_name == "GPT-4o":
        result = runner(image, api_key=api_key)
    else:
        result = runner(image)

    parsed_json = result.get("json")
    raw_text = result.get("raw", "")

    if parsed_json:
        json_output = format_result_json(parsed_json)
        pretty_output = format_pretty_view(parsed_json)
    else:
        json_output = "(No valid JSON extracted)"
        pretty_output = "(No structured content extracted)\n\n⚠️ Raw Model Output:\n" + raw_text

    return image, json_output, pretty_output

# === Gradio Interface (Simple)
iface = gr.Interface(
    fn=process_single_image,
    inputs=[
        gr.Dropdown(choices=list(MODEL_MAP.keys()), label="Select Vision Model"),
        gr.File(file_types=["image"], label="Upload a BPMN Image"),
        gr.File(file_types=[".txt"], label="🔐 Upload OpenAI API Key File (only for GPT-4o)")
    ],
    outputs=[
        gr.Image(label="Input Image"),
        gr.Textbox(label="Raw JSON Output (Technical)", lines=20),
        gr.Textbox(label="Prettified View (User-Friendly)", lines=25)
    ],
    title="🖼️ Vision Model Extractor - JSON + Pretty View",
    description="Upload a BPMN image and select a vision model to extract structured output. API key file is required only for GPT-4o.",
    allow_flagging="never"
)

iface.launch(share=True)
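
Note on the structure above: app.py treats each MODEL_MAP entry as a pluggable backend. The value is a module path imported lazily with importlib, and the module must expose a run_model function that takes a PIL image (plus an optional api_key for GPT-4o) and returns a dict with "json" and "raw" keys. A minimal sketch of a hypothetical extra backend (the module name and payload are illustrative, not part of this upload):

# models/dummy.py - hypothetical stub showing the contract app.py expects
import json
from PIL import Image

def run_model(image: Image.Image):
    # A real backend would run a vision-language model here; this stub just
    # returns a fixed payload in the {"json": ..., "raw": ...} shape app.py reads.
    payload = {"process": {"name": "Example Process", "tasks": []}}
    return {"json": payload, "raw": json.dumps(payload)}

Registering it would only require adding "Dummy": "models.dummy" to MODEL_MAP in app.py.
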
models/aya_vision.py
ADDED
@@ -0,0 +1,128 @@
import os
import json
import re
from PIL import Image
import torch
from transformers import AutoProcessor, AutoModelForImageTextToText
from transformers import Pix2StructForConditionalGeneration, Pix2StructProcessor

# Set Hugging Face Token
hf_token = os.getenv("HF_TOKEN")

# Initialize Aya Vision Model
model_id = "CohereForAI/aya-vision-8b"
processor = AutoProcessor.from_pretrained(model_id)
model = AutoModelForImageTextToText.from_pretrained(
    model_id, device_map="auto", torch_dtype=torch.float16
)

# Initialize Pix2Struct OCR Model
ocr_processor = Pix2StructProcessor.from_pretrained("google/pix2struct-textcaps-base")
ocr_model = Pix2StructForConditionalGeneration.from_pretrained("google/pix2struct-textcaps-base")

# Load prompt (relative path, same as the other model modules)
def load_prompt():
    with open("prompts/prompt.txt", "r", encoding="utf-8") as f:
        return f.read()

# Try extracting JSON from model output
def try_extract_json(text):
    if not text or not text.strip():
        return None
    try:
        return json.loads(text)
    except json.JSONDecodeError:
        # Try extracting JSON substring by brace balancing
        start = text.find('{')
        if start == -1:
            return None

        brace_count = 0
        json_candidate = ''
        for i in range(start, len(text)):
            char = text[i]
            if char == '{':
                brace_count += 1
            elif char == '}':
                brace_count -= 1
            json_candidate += char
            if brace_count == 0:
                break

        try:
            return json.loads(json_candidate)
        except json.JSONDecodeError:
            return None

# Extract OCR text using Pix2Struct
def extract_all_text_pix2struct(image: Image.Image):
    inputs = ocr_processor(images=image, return_tensors="pt")
    predictions = ocr_model.generate(**inputs, max_new_tokens=512)
    output_text = ocr_processor.decode(predictions[0], skip_special_tokens=True)
    return output_text.strip()

# Assign event/gateway names from OCR text
def assign_event_gateway_names_from_ocr(json_data: dict, ocr_text: str):
    if not ocr_text or not json_data:
        return json_data

    lines = [line.strip() for line in ocr_text.split('\n') if line.strip()]

    def assign_best_guess(obj):
        if not obj.get("name") or obj["name"].strip() == "":
            obj["name"] = "(label unknown)"

    for evt in json_data.get("events", []):
        assign_best_guess(evt)

    for gw in json_data.get("gateways", []):
        assign_best_guess(gw)

    return json_data

# Run Aya model on image
def run_model(image: Image.Image):
    prompt = load_prompt()

    messages = [
        {
            "role": "user",
            "content": [
                {"type": "image", "image": image},
                {"type": "text", "text": prompt}
            ]
        }
    ]

    inputs = processor.apply_chat_template(
        messages,
        padding=True,
        add_generation_prompt=True,
        tokenize=True,
        return_dict=True,
        return_tensors="pt"
    ).to(model.device)

    gen_tokens = model.generate(
        **inputs,
        max_new_tokens=5000,
        do_sample=True,
        temperature=0.3,
    )

    output_text = processor.tokenizer.decode(
        gen_tokens[0][inputs.input_ids.shape[1]:],
        skip_special_tokens=True
    )

    parsed_json = try_extract_json(output_text)

    # Apply OCR post-processing
    ocr_text = extract_all_text_pix2struct(image)
    parsed_json = assign_event_gateway_names_from_ocr(parsed_json, ocr_text)

    # Return both parsed and raw
    return {
        "json": parsed_json,
        "raw": output_text
    }
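
For reference, the brace-balancing fallback in try_extract_json above is what lets the app tolerate conversational model output. A quick illustration of the intended behaviour, assuming the helper has been imported or copied into the current scope (the values are made up):

# Illustrative only: expected behaviour of try_extract_json on noisy output
messy = 'Sure! Here is the diagram: {"events": [{"name": "Start"}]} Hope this helps.'

# json.loads(messy) would raise, so the helper scans from the first '{', appends
# characters until the braces balance, and parses that slice instead.
assert try_extract_json(messy) == {"events": [{"name": "Start"}]}
assert try_extract_json("no json here") is None
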
models/gpt4o.py
ADDED
@@ -0,0 +1,111 @@
# gpt4o_pix2struct_ocr.py

import os
import json
import base64
from PIL import Image
from transformers import Pix2StructForConditionalGeneration, Pix2StructProcessor
import numpy as np

import openai

model = "gpt-4o"

# Load Pix2Struct model + processor (vision-language OCR)
processor = Pix2StructProcessor.from_pretrained("google/pix2struct-textcaps-base")
pix2struct_model = Pix2StructForConditionalGeneration.from_pretrained("google/pix2struct-textcaps-base")


def load_prompt(prompt_file="prompts/prompt.txt"):
    with open(prompt_file, "r", encoding="utf-8") as f:
        return f.read().strip()


def try_extract_json(text):
    try:
        return json.loads(text)
    except json.JSONDecodeError:
        start = text.find('{')
        if start == -1:
            return None
        brace_count = 0
        json_candidate = ''
        for i in range(start, len(text)):
            if text[i] == '{':
                brace_count += 1
            elif text[i] == '}':
                brace_count -= 1
            json_candidate += text[i]
            if brace_count == 0 and json_candidate.strip():
                break
        try:
            return json.loads(json_candidate)
        except json.JSONDecodeError:
            return None


def encode_image_base64(image: Image.Image):
    from io import BytesIO
    buffer = BytesIO()
    image.save(buffer, format="JPEG")
    return base64.b64encode(buffer.getvalue()).decode("utf-8")


def extract_all_text_pix2struct(image: Image.Image):
    inputs = processor(images=image, return_tensors="pt")
    predictions = pix2struct_model.generate(**inputs, max_new_tokens=512)
    output_text = processor.decode(predictions[0], skip_special_tokens=True)
    return output_text.strip()


# Optional: assign best-matching label from full extracted text using proximity (simplified version)
def assign_event_gateway_names_from_ocr(image: Image.Image, json_data, ocr_text):
    # Also bail out when no JSON was parsed, so a None result doesn't crash below
    if not ocr_text or not json_data:
        return json_data

    # You could use NLP matching or regex in complex cases
    words = ocr_text.split()

    def guess_name_fallback(obj):
        if not obj.get("name") or obj["name"].strip() == "":
            obj["name"] = "(label unknown)"  # fallback if matching logic isn't yet implemented

    for evt in json_data.get("events", []):
        guess_name_fallback(evt)

    for gw in json_data.get("gateways", []):
        guess_name_fallback(gw)

    return json_data


def run_model(image: Image.Image, api_key: str = None):
    prompt_text = load_prompt()
    encoded_image = encode_image_base64(image)

    if not api_key:
        return {"json": None, "raw": "⚠️ API key is missing. Please provide your OpenAI API key."}

    client = openai.OpenAI(api_key=api_key)
    response = client.chat.completions.create(
        model=model,
        messages=[
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": prompt_text},
                    {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{encoded_image}"}}
                ]
            }
        ],
        max_tokens=5000
    )

    output_text = response.choices[0].message.content.strip()
    parsed_json = try_extract_json(output_text)

    # Vision-language OCR assist step (Pix2Struct)
    full_ocr_text = extract_all_text_pix2struct(image)
    parsed_json = assign_event_gateway_names_from_ocr(image, parsed_json, full_ocr_text)

    return {"json": parsed_json, "raw": output_text}
models/pixtral.py
ADDED
@@ -0,0 +1,113 @@
import os
import json
import base64
from PIL import Image
from vllm import LLM
from vllm.sampling_params import SamplingParams
from transformers import Pix2StructForConditionalGeneration, Pix2StructProcessor

# Optional: Replace with your Hugging Face token or use environment variable
hf_token = os.getenv("HF_TOKEN")
Image.MAX_IMAGE_PIXELS = None

# Initialize Pixtral model
model_name = "mistralai/Pixtral-12B-2409"
sampling_params = SamplingParams(max_tokens=5000)
llm = LLM(model=model_name, tokenizer_mode="mistral", dtype="bfloat16", max_model_len=30000)

# Initialize Pix2Struct OCR model
ocr_processor = Pix2StructProcessor.from_pretrained("google/pix2struct-textcaps-base")
ocr_model = Pix2StructForConditionalGeneration.from_pretrained("google/pix2struct-textcaps-base")

# Load prompt from file
def load_prompt():
    with open("prompts/prompt.txt", "r", encoding="utf-8") as f:
        return f.read()

# Extract structured JSON from text
def try_extract_json(text):
    if not text or not text.strip():
        return None
    try:
        return json.loads(text)
    except json.JSONDecodeError:
        start = text.find('{')
        if start == -1:
            return None

        brace_count = 0
        json_candidate = ''
        for i in range(start, len(text)):
            if text[i] == '{':
                brace_count += 1
            elif text[i] == '}':
                brace_count -= 1
            json_candidate += text[i]
            if brace_count == 0:
                break
        try:
            return json.loads(json_candidate)
        except json.JSONDecodeError:
            return None

# Base64 encode image
def encode_image_as_base64(pil_image):
    from io import BytesIO
    buffer = BytesIO()
    pil_image.save(buffer, format="JPEG")
    encoded = base64.b64encode(buffer.getvalue()).decode("utf-8")
    return encoded

# Extract OCR text using Pix2Struct
def extract_all_text_pix2struct(image: Image.Image):
    inputs = ocr_processor(images=image, return_tensors="pt")
    predictions = ocr_model.generate(**inputs, max_new_tokens=512)
    output_text = ocr_processor.decode(predictions[0], skip_special_tokens=True)
    return output_text.strip()

# Assign event/gateway names from OCR text
def assign_event_gateway_names_from_ocr(json_data: dict, ocr_text: str):
    if not ocr_text or not json_data:
        return json_data

    lines = [line.strip() for line in ocr_text.split('\n') if line.strip()]

    def assign_best_guess(obj):
        if not obj.get("name") or obj["name"].strip() == "":
            obj["name"] = "(label unknown)"

    for evt in json_data.get("events", []):
        assign_best_guess(evt)

    for gw in json_data.get("gateways", []):
        assign_best_guess(gw)

    return json_data

# Run model
def run_model(image: Image.Image):
    prompt = load_prompt()
    encoded_image = encode_image_as_base64(image)

    messages = [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": prompt},
                {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{encoded_image}"}}
            ]
        }
    ]

    outputs = llm.chat(messages, sampling_params=sampling_params)
    raw_output = outputs[0].outputs[0].text
    parsed_json = try_extract_json(raw_output)

    # Apply OCR post-processing
    ocr_text = extract_all_text_pix2struct(image)
    parsed_json = assign_event_gateway_names_from_ocr(parsed_json, ocr_text)

    return {
        "json": parsed_json,
        "raw": raw_output
    }
models/qwen.py
ADDED
@@ -0,0 +1,121 @@
import os
import json
from PIL import Image
import torch
from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor
from qwen_vl_utils import process_vision_info
from transformers import Pix2StructForConditionalGeneration, Pix2StructProcessor

# Initialize Qwen2.5-VL model
model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
    "Qwen/Qwen2.5-VL-7B-Instruct",
    torch_dtype=torch.bfloat16,
    device_map="cuda",
    attn_implementation="flash_attention_2"
)

min_pixels = 256 * 28 * 28
max_pixels = 1080 * 28 * 28
processor = AutoProcessor.from_pretrained("Qwen/Qwen2.5-VL-7B-Instruct", min_pixels=min_pixels, max_pixels=max_pixels)

# Initialize Pix2Struct OCR model
ocr_processor = Pix2StructProcessor.from_pretrained("google/pix2struct-textcaps-base")
ocr_model = Pix2StructForConditionalGeneration.from_pretrained("google/pix2struct-textcaps-base")

# Load prompt
def load_prompt():
    with open("prompts/prompt.txt", "r") as f:
        return f.read()

# Try extracting JSON from text
def try_extract_json(text):
    try:
        return json.loads(text)
    except json.JSONDecodeError:
        start = text.find('{')
        if start == -1:
            # Return None (not the raw text) so the OCR post-processing and the
            # app's fallback to the raw output behave like the other backends.
            return None
        brace_count = 0
        json_candidate = ''
        for i in range(start, len(text)):
            if text[i] == '{':
                brace_count += 1
            elif text[i] == '}':
                brace_count -= 1
            json_candidate += text[i]
            if brace_count == 0:
                break
        try:
            return json.loads(json_candidate)
        except json.JSONDecodeError:
            return None

# Extract OCR text using Pix2Struct
def extract_all_text_pix2struct(image: Image.Image):
    inputs = ocr_processor(images=image, return_tensors="pt")
    predictions = ocr_model.generate(**inputs, max_new_tokens=512)
    output_text = ocr_processor.decode(predictions[0], skip_special_tokens=True)
    return output_text.strip()

# Assign event/gateway names from OCR text
def assign_event_gateway_names_from_ocr(json_data: dict, ocr_text: str):
    if not ocr_text or not json_data:
        return json_data

    lines = [line.strip() for line in ocr_text.split('\n') if line.strip()]

    def assign_best_guess(obj):
        if not obj.get("name") or obj["name"].strip() == "":
            obj["name"] = "(label unknown)"

    for evt in json_data.get("events", []):
        assign_best_guess(evt)

    for gw in json_data.get("gateways", []):
        assign_best_guess(gw)

    return json_data

# Run model
def run_model(image: Image.Image):
    prompt = load_prompt()
    messages = [
        {
            "role": "user",
            "content": [
                {"type": "image", "image": image},
                {"type": "text", "text": prompt}
            ]
        }
    ]

    text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    image_inputs, video_inputs = process_vision_info(messages)

    inputs = processor(
        text=[text],
        images=image_inputs,
        videos=video_inputs,
        padding=True,
        return_tensors="pt"
    ).to("cuda")

    generated_ids = model.generate(**inputs, max_new_tokens=5000)
    generated_ids_trimmed = [out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)]

    output_text = processor.batch_decode(
        generated_ids_trimmed,
        skip_special_tokens=True,
        clean_up_tokenization_spaces=False
    )[0]

    parsed_json = try_extract_json(output_text)

    # Apply OCR post-processing
    ocr_text = extract_all_text_pix2struct(image)
    parsed_json = assign_event_gateway_names_from_ocr(parsed_json, ocr_text)

    return {
        "json": parsed_json,
        "raw": output_text
    }
prompts/prompt.txt
ADDED
@@ -0,0 +1,145 @@
You are an advanced BPMN diagram analysis engine specialized in extracting structured data from visual BPMN diagrams.

You will be given an image containing a BPMN diagram. Your job is to identify, categorize, and extract all visible BPMN components based strictly on their visual appearance and layout.

[IMPORTANT] Instructions:
- Output must be a single structured JSON object.
- Do NOT include any explanation or extra text — only the JSON output.
- Do NOT infer or assume missing details. Only include elements that are visually present and identifiable.
- If a label is unreadable or not clearly visible, exclude that element.
- Detect and extract all BPMN components strictly based on visual appearance.
- Include bounding boxes for all components.
- Identify all text labels near tasks, events, and gateways and attach them accordingly.
- Detect arrow types: solid arrows = Sequence Flows, dashed arrows = Message Flows.
- Do not skip or infer missing elements.
- If any label is attached to an event (e.g., "Money received", "Customer disagreed"), include it in the JSON output.


[VISUAL ELEMENTS TO DETECT AND EXTRACT]
- Pools: Rectangles enclosing process areas.
- Lanes: Subdivisions within pools.
- Tasks: Rounded rectangles representing activities.
- Events: Circles (Start = thin border, Intermediate = double border, End = thick border).
- Gateways: Diamond shapes (Exclusive, Parallel, Inclusive).
- Sequence Flows: Solid arrows showing progression.
- Message Flows: Dashed arrows indicating communication between participants.
- Data Objects: Document symbols representing data or content.
- Data Stores: Cylindrical storage symbols.
- Other BPMN Artifacts: Any annotations, groups, or visual elements explicitly present.

[IMPORTANT]
For every event and gateway, detect and include the text label located near or next to the symbol,
and assign it as the name field. Do not skip event/gateway labels even if the labels are outside the shape.
Treat these labels as part of the component. If a name is not detected, return an empty string for name, do not omit the field.

[EXAMPLE JSON OUTPUT FORMAT]
Please follow the structure below exactly. Your output must start with a single JSON object as shown:

{{
  "pools": [
    {{
      "id": "pool_1",
      "name": "Customer Process",
      "bounding_box": {{ "x": 10, "y": 20, "width": 1200, "height": 700 }}
    }}
  ],
  "lanes": [
    {{
      "id": "lane_1",
      "name": "Customer",
      "bounding_box": {{ "x": 50, "y": 30, "width": 1150, "height": 200 }},
      "parent_pool": "pool_1"
    }}
  ],
  "tasks": [
    {{
      "id": "task_1",
      "name": "Submit Request",
      "lane": "Customer",
      "bounding_box": {{ "x": 100, "y": 50, "width": 150, "height": 80 }},
      "incoming": ["start_event_1"],
      "outgoing": ["task_2"]
    }}
  ],
  "events": [
    {{
      "id": "start_event_1",
      "type": "StartEvent",
      "name": "Start Process",
      "bounding_box": {{ "x": 50, "y": 75, "width": 40, "height": 40 }},
      "outgoing": ["task_1"]
    }},
    {{
      "id": "end_event_1",
      "type": "EndEvent",
      "name": "Process Complete",
      "bounding_box": {{ "x": 1200, "y": 600, "width": 40, "height": 40 }},
      "incoming": ["task_5"]
    }},
    {{
      "id": "intermediate_event_1",
      "type": "IntermediateEvent",
      "name": "Customer Disagreed",
      "bounding_box": { "x": 600, "y": 200, "width": 40, "height": 40 },
      "incoming": ["task_3"],
      "outgoing": ["task_6"]
    }}
  ],
  "gateways": [
    {{
      "id": "gateway_1",
      "type": "ExclusiveGateway",
      "name": "Request Valid?",
      "bounding_box": {{ "x": 600, "y": 150, "width": 50, "height": 50 }},
      "incoming": ["task_3"],
      "outgoing": ["task_4", "task_5"]
    }},
    {{
      "id": "gateway_2",
      "type": "ParallelGateway",
      "name": "Split Tasks",
      "bounding_box": { "x": 700, "y": 100, "width": 50, "height": 50 },
      "incoming": ["task_4"],
      "outgoing": ["task_5", "task_6"]
    }}
  ],
  "datastores": [
    {{
      "id": "data_1",
      "name": "Customer Database",
      "bounding_box": {{ "x": 800, "y": 400, "width": 100, "height": 100 }},
      "incoming": ["task_6"],
      "outgoing": ["task_7"]
    }}
  ],
  "flows": [
    {{
      "id": "flow_1",
      "type": "SequenceFlow",
      "name": "Submit Review",
      "source": "start_event_1",
      "target": "task_1",
      "waypoints": [
        {{ "x": 70, "y": 100 }},
        {{ "x": 100, "y": 100 }}
      ]
    }},
    {{
      "id": "flow_2",
      "type": "MessageFlow",
      "name": "Disagreement Notification",
      "source": "task_4",
      "target": "external_system",
      "waypoints": [
        {{ "x": 400, "y": 200 }},
        {{ "x": 500, "y": 300 }}
      ]
    }}
  ]
}}

[FINAL INSTRUCTIONS]
- Return ONLY the final JSON structure without any introductory or explanatory text.
- Make sure the output is valid JSON and complete.
- Do NOT include placeholder elements or guess missing labels.
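
One detail worth flagging about the template above: the doubled braces ({{ and }}) are the escape form Python's str.format uses for literal braces, but every load_prompt in this upload reads the file verbatim with f.read(), so the model actually receives the {{ ... }} text unchanged (and two bounding_box entries use single braces, so the escaping is also mixed). A small illustration of the difference (plain Python, not part of the upload):

# Hypothetical snippet: how doubled braces behave with and without str.format
template = '{{ "pools": [ {{ "id": "pool_1" }} ] }}'

print(template)           # read verbatim, as load_prompt() does: {{ "pools": [ {{ "id": "pool_1" }} ] }}
print(template.format())  # after str.format, the braces collapse: { "pools": [ { "id": "pool_1" } ] }
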
requirements.txt
ADDED
@@ -0,0 +1,10 @@
torch
git+https://github.com/huggingface/transformers
pillow
gradio
qwen-vl-utils
flash_attn
vllm
mistral_common
paddleocr
paddlepaddle