put online demo
- .gitignore +13 -0
- OCR.py +415 -0
- demo_streamlit.py +339 -0
- display.py +181 -0
- eval.py +649 -0
- flask.py +6 -0
- htlm_webpage.py +141 -0
- packages.txt +1 -0
- requirements.txt +10 -0
- toXML.py +351 -0
- train.py +394 -0
- utils.py +936 -0
.gitignore
ADDED
@@ -0,0 +1,13 @@
__pycache__/
temp/
VISION_KEY.json
*.pth
.streamlit/secrets.toml
backup/
OCR.py
ADDED
@@ -0,0 +1,415 @@

import os
from azure.ai.vision.imageanalysis import ImageAnalysisClient
from azure.ai.vision.imageanalysis.models import VisualFeatures
from azure.core.credentials import AzureKeyCredential
import time
import numpy as np
import networkx as nx
from eval import iou
from utils import class_dict, proportion_inside
import json
from utils import rescale_boxes as rescale
import streamlit as st

VISION_KEY = st.secrets["VISION_KEY"]
VISION_ENDPOINT = st.secrets["VISION_ENDPOINT"]

"""
# If local execution
with open("VISION_KEY.json", "r") as json_file:
    json_data = json.load(json_file)

# Step 2: Parse the JSON data (this is done by json.load automatically)
VISION_KEY = json_data["VISION_KEY"]
VISION_ENDPOINT = json_data["VISION_ENDPOINT"]
"""


def sample_ocr_image_file(image_data):
    # Set the values of your computer vision endpoint and computer vision key
    # as environment variables:
    try:
        endpoint = VISION_ENDPOINT
        key = VISION_KEY
    except KeyError:
        print("Missing environment variable 'VISION_ENDPOINT' or 'VISION_KEY'")
        print("Set them before running this sample.")
        exit()

    # Create an Image Analysis client
    client = ImageAnalysisClient(
        endpoint=endpoint,
        credential=AzureKeyCredential(key)
    )

    # Extract text (OCR) from an image stream. This will be a synchronous (blocking) call.
    result = client.analyze(
        image_data=image_data,
        visual_features=[VisualFeatures.READ]
    )

    return result


def text_prediction(image):
    # transform the image into a byte array
    image.save('temp.jpg')
    with open('temp.jpg', 'rb') as f:
        image_data = f.read()
    ocr_result = sample_ocr_image_file(image_data)
    # delete the temporary image
    os.remove('temp.jpg')
    return ocr_result

def filter_text(ocr_result, threshold=0.5):
    words_to_cancel = {"+",".",",","#","@","!","?","(",")","[","]","{","}","<",">","/","\\","|","-","_","=","&","^","%","$","£","€","¥","¢","¤","§","©","®","™","°","±","×","÷","¶","∆","∏","∑","∞","√","∫","≈","≠","≤","≥","≡","∼"}
    # Add every other one-letter word to the list of words to cancel, except 'I' and 'a'
    for letter in "bcdefghjklmnopqrstuvwxyz1234567890": # All lowercase letters except 'a'
        words_to_cancel.add(letter)
        words_to_cancel.add("i")
        words_to_cancel.add(letter.upper()) # Add the uppercase version as well
    characters_to_cancel = {"+", "<", ">"} # Characters to cancel

    list_of_lines = []

    for block in ocr_result['readResult']['blocks']:
        for line in block['lines']:
            line_text = []
            x_min, y_min = float('inf'), float('inf')
            x_max, y_max = float('-inf'), float('-inf')
            for word in line['words']:
                if word['text'] in words_to_cancel or any(disallowed_char in word['text'] for disallowed_char in characters_to_cancel):
                    continue
                if word['confidence'] > threshold:
                    if word['text']:
                        line_text.append(word['text'])
                        x = [point['x'] for point in word['boundingPolygon']]
                        y = [point['y'] for point in word['boundingPolygon']]
                        x_min = min(x_min, min(x))
                        y_min = min(y_min, min(y))
                        x_max = max(x_max, max(x))
                        y_max = max(y_max, max(y))
            if line_text: # If there are valid words in the line
                list_of_lines.append({
                    'text': ' '.join(line_text),
                    'boundingBox': [x_min, y_min, x_max, y_max]
                })

    list_text = []
    list_bbox = []
    for i in range(len(list_of_lines)):
        list_text.append(list_of_lines[i]['text'])
    for i in range(len(list_of_lines)):
        list_bbox.append(list_of_lines[i]['boundingBox'])

    list_of_lines = [list_bbox, list_text]

    return list_of_lines


def get_box_points(box):
    """Returns all critical points of a box: corners and midpoints of edges."""
    xmin, ymin, xmax, ymax = box
    return np.array([
        [xmin, ymin],               # Bottom-left corner
        [xmax, ymin],               # Bottom-right corner
        [xmin, ymax],               # Top-left corner
        [xmax, ymax],               # Top-right corner
        [(xmin + xmax) / 2, ymin],  # Midpoint of bottom edge
        [(xmin + xmax) / 2, ymax],  # Midpoint of top edge
        [xmin, (ymin + ymax) / 2],  # Midpoint of left edge
        [xmax, (ymin + ymax) / 2]   # Midpoint of right edge
    ])

def min_distance_between_boxes(box1, box2):
    """Computes the minimum distance between two boxes considering all critical points."""
    points1 = get_box_points(box1)
    points2 = get_box_points(box2)

    min_dist = float('inf')
    for point1 in points1:
        for point2 in points2:
            dist = np.linalg.norm(point1 - point2)
            if dist < min_dist:
                min_dist = dist
    return min_dist


def is_inside(box1, box2):
    """Check if the center of box1 is inside box2."""
    x_center = (box1[0] + box1[2]) / 2
    y_center = (box1[1] + box1[3]) / 2
    return box2[0] <= x_center <= box2[2] and box2[1] <= y_center <= box2[3]

def are_close(box1, box2, threshold=50):
    """Determines if boxes are close based on their corners and center points."""
    corners1 = np.array([
        [box1[0], box1[1]], [box1[0], box1[3]], [box1[2], box1[1]], [box1[2], box1[3]],
        [(box1[0]+box1[2])/2, box1[1]], [(box1[0]+box1[2])/2, box1[3]],
        [box1[0], (box1[1]+box1[3])/2], [box1[2], (box1[1]+box1[3])/2]
    ])
    corners2 = np.array([
        [box2[0], box2[1]], [box2[0], box2[3]], [box2[2], box2[1]], [box2[2], box2[3]],
        [(box2[0]+box2[2])/2, box2[1]], [(box2[0]+box2[2])/2, box2[3]],
        [box2[0], (box2[1]+box2[3])/2], [box2[2], (box2[1]+box2[3])/2]
    ])
    for c1 in corners1:
        for c2 in corners2:
            if np.linalg.norm(c1 - c2) < threshold:
                return True
    return False

def find_closest_box(text_box, all_boxes, labels, threshold, iou_threshold=0.5):
    """Find the closest box to the given text box within a specified threshold."""
    min_distance = float('inf')
    closest_index = None

    # check if the text is inside a sequenceFlow
    for j in range(len(all_boxes)):
        if proportion_inside(text_box, all_boxes[j]) > iou_threshold and labels[j] == list(class_dict.values()).index('sequenceFlow'):
            return j

    for i, box in enumerate(all_boxes):
        # Compute the center of both boxes
        center_text = np.array([(text_box[0] + text_box[2]) / 2, (text_box[1] + text_box[3]) / 2])
        center_box = np.array([(box[0] + box[2]) / 2, (box[1] + box[3]) / 2])

        # Calculate Euclidean distance between centers
        distance = np.linalg.norm(center_text - center_box)

        # Update closest box if this box is nearer
        if distance < min_distance:
            min_distance = distance
            closest_index = i

    # Check if the closest box found is within the acceptable threshold
    if min_distance < threshold:
        return closest_index

    return None


def is_vertical(box):
    """Determine if the text in the bounding box is vertically aligned."""
    width = box[2] - box[0]
    height = box[3] - box[1]
    return (height > 2*width)

def group_texts(task_boxes, text_boxes, texts, min_dist=50, iou_threshold=0.8, percentage_thresh=0.8):
    """Maps text boxes to task boxes and groups texts within each task based on proximity."""
    G = nx.Graph()

    # Map each text box to the nearest task box
    task_to_texts = {i: [] for i in range(len(task_boxes))}
    information_texts = [] # texts not inside any task box
    text_to_task_mapped = [False] * len(text_boxes)

    for idx, text_box in enumerate(text_boxes):
        mapped = False
        for jdx, task_box in enumerate(task_boxes):
            if proportion_inside(text_box, task_box) > iou_threshold:
                task_to_texts[jdx].append(idx)
                text_to_task_mapped[idx] = True
                mapped = True
                break
        if not mapped:
            information_texts.append(idx)

    all_grouped_texts = []
    sentence_boxes = [] # Store the bounding box for each sentence

    # Process texts for each task
    for task_texts in task_to_texts.values():
        G.clear()
        for i in task_texts:
            G.add_node(i)
            for j in task_texts:
                if i != j and are_close(text_boxes[i], text_boxes[j]) and not is_vertical(text_boxes[i]) and not is_vertical(text_boxes[j]):
                    G.add_edge(i, j)

        groups = list(nx.connected_components(G))
        for group in groups:
            group = list(group)
            lines = {}
            for idx in group:
                y_center = (text_boxes[idx][1] + text_boxes[idx][3]) / 2
                found_line = False
                for line in lines:
                    if abs(y_center - line) < (text_boxes[idx][3] - text_boxes[idx][1]) / 2:
                        lines[line].append(idx)
                        found_line = True
                        break
                if not found_line:
                    lines[y_center] = [idx]

            sorted_lines = sorted(lines.keys())
            grouped_texts = []
            min_x = min_y = float('inf')
            max_x = max_y = -float('inf')

            for line in sorted_lines:
                sorted_indices = sorted(lines[line], key=lambda idx: text_boxes[idx][0])
                line_text = ' '.join(texts[idx] for idx in sorted_indices)
                grouped_texts.append(line_text)

                for idx in sorted_indices:
                    box = text_boxes[idx]
                    min_x = min(min_x-5, box[0]-5)
                    min_y = min(min_y-5, box[1]-5)
                    max_x = max(max_x+5, box[2]+5)
                    max_y = max(max_y+5, box[3]+5)

            all_grouped_texts.append(' '.join(grouped_texts))
            sentence_boxes.append([min_x, min_y, max_x, max_y])

    # Group information texts
    G.clear()
    info_sentence_boxes = []

    for i in information_texts:
        G.add_node(i)
        for j in information_texts:
            if i != j and are_close(text_boxes[i], text_boxes[j], percentage_thresh * min_dist) and not is_vertical(text_boxes[i]) and not is_vertical(text_boxes[j]):
                G.add_edge(i, j)

    info_groups = list(nx.connected_components(G))
    information_grouped_texts = []
    for group in info_groups:
        group = list(group)
        lines = {}
        for idx in group:
            y_center = (text_boxes[idx][1] + text_boxes[idx][3]) / 2
            found_line = False
            for line in lines:
                if abs(y_center - line) < (text_boxes[idx][3] - text_boxes[idx][1]) / 2:
                    lines[line].append(idx)
                    found_line = True
                    break
            if not found_line:
                lines[y_center] = [idx]

        sorted_lines = sorted(lines.keys())
        grouped_texts = []
        min_x = min_y = float('inf')
        max_x = max_y = -float('inf')

        for line in sorted_lines:
            sorted_indices = sorted(lines[line], key=lambda idx: text_boxes[idx][0])
            line_text = ' '.join(texts[idx] for idx in sorted_indices)
            grouped_texts.append(line_text)

            for idx in sorted_indices:
                box = text_boxes[idx]
                min_x = min(min_x, box[0])
                min_y = min(min_y, box[1])
                max_x = max(max_x, box[2])
                max_y = max(max_y, box[3])

        information_grouped_texts.append(' '.join(grouped_texts))
        info_sentence_boxes.append([min_x, min_y, max_x, max_y])

    return all_grouped_texts, sentence_boxes, information_grouped_texts, info_sentence_boxes


def mapping_text(full_pred, text_pred, print_sentences=False, percentage_thresh=0.6, scale=1.0, iou_threshold=0.5):

    ########### REWORK THIS FUNCTION ###########
    # rework the function so that it first takes the elements that are inside the tasks and then uses a distance threshold for the other elements
    # or otherwise compute the distance between all the elements, not only the tasks

    # Example usage
    boxes = rescale(scale, full_pred['boxes'])

    min_dist = 200
    labels = full_pred['labels']
    avoid = [list(class_dict.values()).index('pool'), list(class_dict.values()).index('lane'), list(class_dict.values()).index('sequenceFlow'), list(class_dict.values()).index('messageFlow'), list(class_dict.values()).index('dataAssociation')]
    for i in range(len(boxes)):
        box1 = boxes[i]
        if labels[i] in avoid:
            continue
        for j in range(i + 1, len(boxes)):
            box2 = boxes[j]
            if labels[j] in avoid:
                continue
            dist = min_distance_between_boxes(box1, box2)
            min_dist = min(min_dist, dist)

    #print("Minimum distance between boxes:", min_dist)

    text_pred[0] = rescale(scale, text_pred[0])
    task_boxes = [box for i, box in enumerate(boxes) if full_pred['labels'][i] == list(class_dict.values()).index('task')]
    grouped_sentences, sentence_bounding_boxes, info_texts, info_boxes = group_texts(task_boxes, text_pred[0], text_pred[1], min_dist=min_dist)
    BPMN_id = set(full_pred['BPMN_id']) # This ensures uniqueness of task names
    text_mapping = {id: '' for id in BPMN_id}

    if print_sentences:
        for sentence, box in zip(grouped_sentences, sentence_bounding_boxes):
            print("Task-related Text:", sentence)
            print("Bounding Box:", box)
        print("Information Texts:", info_texts)
        print("Information Bounding Boxes:", info_boxes)

    # Map the grouped sentences to the corresponding task
    for i in range(len(sentence_bounding_boxes)):
        for j in range(len(boxes)):
            if proportion_inside(sentence_bounding_boxes[i], boxes[j]) > iou_threshold and full_pred['labels'][j] == list(class_dict.values()).index('task'):
                text_mapping[full_pred['BPMN_id'][j]] = grouped_sentences[i]

    # Map the grouped sentences to the corresponding pool
    for i in range(len(info_boxes)):
        if is_vertical(info_boxes[i]):
            for j in range(len(boxes)):
                if proportion_inside(info_boxes[i], boxes[j]) > 0 and full_pred['labels'][j] == list(class_dict.values()).index('pool'):
                    print("Text:", info_texts[i], "associate with ", full_pred['BPMN_id'][j])
                    bpmn_id = full_pred['BPMN_id'][j]
                    # Append new text or create new entry if not existing
                    if bpmn_id in text_mapping:
                        text_mapping[bpmn_id] += " " + info_texts[i] # Append text with a space in between
                    else:
                        text_mapping[bpmn_id] = info_texts[i]
                    info_texts[i] = '' # Clear the text to avoid re-use

    # Map the grouped sentences to the corresponding object
    for i in range(len(info_boxes)):
        if is_vertical(info_boxes[i]):
            continue # Skip if the text is vertical
        for j in range(len(boxes)):
            if info_texts[i] == '':
                continue # Skip if there's no text
            if (proportion_inside(info_boxes[i], boxes[j]) > 0 or are_close(info_boxes[i], boxes[j], threshold=percentage_thresh*min_dist)) and (full_pred['labels'][j] == list(class_dict.values()).index('event')
                    or full_pred['labels'][j] == list(class_dict.values()).index('messageEvent')
                    or full_pred['labels'][j] == list(class_dict.values()).index('timerEvent')
                    or full_pred['labels'][j] == list(class_dict.values()).index('dataObject')):
                bpmn_id = full_pred['BPMN_id'][j]
                # Append new text or create new entry if not existing
                if bpmn_id in text_mapping:
                    text_mapping[bpmn_id] += " " + info_texts[i] # Append text with a space in between
                else:
                    text_mapping[bpmn_id] = info_texts[i]
                info_texts[i] = '' # Clear the text to avoid re-use

    # Map the grouped sentences to the corresponding flow
    for i in range(len(info_boxes)):
        if info_texts[i] == '' or is_vertical(info_boxes[i]):
            continue # Skip if there's no text
        # Find the closest box within the defined threshold
        closest_index = find_closest_box(info_boxes[i], boxes, full_pred['labels'], threshold=4*min_dist)
        if closest_index is not None and (full_pred['labels'][closest_index] == list(class_dict.values()).index('sequenceFlow') or full_pred['labels'][closest_index] == list(class_dict.values()).index('messageFlow')):
            bpmn_id = full_pred['BPMN_id'][closest_index]
            # Append new text or create new entry if not existing
            if bpmn_id in text_mapping:
                text_mapping[bpmn_id] += " " + info_texts[i] # Append text with a space in between
            else:
                text_mapping[bpmn_id] = info_texts[i]
            info_texts[i] = '' # Clear the text to avoid re-use

    if print_sentences:
        print("Text Mapping:", text_mapping)
        print("Information Texts left:", info_texts)

    return text_mapping
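For orientation, a minimal sketch of how the helpers above are chained (the same sequence the Streamlit demo below uses); the image path is illustrative, and `full_pred` is assumed to come from `eval.full_prediction`:

    from PIL import Image
    from OCR import text_prediction, filter_text, mapping_text

    image = Image.open("diagram.png").convert("RGB")    # hypothetical input diagram
    ocr_result = text_prediction(image)                 # Azure Read OCR over the image
    text_pred = filter_text(ocr_result, threshold=0.5)  # [boxes, texts] for confident, meaningful words
    # With a detection result full_pred from eval.full_prediction:
    # text_mapping = mapping_text(full_pred, text_pred, percentage_thresh=0.5)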
demo_streamlit.py
ADDED
@@ -0,0 +1,339 @@
import streamlit as st
import streamlit.components.v1 as components
from PIL import Image
import torch
from torchvision.transforms import functional as F
from PIL import Image, ImageEnhance
from htlm_webpage import display_bpmn_xml
import gc
import psutil

from OCR import text_prediction, filter_text, mapping_text, rescale
from train import prepare_model
from utils import draw_annotations, create_loader, class_dict, arrow_dict, object_dict
from toXML import calculate_pool_bounds, add_diagram_elements
from pathlib import Path
from toXML import create_bpmn_object, create_flow_element
import xml.etree.ElementTree as ET
import numpy as np
from display import draw_stream
from eval import full_prediction
from streamlit_image_comparison import image_comparison
from xml.dom import minidom
from streamlit_cropper import st_cropper
from streamlit_drawable_canvas import st_canvas
from utils import find_closest_object
from train import get_faster_rcnn_model, get_arrow_model
import gdown

def get_memory_usage():
    process = psutil.Process()
    mem_info = process.memory_info()
    return mem_info.rss / (1024 ** 2) # Return memory usage in MB

def clear_memory():
    st.session_state.clear()
    gc.collect()

# Function to read XML content from a file
def read_xml_file(filepath):
    """ Read XML content from a file """
    with open(filepath, 'r', encoding='utf-8') as file:
        return file.read()

# Function to modify bounding box positions based on the given sizes
def modif_box_pos(pred, size):
    for i, (x1, y1, x2, y2) in enumerate(pred['boxes']):
        center = [(x1 + x2) / 2, (y1 + y2) / 2]
        label = class_dict[pred['labels'][i]]
        if label in size:
            pred['boxes'][i] = [center[0] - size[label][0] / 2, center[1] - size[label][1] / 2, center[0] + size[label][0] / 2, center[1] + size[label][1] / 2]
    return pred

# Function to create a BPMN XML file from prediction results
def create_XML(full_pred, text_mapping, scale):
    namespaces = {
        'bpmn': 'http://www.omg.org/spec/BPMN/20100524/MODEL',
        'bpmndi': 'http://www.omg.org/spec/BPMN/20100524/DI',
        'di': 'http://www.omg.org/spec/DD/20100524/DI',
        'dc': 'http://www.omg.org/spec/DD/20100524/DC',
        'xsi': 'http://www.w3.org/2001/XMLSchema-instance'
    }

    size_elements = {
        'start': (54, 54),
        'task': (150, 120),
        'message': (54, 54),
        'messageEvent': (54, 54),
        'end': (54, 54),
        'exclusiveGateway': (75, 75),
        'event': (54, 54),
        'parallelGateway': (75, 75),
        'sequenceFlow': (225, 15),
        'pool': (375, 150),
        'lane': (300, 150),
        'dataObject': (60, 90),
        'dataStore': (90, 90),
        'subProcess': (180, 135),
        'eventBasedGateway': (75, 75),
        'timerEvent': (60, 60),
    }

    definitions = ET.Element('bpmn:definitions', {
        'xmlns:xsi': namespaces['xsi'],
        'xmlns:bpmn': namespaces['bpmn'],
        'xmlns:bpmndi': namespaces['bpmndi'],
        'xmlns:di': namespaces['di'],
        'xmlns:dc': namespaces['dc'],
        'targetNamespace': "http://example.bpmn.com",
        'id': "simpleExample"
    })

    # Create BPMN collaboration element
    collaboration = ET.SubElement(definitions, 'bpmn:collaboration', id='collaboration_1')

    # Create BPMN process elements
    process = []
    for idx in range(len(full_pred['pool_dict'].items())):
        process_id = f'process_{idx+1}'
        process.append(ET.SubElement(definitions, 'bpmn:process', id=process_id, isExecutable='false', name=text_mapping[full_pred['BPMN_id'][list(full_pred['pool_dict'].keys())[idx]]]))

    bpmndi = ET.SubElement(definitions, 'bpmndi:BPMNDiagram', id='BPMNDiagram_1')
    bpmnplane = ET.SubElement(bpmndi, 'bpmndi:BPMNPlane', id='BPMNPlane_1', bpmnElement='collaboration_1')

    full_pred['boxes'] = rescale(scale, full_pred['boxes'])

    # Add diagram elements for each pool
    for idx, (pool_index, keep_elements) in enumerate(full_pred['pool_dict'].items()):
        pool_id = f'participant_{idx+1}'
        pool = ET.SubElement(collaboration, 'bpmn:participant', id=pool_id, processRef=f'process_{idx+1}', name=text_mapping[full_pred['BPMN_id'][list(full_pred['pool_dict'].keys())[idx]]])

        # Calculate the bounding box for the pool
        if len(keep_elements) == 0:
            min_x, min_y, max_x, max_y = full_pred['boxes'][pool_index]
            pool_width = max_x - min_x
            pool_height = max_y - min_y
        else:
            min_x, min_y, max_x, max_y = calculate_pool_bounds(full_pred, keep_elements, size_elements)
            pool_width = max_x - min_x + 100 # Adding padding
            pool_height = max_y - min_y + 100 # Adding padding

        add_diagram_elements(bpmnplane, pool_id, min_x - 50, min_y - 50, pool_width, pool_height)

    # Create BPMN elements for each pool
    for idx, (pool_index, keep_elements) in enumerate(full_pred['pool_dict'].items()):
        create_bpmn_object(process[idx], bpmnplane, text_mapping, definitions, size_elements, full_pred, keep_elements)

    # Create message flow elements
    message_flows = [i for i, label in enumerate(full_pred['labels']) if class_dict[label] == 'messageFlow']
    for idx in message_flows:
        create_flow_element(bpmnplane, text_mapping, idx, size_elements, full_pred, collaboration, message=True)

    # Create sequence flow elements
    for idx, (pool_index, keep_elements) in enumerate(full_pred['pool_dict'].items()):
        for i in keep_elements:
            if full_pred['labels'][i] == list(class_dict.values()).index('sequenceFlow'):
                create_flow_element(bpmnplane, text_mapping, i, size_elements, full_pred, process[idx], message=False)

    # Generate pretty XML string
    tree = ET.ElementTree(definitions)
    rough_string = ET.tostring(definitions, 'utf-8')
    reparsed = minidom.parseString(rough_string)
    pretty_xml_as_string = reparsed.toprettyxml(indent=" ")

    full_pred['boxes'] = rescale(1/scale, full_pred['boxes'])

    return pretty_xml_as_string


# Function to load the models only once and use session state to keep track of it
def load_models():
    with st.spinner('Loading model...'):
        model_object = get_faster_rcnn_model(len(object_dict))
        model_arrow = get_arrow_model(len(arrow_dict), 2)

        url_arrow = 'https://drive.google.com/uc?id=1xwfvo7BgDWz-1jAiJC1DCF0Wp8YlFNWt'
        url_object = 'https://drive.google.com/uc?id=1GiM8xOXG6M6r8J9HTOeMJz9NKu7iumZi'

        # Define paths to save models
        output_arrow = 'model_arrow.pth'
        output_object = 'model_object.pth'

        # Download models using gdown
        if not Path(output_arrow).exists():
            # Download models using gdown
            gdown.download(url_arrow, output_arrow, quiet=False)
        else:
            print('Model arrow downloaded from local')
        if not Path(output_object).exists():
            gdown.download(url_object, output_object, quiet=False)
        else:
            print('Model object downloaded from local')

        # Load models
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        model_arrow.load_state_dict(torch.load(output_arrow, map_location=device))
        model_object.load_state_dict(torch.load(output_object, map_location=device))
        st.session_state.model_loaded = True
        st.session_state.model_arrow = model_arrow
        st.session_state.model_object = model_object

# Function to prepare the image for processing
def prepare_image(image, pad=True, new_size=(1333, 1333)):
    original_size = image.size
    # Calculate scale to fit the new size while maintaining aspect ratio
    scale = min(new_size[0] / original_size[0], new_size[1] / original_size[1])
    new_scaled_size = (int(original_size[0] * scale), int(original_size[1] * scale))
    # Resize image to new scaled size
    image = F.resize(image, (new_scaled_size[1], new_scaled_size[0]))

    if pad:
        enhancer = ImageEnhance.Brightness(image)
        image = enhancer.enhance(1.5) # Adjust the brightness if necessary
        # Pad the resized image to make it exactly the desired size
        padding = [0, 0, new_size[0] - new_scaled_size[0], new_size[1] - new_scaled_size[1]]
        image = F.pad(image, padding, fill=200, padding_mode='edge')

    return new_scaled_size, image

# Function to display various options for image annotation
def display_options(image, score_threshold):
    col1, col2, col3, col4, col5 = st.columns(5)
    with col1:
        write_class = st.toggle("Write Class", value=True)
        draw_keypoints = st.toggle("Draw Keypoints", value=True)
        draw_boxes = st.toggle("Draw Boxes", value=True)
    with col2:
        draw_text = st.toggle("Draw Text", value=False)
        write_text = st.toggle("Write Text", value=False)
        draw_links = st.toggle("Draw Links", value=False)
    with col3:
        write_score = st.toggle("Write Score", value=True)
        write_idx = st.toggle("Write Index", value=False)
    with col4:
        # Define options for the dropdown menu
        dropdown_options = [list(class_dict.values())[i] for i in range(len(class_dict))]
        dropdown_options[0] = 'all'
        selected_option = st.selectbox("Show class", dropdown_options)

    # Draw the annotated image with selected options
    annotated_image = draw_stream(
        np.array(image), prediction=st.session_state.prediction, text_predictions=st.session_state.text_pred,
        draw_keypoints=draw_keypoints, draw_boxes=draw_boxes, draw_links=draw_links, draw_twins=False, draw_grouped_text=draw_text,
        write_class=write_class, write_text=write_text, keypoints_correction=True, write_idx=write_idx, only_print=selected_option,
        score_threshold=score_threshold, write_score=write_score, resize=True, return_image=True, axis=True
    )

    # Display the original and annotated images side by side
    image_comparison(
        img1=annotated_image,
        img2=image,
        label1="Annotated Image",
        label2="Original Image",
        starting_position=99,
        width=1000,
    )

# Function to perform inference on the uploaded image using the loaded models
def perform_inference(model_object, model_arrow, image, score_threshold):
    _, uploaded_image = prepare_image(image, pad=False)

    img_tensor = F.to_tensor(prepare_image(image.convert('RGB'))[1])

    # Display original image
    if 'image_placeholder' not in st.session_state:
        image_placeholder = st.empty() # Create an empty placeholder
        image_placeholder.image(uploaded_image, caption='Original Image', width=1000)

    # Prediction
    _, st.session_state.prediction = full_prediction(model_object, model_arrow, img_tensor, score_threshold=score_threshold, iou_threshold=0.5)

    # Perform OCR on the uploaded image
    ocr_results = text_prediction(uploaded_image)

    # Filter and map OCR results to prediction results
    st.session_state.text_pred = filter_text(ocr_results, threshold=0.5)
    st.session_state.text_mapping = mapping_text(st.session_state.prediction, st.session_state.text_pred, print_sentences=False, percentage_thresh=0.5)

    # Remove the original image display
    image_placeholder.empty()

    # Force garbage collection
    gc.collect()

@st.cache_data
def get_image(uploaded_file):
    return Image.open(uploaded_file).convert('RGB')

def main():
    st.set_page_config(layout="wide")
    st.title("BPMN model recognition demo")

    # Display current memory usage
    memory_usage = get_memory_usage()
    print(f"Current memory usage: {memory_usage:.2f} MB")

    # Initialize the session state for storing pool bounding boxes
    if 'pool_bboxes' not in st.session_state:
        st.session_state.pool_bboxes = []

    # Load the models using the defined function
    if 'model_object' not in st.session_state or 'model_arrow' not in st.session_state:
        clear_memory()
        load_models()

    model_arrow = st.session_state.model_arrow
    model_object = st.session_state.model_object

    # Create the layout for the app
    col1, col2 = st.columns(2)
    with col1:
        # Create a file uploader for the user to upload an image
        uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])

    # Display the uploaded image if the user has uploaded an image
    if uploaded_file is not None:
        original_image = get_image(uploaded_file)
        col1, col2 = st.columns(2)

        # Create a cropper to allow the user to crop the image and display the cropped image
        with col1:
            cropped_image = st_cropper(original_image, realtime_update=True, box_color='#0000FF', should_resize_image=True, default_coords=(30, original_image.size[0]-30, 30, original_image.size[1]-30))
        with col2:
            st.image(cropped_image, caption="Cropped Image", use_column_width=False, width=500)

        # Display the options for the user to set the score threshold and scale
        if cropped_image is not None:
            col1, col2, col3 = st.columns(3)
            with col1:
                score_threshold = st.slider("Set score threshold for prediction", min_value=0.0, max_value=1.0, value=0.5, step=0.05)
            with col2:
                st.session_state.scale = st.slider("Set scale for XML file", min_value=0.1, max_value=2.0, value=1.0, step=0.1)

            # Launch the prediction when the user clicks the button
            if st.button("Launch Prediction"):
                st.session_state.crop_image = cropped_image
                with st.spinner('Processing...'):
                    perform_inference(model_object, model_arrow, st.session_state.crop_image, score_threshold)
                    st.session_state.prediction = modif_box_pos(st.session_state.prediction, object_dict)

                print('Detection completed!')

    # If the prediction has been made and the user has uploaded an image, display the options for the user to annotate the image
    if 'prediction' in st.session_state and uploaded_file is not None:
        st.success('Detection completed!')
        display_options(st.session_state.crop_image, score_threshold)

        #if st.session_state.prediction_up==True:
        st.session_state.bpmn_xml = create_XML(st.session_state.prediction.copy(), st.session_state.text_mapping, st.session_state.scale)

        display_bpmn_xml(st.session_state.bpmn_xml)

        # Force garbage collection after display
        gc.collect()

if __name__ == "__main__":
    print('Starting the app...')
    main()
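As a usage note, `create_XML` returns the pretty-printed BPMN XML as a string, so outside of Streamlit it can simply be written to disk. A minimal sketch, assuming `prediction` and `text_mapping` come from `eval.full_prediction` and `OCR.mapping_text` as above (the output filename is illustrative):

    from demo_streamlit import create_XML

    bpmn_xml = create_XML(prediction.copy(), text_mapping, scale=1.0)  # copy() because boxes are rescaled in place
    with open("diagram.bpmn", "w", encoding="utf-8") as f:
        f.write(bpmn_xml)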
display.py
ADDED
@@ -0,0 +1,181 @@
from utils import draw_annotations, create_loader, class_dict, resize_boxes, resize_keypoints, find_other_keypoint
import cv2
import numpy as np
import torch
import matplotlib.pyplot as plt
from OCR import group_texts


def draw_stream(image,
                prediction=None,
                text_predictions=None,
                class_dict=class_dict,
                draw_keypoints=False,
                draw_boxes=False,
                draw_text=False,
                draw_links=False,
                draw_twins=False,
                draw_grouped_text=False,
                write_class=False,
                write_score=False,
                write_text=False,
                score_threshold=0.4,
                write_idx=False,
                keypoints_correction=False,
                new_size=(1333, 1333),
                only_print=None,
                axis=False,
                return_image=False,
                resize=False):
    """
    Draws annotations on images including bounding boxes, keypoints, links, and text.

    Parameters:
    - image (np.array): The image on which annotations will be drawn.
    - prediction (dict): Prediction data from a model.
    - text_predictions (tuple): OCR text predictions containing bounding boxes and texts.
    - class_dict (dict): Mapping from class IDs to class names.
    - draw_keypoints (bool): Flag to draw keypoints.
    - draw_boxes (bool): Flag to draw bounding boxes.
    - draw_text (bool): Flag to draw text annotations.
    - draw_links (bool): Flag to draw links between annotations.
    - draw_twins (bool): Flag to draw twin keypoints.
    - write_class (bool): Flag to write class names near the annotations.
    - write_score (bool): Flag to write scores near the annotations.
    - write_text (bool): Flag to write OCR recognized text.
    - score_threshold (float): Threshold for scores above which annotations will be drawn.
    - only_print (str): Specific class name to filter annotations by.
    - resize (bool): Whether to resize annotations to fit the image size.
    """

    # Convert image to RGB (if not already in that format)
    if prediction is None:
        image = image.squeeze(0).permute(1, 2, 0).cpu().numpy()

    image_copy = image.copy()
    scale = max(image.shape[0], image.shape[1]) / 1000

    original_size = (image.shape[0], image.shape[1])
    # Calculate scale to fit the new size while maintaining aspect ratio
    scale_ = min(new_size[0] / original_size[0], new_size[1] / original_size[1])
    new_scaled_size = (int(original_size[0] * scale_), int(original_size[1] * scale_))

    for i in range(len(prediction['boxes'])):
        box = prediction['boxes'][i]
        x1, y1, x2, y2 = box
        if resize:
            x1, y1, x2, y2 = resize_boxes(np.array([box]), (new_scaled_size[1], new_scaled_size[0]), (image_copy.shape[1], image_copy.shape[0]))[0]
        score = prediction['scores'][i]
        if score < score_threshold:
            continue
        if draw_boxes:
            if only_print is not None and only_print != 'all':
                if prediction['labels'][i] != list(class_dict.values()).index(only_print):
                    continue
            cv2.rectangle(image_copy, (int(x1), int(y1)), (int(x2), int(y2)), (0, 0, 0), int(2*scale))
        if write_score:
            cv2.putText(image_copy, str(round(score, 2)), (int(x1), int(y1) + int(15*scale)), cv2.FONT_HERSHEY_SIMPLEX, scale/2, (100, 100, 255), 2)
        if write_idx:
            cv2.putText(image_copy, str(i), (int(x1) + int(15*scale), int(y1) + int(15*scale)), cv2.FONT_HERSHEY_SIMPLEX, 2*scale, (0, 0, 0), 2)

        if write_class and 'labels' in prediction:
            class_id = prediction['labels'][i]
            cv2.putText(image_copy, class_dict[class_id], (int(x1), int(y1) - int(2*scale)), cv2.FONT_HERSHEY_SIMPLEX, scale/2, (255, 100, 100), 2)

    # Draw keypoints if available
    if draw_keypoints and 'keypoints' in prediction:
        for i in range(len(prediction['keypoints'])):
            kp = prediction['keypoints'][i]
            for j in range(kp.shape[0]):
                if prediction['labels'][i] != list(class_dict.values()).index('sequenceFlow') and prediction['labels'][i] != list(class_dict.values()).index('messageFlow') and prediction['labels'][i] != list(class_dict.values()).index('dataAssociation'):
                    continue

                score = prediction['scores'][i]
                if score < score_threshold:
                    continue
                x, y, v = np.array(kp[j])
                x, y, v = resize_keypoints(np.array([kp[j]]), (new_scaled_size[1], new_scaled_size[0]), (image_copy.shape[1], image_copy.shape[0]))[0]
                if j == 0:
                    cv2.circle(image_copy, (int(x), int(y)), int(5*scale), (0, 0, 255), -1)
                else:
                    cv2.circle(image_copy, (int(x), int(y)), int(5*scale), (255, 0, 0), -1)

    # Draw text predictions if available
    if (draw_text or write_text) and text_predictions is not None:
        for i in range(len(text_predictions[0])):
            x1, y1, x2, y2 = text_predictions[0][i]
            text = text_predictions[1][i]
            if resize:
                x1, y1, x2, y2 = resize_boxes(np.array([[float(x1), float(y1), float(x2), float(y2)]]), (new_scaled_size[1], new_scaled_size[0]), (image_copy.shape[1], image_copy.shape[0]))[0]
            if draw_text:
                cv2.rectangle(image_copy, (int(x1), int(y1)), (int(x2), int(y2)), (0, 255, 0), int(2*scale))
            if write_text:
                cv2.putText(image_copy, text, (int(x1 + int(2*scale)), int((y1+y2)/2)), cv2.FONT_HERSHEY_SIMPLEX, scale/2, (0, 0, 0), 2)

    '''Draws links between objects based on the full prediction data.'''
    # check if keypoints detected are the same
    if draw_twins and prediction is not None:
        # Pre-calculate indices for performance
        circle_color = (0, 255, 0) # Green color for the circle
        circle_radius = int(10 * scale) # Circle radius scaled by image scale

        for idx, (key1, key2) in enumerate(prediction['keypoints']):
            if prediction['labels'][idx] not in [list(class_dict.values()).index('sequenceFlow'),
                                                 list(class_dict.values()).index('messageFlow'),
                                                 list(class_dict.values()).index('dataAssociation')]:
                continue
            # Calculate the Euclidean distance between the two keypoints
            distance = np.linalg.norm(key1[:2] - key2[:2])
            if distance < 10:
                x_new, y_new, x, y = find_other_keypoint(idx, prediction)
                cv2.circle(image_copy, (int(x), int(y)), circle_radius, circle_color, -1)
                cv2.circle(image_copy, (int(x_new), int(y_new)), circle_radius, (0, 0, 0), -1)

    # Draw links between objects
    if draw_links == True and prediction is not None:
        for i, (start_idx, end_idx) in enumerate(prediction['links']):
            if start_idx is None or end_idx is None:
                continue
            start_box = prediction['boxes'][start_idx]
            start_box = resize_boxes(np.array([start_box]), (new_scaled_size[1], new_scaled_size[0]), (image_copy.shape[1], image_copy.shape[0]))[0]
            end_box = prediction['boxes'][end_idx]
            end_box = resize_boxes(np.array([end_box]), (new_scaled_size[1], new_scaled_size[0]), (image_copy.shape[1], image_copy.shape[0]))[0]
            current_box = prediction['boxes'][i]
            current_box = resize_boxes(np.array([current_box]), (new_scaled_size[1], new_scaled_size[0]), (image_copy.shape[1], image_copy.shape[0]))[0]
            # Calculate the center of each bounding box
            start_center = ((start_box[0] + start_box[2]) // 2, (start_box[1] + start_box[3]) // 2)
            end_center = ((end_box[0] + end_box[2]) // 2, (end_box[1] + end_box[3]) // 2)
            current_center = ((current_box[0] + current_box[2]) // 2, (current_box[1] + current_box[3]) // 2)
            # Draw a line between the centers of the connected objects
            cv2.line(image_copy, (int(start_center[0]), int(start_center[1])), (int(current_center[0]), int(current_center[1])), (0, 0, 255), int(2*scale))
            cv2.line(image_copy, (int(current_center[0]), int(current_center[1])), (int(end_center[0]), int(end_center[1])), (255, 0, 0), int(2*scale))

    if draw_grouped_text and prediction is not None:
        task_boxes = [box for i, box in enumerate(prediction['boxes']) if prediction['labels'][i] == list(class_dict.values()).index('task')]
        grouped_sentences, sentence_bounding_boxes, info_texts, info_boxes = group_texts(task_boxes, text_predictions[0], text_predictions[1], percentage_thresh=1)
        for i in range(len(info_boxes)):
            x1, y1, x2, y2 = info_boxes[i]
            x1, y1, x2, y2 = resize_boxes(np.array([[float(x1), float(y1), float(x2), float(y2)]]), (new_scaled_size[1], new_scaled_size[0]), (image_copy.shape[1], image_copy.shape[0]))[0]
            cv2.rectangle(image_copy, (int(x1), int(y1)), (int(x2), int(y2)), (0, 255, 0), int(2*scale))
        for i in range(len(sentence_bounding_boxes)):
            x1, y1, x2, y2 = sentence_bounding_boxes[i]
            x1, y1, x2, y2 = resize_boxes(np.array([[float(x1), float(y1), float(x2), float(y2)]]), (new_scaled_size[1], new_scaled_size[0]), (image_copy.shape[1], image_copy.shape[0]))[0]
            cv2.rectangle(image_copy, (int(x1), int(y1)), (int(x2), int(y2)), (0, 255, 0), int(2*scale))

    if return_image:
        return image_copy
    else:
        # Display the image
        plt.figure(figsize=(12, 12))
        plt.imshow(image_copy)
        if axis == False:
            plt.axis('off')
        plt.show()
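A minimal sketch of calling `draw_stream` outside Streamlit, mirroring `display_options` in the demo above; `image` is assumed to be a PIL image and `prediction` / `text_pred` the outputs of `eval.full_prediction` and `OCR.filter_text`:

    import numpy as np
    from display import draw_stream

    annotated = draw_stream(
        np.array(image),
        prediction=prediction,
        text_predictions=text_pred,
        draw_boxes=True,
        draw_keypoints=True,
        write_class=True,
        score_threshold=0.5,
        resize=True,
        return_image=True,  # return a numpy image instead of plotting with matplotlib
    )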
eval.py
ADDED
|
@@ -0,0 +1,649 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
import torch
|
| 3 |
+
from utils import class_dict, object_dict, arrow_dict, find_closest_object, find_other_keypoint, filter_overlap_boxes, iou
|
| 4 |
+
from tqdm import tqdm
|
| 5 |
+
from toXML import create_BPMN_id
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
def non_maximum_suppression(boxes, scores, labels=None, iou_threshold=0.5):
|
| 11 |
+
idxs = np.argsort(scores) # Sort the boxes according to their scores in ascending order
|
| 12 |
+
selected_boxes = []
|
| 13 |
+
|
| 14 |
+
while len(idxs) > 0:
|
| 15 |
+
last = len(idxs) - 1
|
| 16 |
+
i = idxs[last]
|
| 17 |
+
|
| 18 |
+
# Skip if the label is a lane
|
| 19 |
+
if labels is not None and class_dict[labels[i]] == 'lane':
|
| 20 |
+
selected_boxes.append(i)
|
| 21 |
+
idxs = np.delete(idxs, last)
|
| 22 |
+
continue
|
| 23 |
+
|
| 24 |
+
selected_boxes.append(i)
|
| 25 |
+
|
| 26 |
+
# Find the intersection of the box with the rest
|
| 27 |
+
suppress = [last]
|
| 28 |
+
for pos in range(0, last):
|
| 29 |
+
j = idxs[pos]
|
| 30 |
+
if iou(boxes[i], boxes[j]) > iou_threshold:
|
| 31 |
+
suppress.append(pos)
|
| 32 |
+
|
| 33 |
+
idxs = np.delete(idxs, suppress)
|
| 34 |
+
|
| 35 |
+
# Return only the boxes that were selected
|
| 36 |
+
return selected_boxes
|
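A quick sanity check of this NMS helper, as a hedged sketch that is not part of the commit: feed it a couple of hand-made boxes and confirm that only the higher-scoring box of an overlapping pair survives (it assumes this module and the repo's utils.py are importable).

```python
# Illustrative sketch (not part of the commit): two overlapping boxes plus one
# separate box; the higher-scoring box of the overlapping pair should survive.
import numpy as np
from eval import non_maximum_suppression

boxes = np.array([[0, 0, 10, 10], [1, 1, 11, 11], [50, 50, 60, 60]], dtype=float)
scores = np.array([0.9, 0.6, 0.8])
keep = non_maximum_suppression(boxes, scores, iou_threshold=0.5)
print(sorted(keep))  # expected: [0, 2]
```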
| 37 |
+
|
| 38 |
+
|
| 39 |
+
def keypoint_correction(keypoints, boxes, labels, model_dict=arrow_dict, distance_treshold=15):
|
| 40 |
+
for idx, (key1, key2) in enumerate(keypoints):
|
| 41 |
+
if labels[idx] not in [list(model_dict.values()).index('sequenceFlow'),
|
| 42 |
+
list(model_dict.values()).index('messageFlow'),
|
| 43 |
+
list(model_dict.values()).index('dataAssociation')]:
|
| 44 |
+
continue
|
| 45 |
+
# Calculate the Euclidean distance between the two keypoints
|
| 46 |
+
distance = np.linalg.norm(key1[:2] - key2[:2])
|
| 47 |
+
if distance < distance_treshold:
|
| 48 |
+
print('Key modified for index:', idx)
|
| 49 |
+
x_new,y_new, x,y = find_other_keypoint(idx, keypoints, boxes)
|
| 50 |
+
keypoints[idx][0][:2] = [x_new,y_new]
|
| 51 |
+
keypoints[idx][1][:2] = [x,y]
|
| 52 |
+
|
| 53 |
+
return keypoints
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
def object_prediction(model, image, score_threshold=0.5, iou_threshold=0.5):
|
| 57 |
+
model.eval()
|
| 58 |
+
with torch.no_grad():
|
| 59 |
+
image_tensor = image.unsqueeze(0).to(torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu'))
|
| 60 |
+
predictions = model(image_tensor)
|
| 61 |
+
|
| 62 |
+
boxes = predictions[0]['boxes'].cpu().numpy()
|
| 63 |
+
labels = predictions[0]['labels'].cpu().numpy()
|
| 64 |
+
scores = predictions[0]['scores'].cpu().numpy()
|
| 65 |
+
|
| 66 |
+
idx = np.where(scores > score_threshold)[0]
|
| 67 |
+
boxes = boxes[idx]
|
| 68 |
+
scores = scores[idx]
|
| 69 |
+
labels = labels[idx]
|
| 70 |
+
|
| 71 |
+
selected_boxes = non_maximum_suppression(boxes, scores, labels=labels, iou_threshold=iou_threshold)
|
| 72 |
+
|
| 73 |
+
#find the dominant orientation of the tasks by comparing box sizes, and delete the ones that do not match it
|
| 74 |
+
vertical = 0
|
| 75 |
+
for i in range(len(labels)):
|
| 76 |
+
if labels[i] != list(object_dict.values()).index('task'):
|
| 77 |
+
continue
|
| 78 |
+
if boxes[i][2]-boxes[i][0] < boxes[i][3]-boxes[i][1]:
|
| 79 |
+
vertical += 1
|
| 80 |
+
horizontal = len(labels) - vertical
|
| 81 |
+
for i in range(len(labels)):
|
| 82 |
+
if labels[i] != list(object_dict.values()).index('task'):
|
| 83 |
+
continue
|
| 84 |
+
|
| 85 |
+
if vertical < horizontal:
|
| 86 |
+
if boxes[i][2]-boxes[i][0] < boxes[i][3]-boxes[i][1]:
|
| 87 |
+
#find the element in the list and remove it
|
| 88 |
+
if i in selected_boxes:
|
| 89 |
+
selected_boxes.remove(i)
|
| 90 |
+
elif vertical > horizontal:
|
| 91 |
+
if boxes[i][2]-boxes[i][0] > boxes[i][3]-boxes[i][1]:
|
| 92 |
+
#find the element in the list and remove it
|
| 93 |
+
if i in selected_boxes:
|
| 94 |
+
selected_boxes.remove(i)
|
| 95 |
+
else:
|
| 96 |
+
pass
|
| 97 |
+
|
| 98 |
+
boxes = boxes[selected_boxes]
|
| 99 |
+
scores = scores[selected_boxes]
|
| 100 |
+
labels = labels[selected_boxes]
|
| 101 |
+
|
| 102 |
+
prediction = {
|
| 103 |
+
'boxes': boxes,
|
| 104 |
+
'scores': scores,
|
| 105 |
+
'labels': labels,
|
| 106 |
+
}
|
| 107 |
+
|
| 108 |
+
image = image.permute(1, 2, 0).cpu().numpy()
|
| 109 |
+
image = (image * 255).astype(np.uint8)
|
| 110 |
+
|
| 111 |
+
return image, prediction
|
| 112 |
+
|
| 113 |
+
|
| 114 |
+
def arrow_prediction(model, image, score_threshold=0.5, iou_threshold=0.5, distance_treshold=15):
|
| 115 |
+
model.eval()
|
| 116 |
+
with torch.no_grad():
|
| 117 |
+
image_tensor = image.unsqueeze(0).to(torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu'))
|
| 118 |
+
predictions = model(image_tensor)
|
| 119 |
+
|
| 120 |
+
boxes = predictions[0]['boxes'].cpu().numpy()
|
| 121 |
+
labels = predictions[0]['labels'].cpu().numpy() + (len(object_dict) - 1)
|
| 122 |
+
scores = predictions[0]['scores'].cpu().numpy()
|
| 123 |
+
keypoints = predictions[0]['keypoints'].cpu().numpy()
|
| 124 |
+
|
| 125 |
+
idx = np.where(scores > score_threshold)[0]
|
| 126 |
+
boxes = boxes[idx]
|
| 127 |
+
scores = scores[idx]
|
| 128 |
+
labels = labels[idx]
|
| 129 |
+
keypoints = keypoints[idx]
|
| 130 |
+
|
| 131 |
+
selected_boxes = non_maximum_suppression(boxes, scores, iou_threshold=iou_threshold)
|
| 132 |
+
boxes = boxes[selected_boxes]
|
| 133 |
+
scores = scores[selected_boxes]
|
| 134 |
+
labels = labels[selected_boxes]
|
| 135 |
+
keypoints = keypoints[selected_boxes]
|
| 136 |
+
|
| 137 |
+
keypoints = keypoint_correction(keypoints, boxes, labels, class_dict, distance_treshold=distance_treshold)
|
| 138 |
+
|
| 139 |
+
prediction = {
|
| 140 |
+
'boxes': boxes,
|
| 141 |
+
'scores': scores,
|
| 142 |
+
'labels': labels,
|
| 143 |
+
'keypoints': keypoints,
|
| 144 |
+
}
|
| 145 |
+
|
| 146 |
+
image = image.permute(1, 2, 0).cpu().numpy()
|
| 147 |
+
image = (image * 255).astype(np.uint8)
|
| 148 |
+
|
| 149 |
+
return image, prediction
|
| 150 |
+
|
| 151 |
+
def mix_predictions(objects_pred, arrow_pred):
|
| 152 |
+
# Initialize the list of lists for keypoints
|
| 153 |
+
object_keypoints = []
|
| 154 |
+
|
| 155 |
+
# Number of boxes
|
| 156 |
+
num_boxes = len(objects_pred['boxes'])
|
| 157 |
+
|
| 158 |
+
# Iterate over the number of boxes
|
| 159 |
+
for _ in range(num_boxes):
|
| 160 |
+
# Each box has 2 keypoints, both initialized to [0, 0, 0]
|
| 161 |
+
keypoints = [[0, 0, 0], [0, 0, 0]]
|
| 162 |
+
object_keypoints.append(keypoints)
|
| 163 |
+
|
| 164 |
+
#concatenate the two predictions
|
| 165 |
+
boxes = np.concatenate((objects_pred['boxes'], arrow_pred['boxes']))
|
| 166 |
+
labels = np.concatenate((objects_pred['labels'], arrow_pred['labels']))
|
| 167 |
+
scores = np.concatenate((objects_pred['scores'], arrow_pred['scores']))
|
| 168 |
+
keypoints = np.concatenate((object_keypoints, arrow_pred['keypoints']))
|
| 169 |
+
|
| 170 |
+
return boxes, labels, scores, keypoints
|
| 171 |
+
|
| 172 |
+
def regroup_elements_by_pool(boxes, labels, class_dict):
|
| 173 |
+
"""
|
| 174 |
+
Regroups elements by the pool they belong to, and creates a single new pool for elements that are not in any existing pool.
|
| 175 |
+
|
| 176 |
+
Parameters:
|
| 177 |
+
- boxes (list): List of bounding boxes.
|
| 178 |
+
- labels (list): List of labels corresponding to each bounding box.
|
| 179 |
+
- class_dict (dict): Dictionary mapping class indices to class names.
|
| 180 |
+
|
| 181 |
+
Returns:
|
| 182 |
+
- dict: A dictionary where each key is a pool's index and the value is a list of elements within that pool.
|
| 183 |
+
"""
|
| 184 |
+
# Initialize a dictionary to hold the elements in each pool
|
| 185 |
+
pool_dict = {}
|
| 186 |
+
|
| 187 |
+
# Identify the bounding boxes of the pools
|
| 188 |
+
pool_indices = [i for i, label in enumerate(labels) if (class_dict[label.item()] == 'pool')]
|
| 189 |
+
pool_boxes = [boxes[i] for i in pool_indices]
|
| 190 |
+
|
| 191 |
+
if not pool_indices:
|
| 192 |
+
# If no pool is detected, create a single pool containing all elements
|
| 193 |
+
labels = np.append(labels, list(class_dict.values()).index('pool'))
|
| 194 |
+
pool_dict[len(labels)-1] = list(range(len(boxes)))
|
| 195 |
+
else:
|
| 196 |
+
# Initialize each pool index with an empty list
|
| 197 |
+
for pool_index in pool_indices:
|
| 198 |
+
pool_dict[pool_index] = []
|
| 199 |
+
|
| 200 |
+
# Initialize a list for elements not in any pool
|
| 201 |
+
elements_not_in_pool = []
|
| 202 |
+
|
| 203 |
+
# Iterate over all elements
|
| 204 |
+
for i, box in enumerate(boxes):
|
| 205 |
+
if i in pool_indices or class_dict[labels[i]] == 'messageFlow':
|
| 206 |
+
continue # Skip pool boxes themselves and messageFlow elements
|
| 207 |
+
assigned_to_pool = False
|
| 208 |
+
for j, pool_box in enumerate(pool_boxes):
|
| 209 |
+
# Check if the element is within the pool's bounding box
|
| 210 |
+
if (box[0] >= pool_box[0] and box[1] >= pool_box[1] and
|
| 211 |
+
box[2] <= pool_box[2] and box[3] <= pool_box[3]):
|
| 212 |
+
pool_index = pool_indices[j]
|
| 213 |
+
pool_dict[pool_index].append(i)
|
| 214 |
+
assigned_to_pool = True
|
| 215 |
+
break
|
| 216 |
+
if not assigned_to_pool:
|
| 217 |
+
if class_dict[labels[i]] != 'messageFlow' and class_dict[labels[i]] != 'lane':
|
| 218 |
+
elements_not_in_pool.append(i)
|
| 219 |
+
|
| 220 |
+
if elements_not_in_pool:
|
| 221 |
+
new_pool_index = max(pool_dict.keys()) + 1
|
| 222 |
+
labels = np.append(labels, list(class_dict.values()).index('pool'))
|
| 223 |
+
pool_dict[new_pool_index] = elements_not_in_pool
|
| 224 |
+
|
| 225 |
+
# Separate empty pools
|
| 226 |
+
non_empty_pools = {k: v for k, v in pool_dict.items() if v}
|
| 227 |
+
empty_pools = {k: v for k, v in pool_dict.items() if not v}
|
| 228 |
+
|
| 229 |
+
# Merge non-empty pools followed by empty pools
|
| 230 |
+
pool_dict = {**non_empty_pools, **empty_pools}
|
| 231 |
+
|
| 232 |
+
return pool_dict, labels
|
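To illustrate the grouping behaviour described in the docstring above, here is a hedged sketch (not part of the commit) with one pool containing a task plus one stray task; it assumes 'pool' and 'task' are values of utils.class_dict, as they are used elsewhere in this file.

```python
import numpy as np
from utils import class_dict
from eval import regroup_elements_by_pool

pool_label = list(class_dict.values()).index('pool')   # assumption: 'pool' is in class_dict
task_label = list(class_dict.values()).index('task')   # assumption: 'task' is in class_dict

boxes = np.array([[0, 0, 400, 200],      # pool
                  [50, 50, 150, 120],    # task inside the pool
                  [500, 50, 600, 120]])  # task outside every pool
labels = np.array([pool_label, task_label, task_label])

pool_dict, labels = regroup_elements_by_pool(boxes, labels, class_dict)
print(pool_dict)  # e.g. {0: [1], 1: [2]}, the stray task ends up in a new pool
```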
| 233 |
+
|
| 234 |
+
|
| 235 |
+
def create_links(keypoints, boxes, labels, class_dict):
|
| 236 |
+
best_points = []
|
| 237 |
+
links = []
|
| 238 |
+
for i in range(len(labels)):
|
| 239 |
+
if labels[i]==list(class_dict.values()).index('sequenceFlow') or labels[i]==list(class_dict.values()).index('messageFlow'):
|
| 240 |
+
closest1, point_start = find_closest_object(keypoints[i][0], boxes, labels)
|
| 241 |
+
closest2, point_end = find_closest_object(keypoints[i][1], boxes, labels)
|
| 242 |
+
if closest1 is not None and closest2 is not None:
|
| 243 |
+
best_points.append([point_start, point_end])
|
| 244 |
+
links.append([closest1, closest2])
|
| 245 |
+
else:
|
| 246 |
+
best_points.append([None,None])
|
| 247 |
+
links.append([None,None])
|
| 248 |
+
|
| 249 |
+
for i in range(len(labels)):
|
| 250 |
+
if labels[i]==list(class_dict.values()).index('dataAssociation'):
|
| 251 |
+
closest1, point_start = find_closest_object(keypoints[i][0], boxes, labels)
|
| 252 |
+
closest2, point_end = find_closest_object(keypoints[i][1], boxes, labels)
|
| 253 |
+
if closest1 is not None and closest2 is not None:
|
| 254 |
+
best_points[i] = ([point_start, point_end])
|
| 255 |
+
links[i] = ([closest1, closest2])
|
| 256 |
+
|
| 257 |
+
return links, best_points
|
| 258 |
+
|
| 259 |
+
def correction_labels(boxes, labels, class_dict, pool_dict, flow_links):
|
| 260 |
+
|
| 261 |
+
for pool_index, elements in pool_dict.items():
|
| 262 |
+
print(f"Pool {pool_index} contains elements: {elements}")
|
| 263 |
+
#check if each link is in the same pool
|
| 264 |
+
for i in range(len(flow_links)):
|
| 265 |
+
if labels[i] == list(class_dict.values()).index('sequenceFlow'):
|
| 266 |
+
id1, id2 = flow_links[i]
|
| 267 |
+
if id1 is not None and id2 is not None:
|
| 268 |
+
if id1 in elements and id2 in elements:
|
| 269 |
+
continue
|
| 270 |
+
elif id1 not in elements and id2 not in elements:
|
| 271 |
+
continue
|
| 272 |
+
else:
|
| 273 |
+
print('change the link from sequenceFlow to messageFlow')
|
| 274 |
+
labels[i]=list(class_dict.values()).index('messageFlow')
|
| 275 |
+
|
| 276 |
+
return labels, flow_links
|
| 277 |
+
|
| 278 |
+
|
| 279 |
+
def last_correction(boxes, labels, scores, keypoints, links, best_points, pool_dict):
|
| 280 |
+
|
| 281 |
+
#delete pools that contain only messageFlow elements
|
| 282 |
+
delete_pool = []
|
| 283 |
+
for pool_index, elements in pool_dict.items():
|
| 284 |
+
if all([labels[i] == list(class_dict.values()).index('messageFlow') for i in elements]):
|
| 285 |
+
if len(elements) > 0:
|
| 286 |
+
delete_pool.append(pool_dict[pool_index])
|
| 287 |
+
print(f"Pool {pool_index} contains only messageFlow elements, deleting it")
|
| 288 |
+
|
| 289 |
+
#sort index
|
| 290 |
+
delete_pool = sorted(delete_pool, reverse=True)
|
| 291 |
+
for pool in delete_pool:
|
| 292 |
+
index = list(pool_dict.keys())[list(pool_dict.values()).index(pool)]
|
| 293 |
+
del pool_dict[index]
|
| 294 |
+
|
| 295 |
+
|
| 296 |
+
delete_elements = []
|
| 297 |
+
# Check if there is an arrow that has the same links
|
| 298 |
+
for i in range(len(labels)):
|
| 299 |
+
for j in range(i+1, len(labels)):
|
| 300 |
+
if labels[i] == list(class_dict.values()).index('sequenceFlow') and labels[j] == list(class_dict.values()).index('sequenceFlow'):
|
| 301 |
+
if links[i] == links[j]:
|
| 302 |
+
print(f'element {i} and {j} have the same links')
|
| 303 |
+
if scores[i] > scores[j]:
|
| 304 |
+
print('delete element', j)
|
| 305 |
+
delete_elements.append(j)
|
| 306 |
+
else:
|
| 307 |
+
print('delete element', i)
|
| 308 |
+
delete_elements.append(i)
|
| 309 |
+
|
| 310 |
+
boxes = np.delete(boxes, delete_elements, axis=0)
|
| 311 |
+
labels = np.delete(labels, delete_elements)
|
| 312 |
+
scores = np.delete(scores, delete_elements)
|
| 313 |
+
keypoints = np.delete(keypoints, delete_elements, axis=0)
|
| 314 |
+
links = np.delete(links, delete_elements, axis=0)
|
| 315 |
+
best_points = [point for i, point in enumerate(best_points) if i not in delete_elements]
|
| 316 |
+
|
| 317 |
+
#also delete the element in the pool_dict
|
| 318 |
+
for pool_index, elements in pool_dict.items():
|
| 319 |
+
pool_dict[pool_index] = [i for i in elements if i not in delete_elements]
|
| 320 |
+
|
| 321 |
+
return boxes, labels, scores, keypoints, links, best_points, pool_dict
|
| 322 |
+
|
| 323 |
+
def give_link_to_element(links, labels):
|
| 324 |
+
#give events their links so that BPMN id creation can distinguish start, intermediate and end events
|
| 325 |
+
for i in range(len(links)):
|
| 326 |
+
if labels[i] == list(class_dict.values()).index('sequenceFlow'):
|
| 327 |
+
id1, id2 = links[i]
|
| 328 |
+
if id1 is not None and id2 is not None:
|
| 329 |
+
links[id1][1] = i
|
| 330 |
+
links[id2][0] = i
|
| 331 |
+
return links
|
| 332 |
+
|
| 333 |
+
def full_prediction(model_object, model_arrow, image, score_threshold=0.5, iou_threshold=0.5, resize=True, distance_treshold=15):
|
| 334 |
+
model_object.eval() # Set the model to evaluation mode
|
| 335 |
+
model_arrow.eval() # Set the model to evaluation mode
|
| 336 |
+
|
| 337 |
+
# Run object and arrow detection on the image
|
| 338 |
+
with torch.no_grad(): # Disable gradient calculation for inference
|
| 339 |
+
_, objects_pred = object_prediction(model_object, image, score_threshold=score_threshold, iou_threshold=iou_threshold)
|
| 340 |
+
_, arrow_pred = arrow_prediction(model_arrow, image, score_threshold=score_threshold, iou_threshold=iou_threshold, distance_treshold=distance_treshold)
|
| 341 |
+
|
| 342 |
+
#print('Object prediction:', objects_pred)
|
| 343 |
+
|
| 344 |
+
|
| 345 |
+
boxes, labels, scores, keypoints = mix_predictions(objects_pred, arrow_pred)
|
| 346 |
+
|
| 347 |
+
# Regroup elements by pool
|
| 348 |
+
pool_dict, labels = regroup_elements_by_pool(boxes,labels, class_dict)
|
| 349 |
+
# Create links between elements
|
| 350 |
+
flow_links, best_points = create_links(keypoints, boxes, labels, class_dict)
|
| 351 |
+
#Correct the labels of sequenceFlows that cross multiple pools
|
| 352 |
+
labels, flow_links = correction_labels(boxes, labels, class_dict, pool_dict, flow_links)
|
| 353 |
+
#give events their links so that BPMN id creation can distinguish start, intermediate and end events
|
| 354 |
+
flow_links = give_link_to_element(flow_links, labels)
|
| 355 |
+
|
| 356 |
+
boxes,labels,scores,keypoints,flow_links,best_points,pool_dict = last_correction(boxes,labels,scores,keypoints,flow_links,best_points, pool_dict)
|
| 357 |
+
|
| 358 |
+
image = image.permute(1, 2, 0).cpu().numpy()
|
| 359 |
+
image = (image * 255).astype(np.uint8)
|
| 360 |
+
idx = []
|
| 361 |
+
for i in range(len(labels)):
|
| 362 |
+
idx.append(i)
|
| 363 |
+
bpmn_id = [class_dict[labels[i]] for i in range(len(labels))]
|
| 364 |
+
|
| 365 |
+
data = {
|
| 366 |
+
'image': image,
|
| 367 |
+
'idx': idx,
|
| 368 |
+
'boxes': boxes,
|
| 369 |
+
'labels': labels,
|
| 370 |
+
'scores': scores,
|
| 371 |
+
'keypoints': keypoints,
|
| 372 |
+
'links': flow_links,
|
| 373 |
+
'best_points': best_points,
|
| 374 |
+
'pool_dict': pool_dict,
|
| 375 |
+
'BPMN_id': bpmn_id,
|
| 376 |
+
}
|
| 377 |
+
|
| 378 |
+
# give a unique BPMN id to each element
|
| 379 |
+
data = create_BPMN_id(data)
|
| 380 |
+
|
| 381 |
+
|
| 382 |
+
|
| 383 |
+
return image, data
|
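For orientation, a hedged end-to-end sketch of calling full_prediction on one diagram image (not part of the commit): model_object and model_arrow are assumed to be already-loaded detection models, as the Streamlit demo in this commit uses, and "diagram.png" is a placeholder path.

```python
import torch
from PIL import Image
from torchvision.transforms import functional as F
from eval import full_prediction

# Assumptions: model_object and model_arrow are loaded elsewhere; the path is a placeholder.
image = F.to_tensor(Image.open("diagram.png").convert("RGB"))  # CHW float tensor in [0, 1]
with torch.no_grad():
    annotated, data = full_prediction(model_object, model_arrow, image,
                                      score_threshold=0.5, iou_threshold=0.5)

print(data['BPMN_id'])  # one generated id per detected element
print(data['links'])    # (source_idx, target_idx) pairs for the detected flows
```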
| 384 |
+
|
| 385 |
+
def evaluate_model_by_class(pred_boxes, true_boxes, pred_labels, true_labels, model_dict, iou_threshold=0.5):
|
| 386 |
+
# Initialize dictionaries to hold per-class counts
|
| 387 |
+
class_tp = {cls: 0 for cls in model_dict.values()}
|
| 388 |
+
class_fp = {cls: 0 for cls in model_dict.values()}
|
| 389 |
+
class_fn = {cls: 0 for cls in model_dict.values()}
|
| 390 |
+
|
| 391 |
+
# Track which true boxes have been matched
|
| 392 |
+
matched = [False] * len(true_boxes)
|
| 393 |
+
|
| 394 |
+
# Check each prediction against true boxes
|
| 395 |
+
for pred_box, pred_label in zip(pred_boxes, pred_labels):
|
| 396 |
+
match_found = False
|
| 397 |
+
for idx, (true_box, true_label) in enumerate(zip(true_boxes, true_labels)):
|
| 398 |
+
if not matched[idx] and pred_label == true_label:
|
| 399 |
+
if iou(np.array(pred_box), np.array(true_box)) >= iou_threshold:
|
| 400 |
+
class_tp[model_dict[pred_label]] += 1
|
| 401 |
+
matched[idx] = True
|
| 402 |
+
match_found = True
|
| 403 |
+
break
|
| 404 |
+
if not match_found:
|
| 405 |
+
class_fp[model_dict[pred_label]] += 1
|
| 406 |
+
|
| 407 |
+
# Count false negatives
|
| 408 |
+
for idx, (true_box, true_label) in enumerate(zip(true_boxes, true_labels)):
|
| 409 |
+
if not matched[idx]:
|
| 410 |
+
class_fn[model_dict[true_label]] += 1
|
| 411 |
+
|
| 412 |
+
# Calculate precision, recall, and F1-score per class
|
| 413 |
+
class_precision = {}
|
| 414 |
+
class_recall = {}
|
| 415 |
+
class_f1_score = {}
|
| 416 |
+
|
| 417 |
+
for cls in model_dict.values():
|
| 418 |
+
precision = class_tp[cls] / (class_tp[cls] + class_fp[cls]) if class_tp[cls] + class_fp[cls] > 0 else 0
|
| 419 |
+
recall = class_tp[cls] / (class_tp[cls] + class_fn[cls]) if class_tp[cls] + class_fn[cls] > 0 else 0
|
| 420 |
+
f1_score = 2 * (precision * recall) / (precision + recall) if precision + recall > 0 else 0
|
| 421 |
+
|
| 422 |
+
class_precision[cls] = precision
|
| 423 |
+
class_recall[cls] = recall
|
| 424 |
+
class_f1_score[cls] = f1_score
|
| 425 |
+
|
| 426 |
+
return class_precision, class_recall, class_f1_score
|
| 427 |
+
|
| 428 |
+
|
| 429 |
+
def keypoints_mesure(pred_boxes, pred_box, true_boxes, true_box, pred_keypoints, true_keypoints, distance_threshold=5):
|
| 430 |
+
result = 0
|
| 431 |
+
reverted = False
|
| 432 |
+
#find the position of keypoints in the list
|
| 433 |
+
idx = np.where(pred_boxes == pred_box)[0][0]
|
| 434 |
+
idx2 = np.where(true_boxes == true_box)[0][0]
|
| 435 |
+
|
| 436 |
+
keypoint1_pred = pred_keypoints[idx][0]
|
| 437 |
+
keypoint1_true = true_keypoints[idx2][0]
|
| 438 |
+
keypoint2_pred = pred_keypoints[idx][1]
|
| 439 |
+
keypoint2_true = true_keypoints[idx2][1]
|
| 440 |
+
|
| 441 |
+
distance1 = np.linalg.norm(keypoint1_pred[:2] - keypoint1_true[:2])
|
| 442 |
+
distance2 = np.linalg.norm(keypoint2_pred[:2] - keypoint2_true[:2])
|
| 443 |
+
distance3 = np.linalg.norm(keypoint1_pred[:2] - keypoint2_true[:2])
|
| 444 |
+
distance4 = np.linalg.norm(keypoint2_pred[:2] - keypoint1_true[:2])
|
| 445 |
+
|
| 446 |
+
if distance1 < distance_threshold:
|
| 447 |
+
result += 1
|
| 448 |
+
if distance2 < distance_threshold:
|
| 449 |
+
result += 1
|
| 450 |
+
if distance3 < distance_threshold or distance4 < distance_threshold:
|
| 451 |
+
reverted = True
|
| 452 |
+
|
| 453 |
+
return result, reverted
|
| 454 |
+
|
| 455 |
+
def evaluate_single_image(pred_boxes, true_boxes, pred_labels, true_labels, pred_keypoints, true_keypoints, iou_threshold=0.5, distance_threshold=5):
|
| 456 |
+
tp, fp, fn = 0, 0, 0
|
| 457 |
+
key_t, key_f = 0, 0
|
| 458 |
+
labels_t, labels_f = 0, 0
|
| 459 |
+
reverted_tot = 0
|
| 460 |
+
|
| 461 |
+
matched_true_boxes = set()
|
| 462 |
+
for pred_idx, (pred_box, pred_label) in enumerate(zip(pred_boxes, pred_labels)):
|
| 463 |
+
match_found = False
|
| 464 |
+
for true_idx, true_box in enumerate(true_boxes):
|
| 465 |
+
if true_idx in matched_true_boxes:
|
| 466 |
+
continue
|
| 467 |
+
iou_val = iou(pred_box, true_box)
|
| 468 |
+
if iou_val >= iou_threshold:
|
| 469 |
+
if true_keypoints is not None and pred_keypoints is not None:
|
| 470 |
+
key_result, reverted = keypoints_mesure(pred_boxes, pred_box, true_boxes, true_box, pred_keypoints, true_keypoints, distance_threshold)
|
| 471 |
+
key_t += key_result
|
| 472 |
+
key_f += 2 - key_result
|
| 473 |
+
if reverted:
|
| 474 |
+
reverted_tot += 1
|
| 475 |
+
|
| 476 |
+
match_found = True
|
| 477 |
+
matched_true_boxes.add(true_idx)
|
| 478 |
+
if pred_label == true_labels[true_idx]:
|
| 479 |
+
labels_t += 1
|
| 480 |
+
else:
|
| 481 |
+
labels_f += 1
|
| 482 |
+
tp += 1
|
| 483 |
+
break
|
| 484 |
+
if not match_found:
|
| 485 |
+
fp += 1
|
| 486 |
+
|
| 487 |
+
fn = len(true_boxes) - tp
|
| 488 |
+
|
| 489 |
+
return tp, fp, fn, labels_t, labels_f, key_t, key_f, reverted_tot
|
| 490 |
+
|
| 491 |
+
|
| 492 |
+
def pred_4_evaluation(model, loader, score_threshold=0.5, iou_threshold=0.5, distance_threshold=5, key_correction=True, model_type='object'):
|
| 493 |
+
model.eval()
|
| 494 |
+
tp, fp, fn = 0, 0, 0
|
| 495 |
+
labels_t, labels_f = 0, 0
|
| 496 |
+
key_t, key_f = 0, 0
|
| 497 |
+
reverted = 0
|
| 498 |
+
|
| 499 |
+
with torch.no_grad():
|
| 500 |
+
for images, targets_im in tqdm(loader, desc="Testing... "): # Wrap the loader with tqdm
|
| 501 |
+
devices = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
|
| 502 |
+
images = [image.to(devices) for image in images]
|
| 503 |
+
targets = [{k: v.clone().detach().to(devices) for k, v in t.items()} for t in targets_im]
|
| 504 |
+
|
| 505 |
+
predictions = model(images)
|
| 506 |
+
|
| 507 |
+
for target, prediction in zip(targets, predictions):
|
| 508 |
+
true_boxes = target['boxes'].cpu().numpy()
|
| 509 |
+
true_labels = target['labels'].cpu().numpy()
|
| 510 |
+
if 'keypoints' in target:
|
| 511 |
+
true_keypoints = target['keypoints'].cpu().numpy()
|
| 512 |
+
|
| 513 |
+
pred_boxes = prediction['boxes'].cpu().numpy()
|
| 514 |
+
scores = prediction['scores'].cpu().numpy()
|
| 515 |
+
pred_labels = prediction['labels'].cpu().numpy()
|
| 516 |
+
if 'keypoints' in prediction:
|
| 517 |
+
pred_keypoints = prediction['keypoints'].cpu().numpy()
|
| 518 |
+
|
| 519 |
+
selected_boxes = non_maximum_suppression(pred_boxes, scores, iou_threshold=iou_threshold)
|
| 520 |
+
pred_boxes = pred_boxes[selected_boxes]
|
| 521 |
+
scores = scores[selected_boxes]
|
| 522 |
+
pred_labels = pred_labels[selected_boxes]
|
| 523 |
+
if 'keypoints' in prediction:
|
| 524 |
+
pred_keypoints = pred_keypoints[selected_boxes]
|
| 525 |
+
|
| 526 |
+
filtered_boxes = []
|
| 527 |
+
filtered_labels = []
|
| 528 |
+
filtered_keypoints = []
|
| 529 |
+
if 'keypoints' not in prediction:
|
| 530 |
+
#create a list of zeros of length equal to the number of boxes
|
| 531 |
+
pred_keypoints = [np.zeros((2, 3)) for _ in range(len(pred_boxes))]
|
| 532 |
+
|
| 533 |
+
for box, score, label, keypoints in zip(pred_boxes, scores, pred_labels, pred_keypoints):
|
| 534 |
+
if score >= score_threshold:
|
| 535 |
+
filtered_boxes.append(box)
|
| 536 |
+
filtered_labels.append(label)
|
| 537 |
+
if 'keypoints' in prediction:
|
| 538 |
+
filtered_keypoints.append(keypoints)
|
| 539 |
+
|
| 540 |
+
if key_correction and ('keypoints' in prediction):
|
| 541 |
+
filtered_keypoints = keypoint_correction(filtered_keypoints, filtered_boxes, filtered_labels)
|
| 542 |
+
|
| 543 |
+
if 'keypoints' not in target:
|
| 544 |
+
filtered_keypoints = None
|
| 545 |
+
true_keypoints = None
|
| 546 |
+
tp_img, fp_img, fn_img, labels_t_img, labels_f_img, key_t_img, key_f_img, reverted_img = evaluate_single_image(
|
| 547 |
+
filtered_boxes, true_boxes, filtered_labels, true_labels, filtered_keypoints, true_keypoints, iou_threshold, distance_threshold)
|
| 548 |
+
|
| 549 |
+
tp += tp_img
|
| 550 |
+
fp += fp_img
|
| 551 |
+
fn += fn_img
|
| 552 |
+
labels_t += labels_t_img
|
| 553 |
+
labels_f += labels_f_img
|
| 554 |
+
key_t += key_t_img
|
| 555 |
+
key_f += key_f_img
|
| 556 |
+
reverted += reverted_img
|
| 557 |
+
|
| 558 |
+
return tp, fp, fn, labels_t, labels_f, key_t, key_f, reverted
|
| 559 |
+
|
| 560 |
+
def main_evaluation(model, test_loader, score_threshold=0.5, iou_threshold=0.5, distance_threshold=5, key_correction=True, model_type = 'object'):
|
| 561 |
+
|
| 562 |
+
tp, fp, fn, labels_t, labels_f, key_t, key_f, reverted = pred_4_evaluation(model, test_loader, score_threshold, iou_threshold, distance_threshold, key_correction, model_type)
|
| 563 |
+
|
| 564 |
+
labels_precision = labels_t / (labels_t + labels_f) if (labels_t + labels_f) > 0 else 0
|
| 565 |
+
precision = tp / (tp + fp) if (tp + fp) > 0 else 0
|
| 566 |
+
recall = tp / (tp + fn) if (tp + fn) > 0 else 0
|
| 567 |
+
f1_score = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0
|
| 568 |
+
if model_type == 'arrow':
|
| 569 |
+
key_accuracy = key_t / (key_t + key_f) if (key_t + key_f) > 0 else 0
|
| 570 |
+
reverted_accuracy = reverted / (key_t + key_f) if (key_t + key_f) > 0 else 0
|
| 571 |
+
else:
|
| 572 |
+
key_accuracy = 0
|
| 573 |
+
reverted_accuracy = 0
|
| 574 |
+
|
| 575 |
+
return labels_precision, precision, recall, f1_score, key_accuracy, reverted_accuracy
|
| 576 |
+
|
| 577 |
+
|
| 578 |
+
|
| 579 |
+
def evaluate_model_by_class_single_image(pred_boxes, true_boxes, pred_labels, true_labels, class_tp, class_fp, class_fn, model_dict, iou_threshold=0.5):
|
| 580 |
+
matched_true_boxes = set()
|
| 581 |
+
for pred_idx, (pred_box, pred_label) in enumerate(zip(pred_boxes, pred_labels)):
|
| 582 |
+
match_found = False
|
| 583 |
+
for true_idx, (true_box, true_label) in enumerate(zip(true_boxes, true_labels)):
|
| 584 |
+
if true_idx in matched_true_boxes:
|
| 585 |
+
continue
|
| 586 |
+
if pred_label == true_label and iou(np.array(pred_box), np.array(true_box)) >= iou_threshold:
|
| 587 |
+
class_tp[model_dict[pred_label]] += 1
|
| 588 |
+
matched_true_boxes.add(true_idx)
|
| 589 |
+
match_found = True
|
| 590 |
+
break
|
| 591 |
+
if not match_found:
|
| 592 |
+
class_fp[model_dict[pred_label]] += 1
|
| 593 |
+
|
| 594 |
+
for idx, true_label in enumerate(true_labels):
|
| 595 |
+
if idx not in matched_true_boxes:
|
| 596 |
+
class_fn[model_dict[true_label]] += 1
|
| 597 |
+
|
| 598 |
+
def pred_4_evaluation_per_class(model, loader, score_threshold=0.5, iou_threshold=0.5):
|
| 599 |
+
model.eval()
|
| 600 |
+
with torch.no_grad():
|
| 601 |
+
for images, targets_im in tqdm(loader, desc="Testing... "):
|
| 602 |
+
devices = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
|
| 603 |
+
images = [image.to(devices) for image in images]
|
| 604 |
+
targets = [{k: v.clone().detach().to(devices) for k, v in t.items()} for t in targets_im]
|
| 605 |
+
|
| 606 |
+
predictions = model(images)
|
| 607 |
+
|
| 608 |
+
for target, prediction in zip(targets, predictions):
|
| 609 |
+
true_boxes = target['boxes'].cpu().numpy()
|
| 610 |
+
true_labels = target['labels'].cpu().numpy()
|
| 611 |
+
|
| 612 |
+
pred_boxes = prediction['boxes'].cpu().numpy()
|
| 613 |
+
scores = prediction['scores'].cpu().numpy()
|
| 614 |
+
pred_labels = prediction['labels'].cpu().numpy()
|
| 615 |
+
|
| 616 |
+
idx = np.where(scores > score_threshold)[0]
|
| 617 |
+
pred_boxes = pred_boxes[idx]
|
| 618 |
+
scores = scores[idx]
|
| 619 |
+
pred_labels = pred_labels[idx]
|
| 620 |
+
|
| 621 |
+
selected_boxes = non_maximum_suppression(pred_boxes, scores, iou_threshold=iou_threshold)
|
| 622 |
+
pred_boxes = pred_boxes[selected_boxes]
|
| 623 |
+
scores = scores[selected_boxes]
|
| 624 |
+
pred_labels = pred_labels[selected_boxes]
|
| 625 |
+
|
| 626 |
+
yield pred_boxes, true_boxes, pred_labels, true_labels
|
| 627 |
+
|
| 628 |
+
def evaluate_model_by_class(model, test_loader, model_dict, score_threshold=0.5, iou_threshold=0.5):
|
| 629 |
+
class_tp = {cls: 0 for cls in model_dict.values()}
|
| 630 |
+
class_fp = {cls: 0 for cls in model_dict.values()}
|
| 631 |
+
class_fn = {cls: 0 for cls in model_dict.values()}
|
| 632 |
+
|
| 633 |
+
for pred_boxes, true_boxes, pred_labels, true_labels in pred_4_evaluation_per_class(model, test_loader, score_threshold, iou_threshold):
|
| 634 |
+
evaluate_model_by_class_single_image(pred_boxes, true_boxes, pred_labels, true_labels, class_tp, class_fp, class_fn, model_dict, iou_threshold)
|
| 635 |
+
|
| 636 |
+
class_precision = {}
|
| 637 |
+
class_recall = {}
|
| 638 |
+
class_f1_score = {}
|
| 639 |
+
|
| 640 |
+
for cls in model_dict.values():
|
| 641 |
+
precision = class_tp[cls] / (class_tp[cls] + class_fp[cls]) if class_tp[cls] + class_fp[cls] > 0 else 0
|
| 642 |
+
recall = class_tp[cls] / (class_tp[cls] + class_fn[cls]) if class_tp[cls] + class_fn[cls] > 0 else 0
|
| 643 |
+
f1_score = 2 * (precision * recall) / (precision + recall) if precision + recall > 0 else 0
|
| 644 |
+
|
| 645 |
+
class_precision[cls] = precision
|
| 646 |
+
class_recall[cls] = recall
|
| 647 |
+
class_f1_score[cls] = f1_score
|
| 648 |
+
|
| 649 |
+
return class_precision, class_recall, class_f1_score
|
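The evaluation entry points above can be driven from a test DataLoader; the following hedged sketch (not part of the commit) assumes a model_arrow and test_loader built along the lines of train.py elsewhere in this commit.

```python
from eval import main_evaluation

# Assumptions: model_arrow and test_loader are created elsewhere (see train.py).
labels_p, precision, recall, f1, key_acc, reverted_acc = main_evaluation(
    model_arrow, test_loader,
    score_threshold=0.5, iou_threshold=0.5,
    distance_threshold=5, key_correction=True, model_type='arrow')
print(f"precision={precision:.3f} recall={recall:.3f} f1={f1:.3f} keypoint_acc={key_acc:.3f}")
```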
flask.py
ADDED
|
@@ -0,0 +1,6 @@
| 1 |
+
from flask import Flask
|
| 2 |
+
app = Flask(__name__)
|
| 3 |
+
|
| 4 |
+
@app.route("/")
|
| 5 |
+
def hello():
|
| 6 |
+
return "Hello World!\n"
|
htlm_webpage.py
ADDED
|
@@ -0,0 +1,141 @@
| 1 |
+
import streamlit as st
|
| 2 |
+
import streamlit.components.v1 as components
|
| 3 |
+
|
| 4 |
+
def display_bpmn_xml(bpmn_xml):
|
| 5 |
+
html_template = f"""
|
| 6 |
+
<!DOCTYPE html>
|
| 7 |
+
<html>
|
| 8 |
+
<head>
|
| 9 |
+
<meta charset="UTF-8">
|
| 10 |
+
<title>BPMN Modeler</title>
|
| 11 |
+
<link rel="stylesheet" href="https://unpkg.com/bpmn-js/dist/assets/diagram-js.css">
|
| 12 |
+
<link rel="stylesheet" href="https://unpkg.com/bpmn-js/dist/assets/bpmn-font/css/bpmn-embedded.css">
|
| 13 |
+
<script src="https://unpkg.com/bpmn-js/dist/bpmn-modeler.development.js"></script>
|
| 14 |
+
<style>
|
| 15 |
+
html, body {{
|
| 16 |
+
height: 100%;
|
| 17 |
+
padding: 0;
|
| 18 |
+
margin: 0;
|
| 19 |
+
font-family: Arial, sans-serif;
|
| 20 |
+
display: flex;
|
| 21 |
+
flex-direction: column;
|
| 22 |
+
overflow: hidden;
|
| 23 |
+
}}
|
| 24 |
+
#button-container {{
|
| 25 |
+
padding: 10px;
|
| 26 |
+
background-color: #ffffff;
|
| 27 |
+
border-bottom: 1px solid #ddd;
|
| 28 |
+
display: flex;
|
| 29 |
+
justify-content: flex-start;
|
| 30 |
+
gap: 10px;
|
| 31 |
+
}}
|
| 32 |
+
#save-button, #download-button {{
|
| 33 |
+
background-color: #4CAF50;
|
| 34 |
+
color: white;
|
| 35 |
+
border: none;
|
| 36 |
+
padding: 10px 20px;
|
| 37 |
+
text-align: center;
|
| 38 |
+
text-decoration: none;
|
| 39 |
+
display: inline-block;
|
| 40 |
+
font-size: 16px;
|
| 41 |
+
margin: 4px 2px;
|
| 42 |
+
cursor: pointer;
|
| 43 |
+
border-radius: 8px;
|
| 44 |
+
}}
|
| 45 |
+
#download-button {{
|
| 46 |
+
background-color: #008CBA;
|
| 47 |
+
}}
|
| 48 |
+
#canvas-container {{
|
| 49 |
+
flex: 1;
|
| 50 |
+
position: relative;
|
| 51 |
+
background-color: #FBFBFB;
|
| 52 |
+
overflow: hidden; /* Prevent scrolling */
|
| 53 |
+
display: flex;
|
| 54 |
+
justify-content: center;
|
| 55 |
+
align-items: center;
|
| 56 |
+
}}
|
| 57 |
+
#canvas {{
|
| 58 |
+
height: 100%;
|
| 59 |
+
width: 100%;
|
| 60 |
+
position: relative;
|
| 61 |
+
}}
|
| 62 |
+
</style>
|
| 63 |
+
</head>
|
| 64 |
+
<body>
|
| 65 |
+
<div id="button-container">
|
| 66 |
+
<button id="save-button">Save as BPMN</button>
|
| 67 |
+
<button id="download-button">Save as XML</button>
|
| 68 |
+
<button id="download-button">Save as Vizi</button>
|
| 69 |
+
</div>
|
| 70 |
+
<div id="canvas-container">
|
| 71 |
+
<div id="canvas"></div>
|
| 72 |
+
</div>
|
| 73 |
+
<script>
|
| 74 |
+
var bpmnModeler = new BpmnJS({{
|
| 75 |
+
container: '#canvas'
|
| 76 |
+
}});
|
| 77 |
+
|
| 78 |
+
async function openDiagram(bpmnXML) {{
|
| 79 |
+
try {{
|
| 80 |
+
await bpmnModeler.importXML(bpmnXML);
|
| 81 |
+
bpmnModeler.get('canvas').zoom('fit-viewport');
|
| 82 |
+
bpmnModeler.get('canvas').zoom(0.8); // Adjust this value for zooming out
|
| 83 |
+
}} catch (err) {{
|
| 84 |
+
console.error('Error rendering BPMN diagram', err);
|
| 85 |
+
}}
|
| 86 |
+
}}
|
| 87 |
+
|
| 88 |
+
async function saveDiagram() {{
|
| 89 |
+
try {{
|
| 90 |
+
const result = await bpmnModeler.saveXML({{ format: true }});
|
| 91 |
+
const xml = result.xml;
|
| 92 |
+
const blob = new Blob([xml], {{ type: 'text/xml' }});
|
| 93 |
+
const url = URL.createObjectURL(blob);
|
| 94 |
+
const a = document.createElement('a');
|
| 95 |
+
a.href = url;
|
| 96 |
+
a.download = 'diagram.bpmn';
|
| 97 |
+
document.body.appendChild(a);
|
| 98 |
+
a.click();
|
| 99 |
+
document.body.removeChild(a);
|
| 100 |
+
}} catch (err) {{
|
| 101 |
+
console.error('Error saving BPMN diagram', err);
|
| 102 |
+
}}
|
| 103 |
+
}}
|
| 104 |
+
|
| 105 |
+
async function downloadXML() {{
|
| 106 |
+
const xml = `{bpmn_xml}`;
|
| 107 |
+
const blob = new Blob([xml], {{ type: 'text/xml' }});
|
| 108 |
+
const url = URL.createObjectURL(blob);
|
| 109 |
+
const a = document.createElement('a');
|
| 110 |
+
a.href = url;
|
| 111 |
+
a.download = 'diagram.xml';
|
| 112 |
+
document.body.appendChild(a);
|
| 113 |
+
a.click();
|
| 114 |
+
document.body.removeChild(a);
|
| 115 |
+
}}
|
| 116 |
+
|
| 117 |
+
document.getElementById('save-button').addEventListener('click', saveDiagram);
|
| 118 |
+
document.getElementById('download-button').addEventListener('click', downloadXML);
|
| 119 |
+
|
| 120 |
+
// Ensure the canvas is focused to capture keyboard events
|
| 121 |
+
document.getElementById('canvas').focus();
|
| 122 |
+
|
| 123 |
+
// Add event listeners for keyboard shortcuts
|
| 124 |
+
document.addEventListener('keydown', function(event) {{
|
| 125 |
+
if (event.ctrlKey && event.key === 'z') {{
|
| 126 |
+
bpmnModeler.get('commandStack').undo();
|
| 127 |
+
}} else if (event.key === 'Delete' || event.key === 'Backspace') {{
|
| 128 |
+
bpmnModeler.get('selection').get().forEach(function(element) {{
|
| 129 |
+
bpmnModeler.get('modeling').removeElements([element]);
|
| 130 |
+
}});
|
| 131 |
+
}}
|
| 132 |
+
}});
|
| 133 |
+
|
| 134 |
+
openDiagram(`{bpmn_xml}`);
|
| 135 |
+
</script>
|
| 136 |
+
</body>
|
| 137 |
+
</html>
|
| 138 |
+
"""
|
| 139 |
+
|
| 140 |
+
|
| 141 |
+
components.html(html_template, height=1000, width=1500)
|
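A hedged sketch of how this component can be called from a Streamlit script run with `streamlit run` (not part of the commit; the BPMN file path is a placeholder, any valid BPMN 2.0 XML string works).

```python
import streamlit as st
from htlm_webpage import display_bpmn_xml  # module name as committed

st.title("BPMN rendering demo")
with open("diagram.bpmn", encoding="utf-8") as f:  # placeholder path
    bpmn_xml = f.read()
display_bpmn_xml(bpmn_xml)
```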
packages.txt
ADDED
|
@@ -0,0 +1 @@
| 1 |
+
libgl1-mesa-glx
|
requirements.txt
ADDED
|
@@ -0,0 +1,10 @@
| 1 |
+
yamlu==0.0.17
|
| 2 |
+
tqdm==4.66.4
|
| 3 |
+
torchvision==0.18.0
|
| 4 |
+
azure-ai-vision-imageanalysis==1.0.0b2
|
| 5 |
+
streamlit==1.35.0
|
| 6 |
+
streamlit-image-comparison==0.0.4
|
| 7 |
+
streamlit-cropper==0.2.2
|
| 8 |
+
streamlit-drawable-canvas==0.9.3
|
| 9 |
+
opencv-python
|
| 10 |
+
gdown
|
toXML.py
ADDED
|
@@ -0,0 +1,351 @@
| 1 |
+
import xml.etree.ElementTree as ET
|
| 2 |
+
from utils import class_dict
|
| 3 |
+
|
| 4 |
+
def rescale(scale, boxes):
|
| 5 |
+
for i in range(len(boxes)):
|
| 6 |
+
boxes[i] = [boxes[i][0]*scale,
|
| 7 |
+
boxes[i][1]*scale,
|
| 8 |
+
boxes[i][2]*scale,
|
| 9 |
+
boxes[i][3]*scale]
|
| 10 |
+
return boxes
|
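A one-line check of rescale, as a hedged sketch that is not part of the commit:

```python
from toXML import rescale

boxes = [[10, 20, 30, 40]]
print(rescale(2.0, boxes))  # [[20.0, 40.0, 60.0, 80.0]]
```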
| 11 |
+
|
| 12 |
+
def create_BPMN_id(data):
|
| 13 |
+
enums = {
|
| 14 |
+
'end_event': 1,
|
| 15 |
+
'start_event': 1,
|
| 16 |
+
'task': 1,
|
| 17 |
+
'sequenceFlow': 1,
|
| 18 |
+
'messageFlow': 1,
|
| 19 |
+
'message_event': 1,
|
| 20 |
+
'exclusiveGateway': 1,
|
| 21 |
+
'parallelGateway': 1,
|
| 22 |
+
'dataAssociation': 1,
|
| 23 |
+
'pool': 1,
|
| 24 |
+
'dataObject': 1,
|
| 25 |
+
'timerEvent': 1
|
| 26 |
+
}
|
| 27 |
+
|
| 28 |
+
BPMN_name = [class_dict[label] for label in data['labels']]
|
| 29 |
+
|
| 30 |
+
for idx, Bpmn_id in enumerate(BPMN_name):
|
| 31 |
+
if Bpmn_id == 'event':
|
| 32 |
+
if data['links'][idx][0] is not None and data['links'][idx][1] is None:
|
| 33 |
+
key = 'end_event'
|
| 34 |
+
elif data['links'][idx][0] is None and data['links'][idx][1] is not None:
|
| 35 |
+
key = 'start_event'
|
| 36 |
+
else:
|
| 37 |
+
key = {
|
| 38 |
+
'task': 'task',
|
| 39 |
+
'dataObject': 'dataObject',
|
| 40 |
+
'sequenceFlow': 'sequenceFlow',
|
| 41 |
+
'messageFlow': 'messageFlow',
|
| 42 |
+
'messageEvent': 'message_event',
|
| 43 |
+
'exclusiveGateway': 'exclusiveGateway',
|
| 44 |
+
'parallelGateway': 'parallelGateway',
|
| 45 |
+
'dataAssociation': 'dataAssociation',
|
| 46 |
+
'pool': 'pool',
|
| 47 |
+
'timerEvent': 'timerEvent'
|
| 48 |
+
}.get(Bpmn_id, None)
|
| 49 |
+
|
| 50 |
+
if key:
|
| 51 |
+
data['BPMN_id'][idx] = f'{key}_{enums[key]}'
|
| 52 |
+
enums[key] += 1
|
| 53 |
+
|
| 54 |
+
return data
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
def add_diagram_elements(parent, element_id, x, y, width, height):
|
| 59 |
+
"""Utility to add BPMN diagram notation for elements."""
|
| 60 |
+
shape = ET.SubElement(parent, 'bpmndi:BPMNShape', attrib={
|
| 61 |
+
'bpmnElement': element_id,
|
| 62 |
+
'id': element_id + '_di'
|
| 63 |
+
})
|
| 64 |
+
bounds = ET.SubElement(shape, 'dc:Bounds', attrib={
|
| 65 |
+
'x': str(x),
|
| 66 |
+
'y': str(y),
|
| 67 |
+
'width': str(width),
|
| 68 |
+
'height': str(height)
|
| 69 |
+
})
|
| 70 |
+
|
| 71 |
+
def add_diagram_edge(parent, element_id, waypoints):
|
| 72 |
+
"""Utility to add BPMN diagram notation for sequence flows."""
|
| 73 |
+
edge = ET.SubElement(parent, 'bpmndi:BPMNEdge', attrib={
|
| 74 |
+
'bpmnElement': element_id,
|
| 75 |
+
'id': element_id + '_di'
|
| 76 |
+
})
|
| 77 |
+
for x, y in waypoints:
|
| 78 |
+
ET.SubElement(edge, 'di:waypoint', attrib={
|
| 79 |
+
'x': str(x),
|
| 80 |
+
'y': str(y)
|
| 81 |
+
})
|
| 82 |
+
|
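A hedged sketch (not part of the commit) of the two diagram helpers above writing a shape and an edge onto a hand-built BPMNPlane element; the ids and coordinates are made-up examples.

```python
import xml.etree.ElementTree as ET
from toXML import add_diagram_elements, add_diagram_edge

plane = ET.Element('bpmndi:BPMNPlane', attrib={'id': 'BPMNPlane_1', 'bpmnElement': 'collab_1'})
add_diagram_elements(plane, 'task_1', x=100, y=80, width=120, height=80)
add_diagram_edge(plane, 'sequenceflow_task_1_task_2', [(220, 120), (300, 120)])
print(ET.tostring(plane, encoding='unicode'))
```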
| 83 |
+
|
| 84 |
+
def check_status(link, keep_elements):
|
| 85 |
+
if link[0] in keep_elements and link[1] in keep_elements:
|
| 86 |
+
return 'middle'
|
| 87 |
+
elif link[0] is None and link[1] in keep_elements:
|
| 88 |
+
return 'start'
|
| 89 |
+
elif link[0] in keep_elements and link[1] is None:
|
| 90 |
+
return 'end'
|
| 91 |
+
else:
|
| 92 |
+
return 'middle'
|
| 93 |
+
|
| 94 |
+
def check_data_association(i, links, labels, keep_elements):
|
| 95 |
+
for j, (k,l) in enumerate(links):
|
| 96 |
+
if labels[j] == 14:
|
| 97 |
+
if k==i:
|
| 98 |
+
return 'output',j
|
| 99 |
+
elif l==i:
|
| 100 |
+
return 'input',j
|
| 101 |
+
|
| 102 |
+
return 'no association', None
|
| 103 |
+
|
| 104 |
+
def create_data_Association(bpmn,data,size,element_id,source_id,target_id):
|
| 105 |
+
waypoints = calculate_waypoints(data, size, source_id, target_id)
|
| 106 |
+
add_diagram_edge(bpmn, element_id, waypoints)
|
| 107 |
+
|
| 108 |
+
# Function to dynamically create and layout BPMN elements
|
| 109 |
+
def create_bpmn_object(process, bpmnplane, text_mapping, definitions, size, data, keep_elements):
|
| 110 |
+
elements = data['BPMN_id']
|
| 111 |
+
positions = data['boxes']
|
| 112 |
+
links = data['links']
|
| 113 |
+
|
| 114 |
+
for i in keep_elements:
|
| 115 |
+
element_id = elements[i]
|
| 116 |
+
if element_id is None:
|
| 117 |
+
continue
|
| 118 |
+
|
| 119 |
+
element_type = element_id.split('_')[0]
|
| 120 |
+
x, y = positions[i][:2]
|
| 121 |
+
|
| 122 |
+
# Start Event
|
| 123 |
+
if element_type == 'start':
|
| 124 |
+
element = ET.SubElement(process, 'bpmn:startEvent', id=element_id, name=text_mapping[element_id])
|
| 125 |
+
add_diagram_elements(bpmnplane, element_id, x, y, size['start'][0], size['start'][1])
|
| 126 |
+
|
| 127 |
+
# Task
|
| 128 |
+
elif element_type == 'task':
|
| 129 |
+
element = ET.SubElement(process, 'bpmn:task', id=element_id, name=text_mapping[element_id])
|
| 130 |
+
status, dataAssociation_idx = check_data_association(i, data['links'], data['labels'], keep_elements)
|
| 131 |
+
|
| 132 |
+
# Handle Data Input Association
|
| 133 |
+
if status == 'input':
|
| 134 |
+
dataObject_idx = links[dataAssociation_idx][0]
|
| 135 |
+
dataObject_name = elements[dataObject_idx]
|
| 136 |
+
dataObject_ref = f'DataObjectReference_{dataObject_name.split("_")[1]}'
|
| 137 |
+
sub_element = ET.SubElement(element, 'bpmn:dataInputAssociation', id=f'dataInputAssociation_{dataObject_ref.split("_")[1]}')
|
| 138 |
+
ET.SubElement(sub_element, 'bpmn:sourceRef').text = dataObject_ref
|
| 139 |
+
create_data_Association(bpmnplane, data, size, sub_element.attrib['id'], dataObject_name, element_id)
|
| 140 |
+
|
| 141 |
+
# Handle Data Output Association
|
| 142 |
+
elif status == 'output':
|
| 143 |
+
dataObject_idx = links[dataAssociation_idx][1]
|
| 144 |
+
dataObject_name = elements[dataObject_idx]
|
| 145 |
+
dataObject_ref = f'DataObjectReference_{dataObject_name.split("_")[1]}'
|
| 146 |
+
sub_element = ET.SubElement(element, 'bpmn:dataOutputAssociation', id=f'dataOutputAssociation_{dataObject_ref.split("_")[1]}')
|
| 147 |
+
ET.SubElement(sub_element, 'bpmn:targetRef').text = dataObject_ref
|
| 148 |
+
create_data_Association(bpmnplane, data, size, sub_element.attrib['id'], element_id, dataObject_name)
|
| 149 |
+
|
| 150 |
+
add_diagram_elements(bpmnplane, element_id, x, y, size['task'][0], size['task'][1])
|
| 151 |
+
|
| 152 |
+
# Message Events (Start, Intermediate, End)
|
| 153 |
+
elif element_type == 'message':
|
| 154 |
+
status = check_status(links[i], keep_elements)
|
| 155 |
+
if status == 'start':
|
| 156 |
+
element = ET.SubElement(process, 'bpmn:startEvent', id=element_id, name=text_mapping[element_id])
|
| 157 |
+
elif status == 'middle':
|
| 158 |
+
element = ET.SubElement(process, 'bpmn:intermediateCatchEvent', id=element_id, name=text_mapping[element_id])
|
| 159 |
+
elif status == 'end':
|
| 160 |
+
element = ET.SubElement(process, 'bpmn:endEvent', id=element_id, name=text_mapping[element_id])
|
| 161 |
+
ET.SubElement(element, 'bpmn:messageEventDefinition', id=f'MessageEventDefinition_{i+1}')
|
| 162 |
+
add_diagram_elements(bpmnplane, element_id, x, y, size['message'][0], size['message'][1])
|
| 163 |
+
|
| 164 |
+
# End Event
|
| 165 |
+
elif element_type == 'end':
|
| 166 |
+
element = ET.SubElement(process, 'bpmn:endEvent', id=element_id, name=text_mapping[element_id])
|
| 167 |
+
add_diagram_elements(bpmnplane, element_id, x, y, size['end'][0], size['end'][1])
|
| 168 |
+
|
| 169 |
+
# Gateways (Exclusive, Parallel)
|
| 170 |
+
elif element_type in ['exclusiveGateway', 'parallelGateway']:
|
| 171 |
+
gateway_type = 'exclusiveGateway' if element_type == 'exclusiveGateway' else 'parallelGateway'
|
| 172 |
+
element = ET.SubElement(process, f'bpmn:{gateway_type}', id=element_id)
|
| 173 |
+
add_diagram_elements(bpmnplane, element_id, x, y, size[element_type][0], size[element_type][1])
|
| 174 |
+
|
| 175 |
+
# Data Object
|
| 176 |
+
elif element_type == 'dataObject':
|
| 177 |
+
dataObject_idx = element_id.split('_')[1]
|
| 178 |
+
dataObject_ref = f'DataObjectReference_{dataObject_idx}'
|
| 179 |
+
element = ET.SubElement(process, 'bpmn:dataObjectReference', id=dataObject_ref, dataObjectRef=element_id, name=text_mapping[element_id])
|
| 180 |
+
ET.SubElement(process, 'bpmn:dataObject', id=element_id)
|
| 181 |
+
add_diagram_elements(bpmnplane, dataObject_ref, x, y, size['dataObject'][0], size['dataObject'][1])
|
| 182 |
+
|
| 183 |
+
# Timer Event
|
| 184 |
+
elif element_type == 'timerEvent':
|
| 185 |
+
element = ET.SubElement(process, 'bpmn:intermediateCatchEvent', id=element_id, name=text_mapping[element_id])
|
| 186 |
+
ET.SubElement(element, 'bpmn:timerEventDefinition', id=f'TimerEventDefinition_{i+1}')
|
| 187 |
+
add_diagram_elements(bpmnplane, element_id, x, y, size['timerEvent'][0], size['timerEvent'][1])
|
| 188 |
+
|
| 189 |
+
|
| 190 |
+
|
| 191 |
+
# Calculate simple waypoints between two elements (this function assumes direct horizontal links for simplicity)
|
| 192 |
+
def calculate_waypoints(data, size, source_id, target_id):
|
| 193 |
+
source_idx = data['BPMN_id'].index(source_id)
|
| 194 |
+
target_idx = data['BPMN_id'].index(target_id)
|
| 195 |
+
name_source = source_id.split('_')[0]
|
| 196 |
+
name_target = target_id.split('_')[0]
|
| 197 |
+
|
| 198 |
+
#Get the position of the source and target
|
| 199 |
+
source_x, source_y = data['boxes'][source_idx][:2]
|
| 200 |
+
target_x, target_y = data['boxes'][target_idx][:2]
|
| 201 |
+
|
| 202 |
+
# Calculate relative position between source and target from their centers
|
| 203 |
+
relative_x = (target_x+size[name_target][0])/2 - (source_x+size[name_source][0])/2
|
| 204 |
+
relative_y = (target_y+size[name_target][1])/2 - (source_y+size[name_source][1])/2
|
| 205 |
+
|
| 206 |
+
# Get the size of the elements
|
| 207 |
+
size_x_source = size[name_source][0]
|
| 208 |
+
size_y_source = size[name_source][1]
|
| 209 |
+
size_x_target = size[name_target][0]
|
| 210 |
+
size_y_target = size[name_target][1]
|
| 211 |
+
|
| 212 |
+
#if the flow goes to the right
|
| 213 |
+
if relative_x >= size[name_source][0]:
|
| 214 |
+
source_x += size_x_source
|
| 215 |
+
source_y += size_y_source / 2
|
| 216 |
+
target_x = target_x
|
| 217 |
+
target_y += size_y_target / 2
|
| 218 |
+
#if the source is going up
|
| 219 |
+
if relative_y < -size[name_source][1]:
|
| 220 |
+
source_x -= size_x_source / 2
|
| 221 |
+
source_y -= size_y_source / 2
|
| 222 |
+
#if the source is going down
|
| 223 |
+
elif relative_y > size[name_source][1]:
|
| 224 |
+
source_x -= size_x_source / 2
|
| 225 |
+
source_y += size_y_source / 2
|
| 226 |
+
#if the flow goes to the left
|
| 227 |
+
elif relative_x < -size[name_source][0]:
|
| 228 |
+
source_x = source_x
|
| 229 |
+
source_y += size_y_source / 2
|
| 230 |
+
target_x += size_x_target
|
| 231 |
+
target_y += size_y_target / 2
|
| 232 |
+
#if the source is going up
|
| 233 |
+
if relative_y < -size[name_source][1]:
|
| 234 |
+
source_x += size_x_source / 2
|
| 235 |
+
source_y -= size_y_source / 2
|
| 236 |
+
#if the source is going down
|
| 237 |
+
elif relative_y > size[name_source][1]:
|
| 238 |
+
source_x += size_x_source / 2
|
| 239 |
+
source_y += size_y_source / 2
|
| 240 |
+
#if the flow is mostly vertical (going up or down)
|
| 241 |
+
elif -size[name_source][0] < relative_x < size[name_source][0]:
|
| 242 |
+
source_x += size_x_source / 2
|
| 243 |
+
target_x += size_x_target / 2
|
| 244 |
+
#if it's going down
|
| 245 |
+
if relative_y >= size[name_source][1]/2:
|
| 246 |
+
source_y += size_y_source
|
| 247 |
+
#if it's going up
|
| 248 |
+
elif relative_y < -size[name_source][1]/2:
|
| 249 |
+
source_y = source_y
|
| 250 |
+
target_y += size_y_target
|
| 251 |
+
else:
|
| 252 |
+
if relative_x >= 0:
|
| 253 |
+
source_x += size_x_source/2
|
| 254 |
+
source_y += size_y_source/2
|
| 255 |
+
target_x -= size_x_target/2
|
| 256 |
+
target_y += size_y_target/2
|
| 257 |
+
else:
|
| 258 |
+
source_x -= size_x_source/2
|
| 259 |
+
source_y += size_y_source/2
|
| 260 |
+
target_x += size_x_target/2
|
| 261 |
+
target_y += size_y_target/2
|
| 262 |
+
|
| 263 |
+
return [(source_x, source_y), (target_x, target_y)]
|
| 264 |
+
|
| 265 |
+
|
| 266 |
+
def calculate_pool_bounds(data, keep_elements, size):
|
| 267 |
+
min_x = min_y = float('10000')
|
| 268 |
+
max_x = max_y = float('0')
|
| 269 |
+
|
| 270 |
+
for i in keep_elements:
|
| 271 |
+
if i >= len(data['BPMN_id']):
|
| 272 |
+
print("Problem with the index")
|
| 273 |
+
continue
|
| 274 |
+
element = data['BPMN_id'][i]
|
| 275 |
+
if element is None or data['labels'][i] in (7, 13, 14, 15):
|
| 276 |
+
continue
|
| 277 |
+
|
| 278 |
+
element_type = element.split('_')[0]
|
| 279 |
+
x, y = data['boxes'][i][:2]
|
| 280 |
+
element_width, element_height = size[element_type]
|
| 281 |
+
|
| 282 |
+
min_x = min(min_x, x)
|
| 283 |
+
min_y = min(min_y, y)
|
| 284 |
+
max_x = max(max_x, x + element_width)
|
| 285 |
+
max_y = max(max_y, y + element_height)
|
| 286 |
+
|
| 287 |
+
return min_x, min_y, max_x, max_y
|
| 288 |
+
|
| 289 |
+
|
| 290 |
+
def calculate_pool_waypoints(idx, data, size, source_idx, target_idx, source_element, target_element):
|
| 291 |
+
# Get the bounding boxes of the source and target elements
|
| 292 |
+
source_box = data['boxes'][source_idx]
|
| 293 |
+
target_box = data['boxes'][target_idx]
|
| 294 |
+
|
| 295 |
+
# Get the midpoints of the source element
|
| 296 |
+
source_mid_x = (source_box[0] + source_box[2]) / 2
|
| 297 |
+
source_mid_y = (source_box[1] + source_box[3]) / 2
|
| 298 |
+
|
| 299 |
+
# Check if the connection involves a pool
|
| 300 |
+
if source_element == 'pool':
|
| 301 |
+
pool_box = source_box
|
| 302 |
+
element_box = (target_box[0], target_box[1], target_box[0]+size[target_element][0], target_box[1]+size[target_element][1])
|
| 303 |
+
element_mid_x = (element_box[0] + element_box[2]) / 2
|
| 304 |
+
element_mid_y = (element_box[1] + element_box[3]) / 2
|
| 305 |
+
# Connect the pool's bottom or top side to the target element's top or bottom center
|
| 306 |
+
if pool_box[3] < element_box[1]: # Pool is above the target element
|
| 307 |
+
waypoints = [(element_mid_x, pool_box[3]-50), (element_mid_x, element_box[1])]
|
| 308 |
+
else: # Pool is below the target element
|
| 309 |
+
waypoints = [(element_mid_x, element_box[3]), (element_mid_x, pool_box[1]-50)]
|
| 310 |
+
else:
|
| 311 |
+
pool_box = target_box
|
| 312 |
+
element_box = (source_box[0], source_box[1], source_box[0]+size[source_element][0], source_box[1]+size[source_element][1])
|
| 313 |
+
element_mid_x = (element_box[0] + element_box[2]) / 2
|
| 314 |
+
element_mid_y = (element_box[1] + element_box[3]) / 2
|
| 315 |
+
|
| 316 |
+
# Connect the element's bottom or top center to the pool's top or bottom side
|
| 317 |
+
if pool_box[3] < element_box[1]: # Pool is above the target element
|
| 318 |
+
waypoints = [(element_mid_x, element_box[1]), (element_mid_x, pool_box[3]-50)]
|
| 319 |
+
else: # Pool is below the target element
|
| 320 |
+
waypoints = [(element_mid_x, element_box[3]), (element_mid_x, pool_box[1]-50)]
|
| 321 |
+
|
| 322 |
+
return waypoints
|
| 323 |
+
|
| 324 |
+
|
| 325 |
+
|
| 326 |
+
def create_flow_element(bpmn, text_mapping, idx, size, data, parent, message=False):
|
| 327 |
+
source_idx, target_idx = data['links'][idx]
|
| 328 |
+
source_id, target_id = data['BPMN_id'][source_idx], data['BPMN_id'][target_idx]
|
| 329 |
+
if message:
|
| 330 |
+
element_id = f'messageflow_{source_id}_{target_id}'
|
| 331 |
+
else:
|
| 332 |
+
element_id = f'sequenceflow_{source_id}_{target_id}'
|
| 333 |
+
|
| 334 |
+
if source_id.split('_')[0] == 'pool' or target_id.split('_')[0] == 'pool':
|
| 335 |
+
waypoints = calculate_pool_waypoints(idx, data, size, source_idx, target_idx, source_id.split('_')[0], target_id.split('_')[0])
|
| 336 |
+
#waypoints = data['best_points'][idx]
|
| 337 |
+
if source_id.split('_')[0] == 'pool':
|
| 338 |
+
source_id = f"participant_{source_id.split('_')[1]}"
|
| 339 |
+
if target_id.split('_')[0] == 'pool':
|
| 340 |
+
target_id = f"participant_{target_id.split('_')[1]}"
|
| 341 |
+
else:
|
| 342 |
+
waypoints = calculate_waypoints(data, size, source_id, target_id)
|
| 343 |
+
#waypoints = data['best_points'][idx]
|
| 344 |
+
|
| 345 |
+
#waypoints = data['best_points'][idx]
|
| 346 |
+
if message:
|
| 347 |
+
element = ET.SubElement(parent, 'bpmn:messageFlow', id=element_id, sourceRef=source_id, targetRef=target_id, name=text_mapping[data['BPMN_id'][idx]])
|
| 348 |
+
else:
|
| 349 |
+
element = ET.SubElement(parent, 'bpmn:sequenceFlow', id=element_id, sourceRef=source_id, targetRef=target_id, name=text_mapping[data['BPMN_id'][idx]])
|
| 350 |
+
add_diagram_edge(bpmn, element_id, waypoints)
|
| 351 |
+
|
train.py
ADDED
|
@@ -0,0 +1,394 @@
| 1 |
+
import copy
|
| 2 |
+
import cv2
|
| 3 |
+
import numpy as np
|
| 4 |
+
import random
|
| 5 |
+
import time
|
| 6 |
+
import torch
|
| 7 |
+
import torchvision.transforms.functional as F
|
| 8 |
+
import matplotlib.pyplot as plt
|
| 9 |
+
|
| 10 |
+
from eval import main_evaluation
|
| 11 |
+
from torch.optim import SGD, AdamW
|
| 12 |
+
from torch.utils.data import DataLoader, Dataset, Subset, ConcatDataset
|
| 13 |
+
from torch.utils.data.dataloader import default_collate
|
| 14 |
+
from torchvision.models.detection import keypointrcnn_resnet50_fpn, KeypointRCNN_ResNet50_FPN_Weights
|
| 15 |
+
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
|
| 16 |
+
from torchvision.models.detection.keypoint_rcnn import KeypointRCNNPredictor
|
| 17 |
+
from tqdm import tqdm
|
| 18 |
+
from utils import write_results
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
def get_arrow_model(num_classes, num_keypoints=2):
|
| 24 |
+
"""
|
| 25 |
+
Configures and returns a modified Keypoint R-CNN model based on ResNet-50 with FPN, adapted for a custom number of classes and keypoints.
|
| 26 |
+
|
| 27 |
+
Parameters:
|
| 28 |
+
- num_classes (int): Number of classes for the model to detect, excluding the background class.
|
| 29 |
+
- num_keypoints (int): Number of keypoints to predict for each detected object.
|
| 30 |
+
|
| 31 |
+
Returns:
|
| 32 |
+
- model (torch.nn.Module): The modified Keypoint R-CNN model.
|
| 33 |
+
|
| 34 |
+
Steps:
|
| 35 |
+
1. Load a pre-trained Keypoint R-CNN model with a ResNet-50 backbone and Feature Pyramid Network (FPN).
|
| 36 |
+
The model is initially configured for the COCO dataset, which includes various object classes and keypoints.
|
| 37 |
+
2. Replace the box predictor to adjust the number of output classes. The box predictor is responsible for
|
| 38 |
+
classifying detected regions and predicting their bounding boxes.
|
| 39 |
+
3. Replace the keypoint predictor to adjust the number of keypoints the model predicts for each object.
|
| 40 |
+
This is necessary to tailor the model to specific tasks that may have different keypoint structures.
|
| 41 |
+
"""
|
| 42 |
+
# Load a model pre-trained on COCO, initialized without pre-trained weights
|
| 43 |
+
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
|
| 44 |
+
if device == torch.device('cuda'):
|
| 45 |
+
model = keypointrcnn_resnet50_fpn(weights=KeypointRCNN_ResNet50_FPN_Weights.COCO_V1)
|
| 46 |
+
else:
|
| 47 |
+
model = keypointrcnn_resnet50_fpn(weights=False)
|
| 48 |
+
|
| 49 |
+
# Get the number of input features for the classifier in the box predictor.
|
| 50 |
+
in_features = model.roi_heads.box_predictor.cls_score.in_features
|
| 51 |
+
|
| 52 |
+
# Replace the box predictor in the ROI heads with a new one, tailored to the number of classes.
|
| 53 |
+
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
|
| 54 |
+
|
| 55 |
+
# Replace the keypoint predictor in the ROI heads with a new one, specifically designed for the desired number of keypoints.
|
| 56 |
+
model.roi_heads.keypoint_predictor = KeypointRCNNPredictor(512, num_keypoints)
|
| 57 |
+
|
| 58 |
+
return model
|
| 59 |
+
|
| 60 |
+
from torchvision.models.detection import fasterrcnn_resnet50_fpn
|
| 61 |
+
from torchvision.models.detection import FasterRCNN_ResNet50_FPN_Weights
|
| 62 |
+
def get_faster_rcnn_model(num_classes):
|
| 63 |
+
"""
|
| 64 |
+
Configures and returns a modified Faster R-CNN model based on ResNet-50 with FPN, adapted for a custom number of classes.
|
| 65 |
+
|
| 66 |
+
Parameters:
|
| 67 |
+
- num_classes (int): Number of classes for the model to detect, including the background class.
|
| 68 |
+
|
| 69 |
+
Returns:
|
| 70 |
+
- model (torch.nn.Module): The modified Faster R-CNN model.
|
| 71 |
+
"""
|
| 72 |
+
# Load a pre-trained Faster R-CNN model
|
| 73 |
+
model = fasterrcnn_resnet50_fpn(weights=FasterRCNN_ResNet50_FPN_Weights.COCO_V1)
|
| 74 |
+
|
| 75 |
+
# Get the number of input features for the classifier in the box predictor
|
| 76 |
+
in_features = model.roi_heads.box_predictor.cls_score.in_features
|
| 77 |
+
|
| 78 |
+
# Replace the box predictor with a new one, tailored to the number of classes (num_classes includes the background)
|
| 79 |
+
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
|
| 80 |
+
|
| 81 |
+
return model
|
| 82 |
+
|
| 83 |
+
def prepare_model(dict,opti,learning_rate= 0.0003,model_to_load=None, model_type = 'object'):
|
| 84 |
+
# Adjusted to pass the class_dict directly
|
| 85 |
+
if model_type == 'object':
|
| 86 |
+
model = get_faster_rcnn_model(len(dict))
|
| 87 |
+
elif model_type == 'arrow':
|
| 88 |
+
model = get_arrow_model(len(dict),2)
|
| 89 |
+
|
| 90 |
+
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
|
| 91 |
+
# Load the model weights
|
| 92 |
+
if model_to_load:
|
| 93 |
+
model.load_state_dict(torch.load('./models/'+ model_to_load +'.pth', map_location=device))
|
| 94 |
+
print(f"Model '{model_to_load}' loaded")
|
| 95 |
+
|
| 96 |
+
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
|
| 97 |
+
model.to(device)
|
| 98 |
+
|
| 99 |
+
if opti == 'SGD':
|
| 100 |
+
#learning_rate= 0.002
|
| 101 |
+
optimizer = SGD(model.parameters(), lr=learning_rate, momentum=0.9, weight_decay=0.0001)
|
| 102 |
+
elif opti == 'Adam':
|
| 103 |
+
#learning_rate = 0.0003
|
| 104 |
+
optimizer = AdamW(model.parameters(), lr=learning_rate, weight_decay=0.00056, eps=1e-08, betas=(0.9, 0.999))
|
| 105 |
+
else:
|
| 106 |
+
print('Optimizer not found')
|
| 107 |
+
|
| 108 |
+
return model, optimizer, device
|
| 109 |
+
|
| 110 |
+
|
| 111 |
+
|
| 112 |
+
|
| 113 |
+
def evaluate_loss(model, data_loader, device, loss_config=None, print_losses=False):
|
| 114 |
+
model.train() # Set the model to evaluation mode
|
| 115 |
+
total_loss = 0
|
| 116 |
+
|
| 117 |
+
# Initialize lists to keep track of individual losses
|
| 118 |
+
loss_classifier_list = []
|
| 119 |
+
loss_box_reg_list = []
|
| 120 |
+
loss_objectness_list = []
|
| 121 |
+
loss_rpn_box_reg_list = []
|
| 122 |
+
loss_keypoints_list = []
|
| 123 |
+
|
| 124 |
+
with torch.no_grad(): # Disable gradient computation
|
| 125 |
+
for images, targets_im in tqdm(data_loader, desc="Evaluating"):
|
| 126 |
+
images = [image.to(device) for image in images]
|
| 127 |
+
targets = [{k: v.clone().detach().to(device) for k, v in t.items()} for t in targets_im]
|
| 128 |
+
|
| 129 |
+
loss_dict = model(images, targets)
|
| 130 |
+
|
| 131 |
+
# Calculate the total loss for the current batch
|
| 132 |
+
losses = 0
|
| 133 |
+
if loss_config is not None:
|
| 134 |
+
for key, loss in loss_dict.items():
|
| 135 |
+
if loss_config.get(key, False):
|
| 136 |
+
losses += loss
|
| 137 |
+
else:
|
| 138 |
+
losses = sum(loss for key, loss in loss_dict.items())
|
| 139 |
+
|
| 140 |
+
total_loss += losses.item()
|
| 141 |
+
|
| 142 |
+
# Collect individual losses
|
| 143 |
+
if loss_dict.get('loss_classifier') is not None:
|
| 144 |
+
loss_classifier_list.append(loss_dict['loss_classifier'].item())
|
| 145 |
+
else:
|
| 146 |
+
loss_classifier_list.append(0)
|
| 147 |
+
|
| 148 |
+
if loss_dict.get('loss_box_reg') is not None:
|
| 149 |
+
loss_box_reg_list.append(loss_dict['loss_box_reg'].item())
|
| 150 |
+
else:
|
| 151 |
+
loss_box_reg_list.append(0)
|
| 152 |
+
|
| 153 |
+
if loss_dict.get('loss_objectness') is not None:
|
| 154 |
+
loss_objectness_list.append(loss_dict['loss_objectness'].item())
|
| 155 |
+
else:
|
| 156 |
+
loss_objectness_list.append(0)
|
| 157 |
+
|
| 158 |
+
if loss_dict.get('loss_rpn_box_reg') is not None:
|
| 159 |
+
loss_rpn_box_reg_list.append(loss_dict['loss_rpn_box_reg'].item())
|
| 160 |
+
else:
|
| 161 |
+
loss_rpn_box_reg_list.append(0)
|
| 162 |
+
|
| 163 |
+
if 'loss_keypoint' in loss_dict:
|
| 164 |
+
loss_keypoints_list.append(loss_dict['loss_keypoint'].item())
|
| 165 |
+
else:
|
| 166 |
+
loss_keypoints_list.append(0)
|
| 167 |
+
|
| 168 |
+
# Calculate average loss
|
| 169 |
+
avg_loss = total_loss / len(data_loader)
|
| 170 |
+
|
| 171 |
+
avg_loss_classifier = np.mean(loss_classifier_list)
|
| 172 |
+
avg_loss_box_reg = np.mean(loss_box_reg_list)
|
| 173 |
+
avg_loss_objectness = np.mean(loss_objectness_list)
|
| 174 |
+
avg_loss_rpn_box_reg = np.mean(loss_rpn_box_reg_list)
|
| 175 |
+
avg_loss_keypoints = np.mean(loss_keypoints_list)
|
| 176 |
+
|
| 177 |
+
if print_losses:
|
| 178 |
+
print(f"Average Loss: {avg_loss:.4f}")
|
| 179 |
+
print(f"Average Classifier Loss: {avg_loss_classifier:.4f}")
|
| 180 |
+
print(f"Average Box Regression Loss: {avg_loss_box_reg:.4f}")
|
| 181 |
+
print(f"Average Objectness Loss: {avg_loss_objectness:.4f}")
|
| 182 |
+
print(f"Average RPN Box Regression Loss: {avg_loss_rpn_box_reg:.4f}")
|
| 183 |
+
print(f"Average Keypoints Loss: {avg_loss_keypoints:.4f}")
|
| 184 |
+
|
| 185 |
+
return avg_loss
|
| 186 |
+
|
| 187 |
+
|
| 188 |
+
def training_model(num_epochs, model, data_loader, subset_test_loader,
|
| 189 |
+
optimizer, model_to_load=None, change_learning_rate=5, start_key=30,
|
| 190 |
+
batch_size=4, crop_prob=0.2, h_flip_prob=0.3, v_flip_prob=0.3,
|
| 191 |
+
max_rotate_deg=20, rotate_proba=0.2, blur_prob=0.2,
|
| 192 |
+
score_threshold=0.7, iou_threshold=0.5, early_stop_f1_score=0.97,
|
| 193 |
+
information_training='training', start_epoch=0, loss_config=None, model_type = 'object',
|
| 194 |
+
eval_metric='f1_score', device=torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')):
|
| 195 |
+
|
| 196 |
+
|
| 197 |
+
if loss_config is None:
|
| 198 |
+
print('No loss config found, all losses will be used.')
|
| 199 |
+
else:
|
| 200 |
+
#print the list of the losses that will be used
|
| 201 |
+
print('The following losses will be used: ', end='')
|
| 202 |
+
for key, value in loss_config.items():
|
| 203 |
+
if value:
|
| 204 |
+
print(key, end=", ")
|
| 205 |
+
print()
|
| 206 |
+
|
| 207 |
+
|
| 208 |
+
# Initialize lists to store epoch-wise average losses
|
| 209 |
+
epoch_avg_losses = []
|
| 210 |
+
epoch_avg_loss_classifier = []
|
| 211 |
+
epoch_avg_loss_box_reg = []
|
| 212 |
+
epoch_avg_loss_objectness = []
|
| 213 |
+
epoch_avg_loss_rpn_box_reg = []
|
| 214 |
+
epoch_avg_loss_keypoints = []
|
| 215 |
+
epoch_precision = []
|
| 216 |
+
epoch_recall = []
|
| 217 |
+
epoch_f1_score = []
|
| 218 |
+
epoch_test_loss = []
|
| 219 |
+
|
| 220 |
+
|
| 221 |
+
start_tot = time.time()
|
| 222 |
+
best_metrics = -1000
|
| 223 |
+
best_epoch = 0
|
| 224 |
+
best_model_state = None
|
| 225 |
+
same = 0
|
| 226 |
+
learning_rate = optimizer.param_groups[0]['lr']
|
| 227 |
+
bad_test_loss = 0
|
| 228 |
+
previous_test_loss = 1000
|
| 229 |
+
|
| 230 |
+
print(f"Let's go training {model_type} model with {num_epochs} epochs!")
|
| 231 |
+
print(f"Learning rate: {learning_rate}, Batch size: {batch_size}, Crop prob: {crop_prob}, Flip prob: {h_flip_prob}, Rotate prob: {rotate_proba}, Blur prob: {blur_prob}")
|
| 232 |
+
|
| 233 |
+
for epoch in range(num_epochs):
|
| 234 |
+
|
| 235 |
+
if (epoch>0 and (epoch)%change_learning_rate == 0) or bad_test_loss>1:
|
| 236 |
+
learning_rate = 0.7*learning_rate
|
| 237 |
+
optimizer = AdamW(model.parameters(), lr=learning_rate, weight_decay=learning_rate, eps=1e-08, betas=(0.9, 0.999))
|
| 238 |
+
print(f'Learning rate changed to {learning_rate:.4} and the best epoch for now is {best_epoch}')
|
| 239 |
+
bad_test_loss = 0
|
| 240 |
+
if epoch>0 and (epoch)==start_key:
|
| 241 |
+
print("Now it's training Keypoints also")
|
| 242 |
+
loss_config['loss_keypoint'] = True
|
| 243 |
+
for name, param in model.named_parameters():
|
| 244 |
+
if 'keypoint' in name:
|
| 245 |
+
param.requires_grad = True
|
| 246 |
+
|
| 247 |
+
model.train()
|
| 248 |
+
start = time.time()
|
| 249 |
+
total_loss = 0
|
| 250 |
+
|
| 251 |
+
# Initialize lists to keep track of individual losses
|
| 252 |
+
loss_classifier_list = []
|
| 253 |
+
loss_box_reg_list = []
|
| 254 |
+
loss_objectness_list = []
|
| 255 |
+
loss_rpn_box_reg_list = []
|
| 256 |
+
loss_keypoints_list = []
|
| 257 |
+
|
| 258 |
+
# Create a tqdm progress bar
|
| 259 |
+
progress_bar = tqdm(data_loader, desc=f'Epoch {epoch+1+start_epoch}')
|
| 260 |
+
|
| 261 |
+
for images, targets_im in progress_bar:
|
| 262 |
+
images = [image.to(device) for image in images]
|
| 263 |
+
targets = [{k: v.clone().detach().to(device) for k, v in t.items()} for t in targets_im]
|
| 264 |
+
|
| 265 |
+
optimizer.zero_grad()
|
| 266 |
+
|
| 267 |
+
loss_dict = model(images, targets)
|
| 268 |
+
# Inside the training loop where losses are calculated:
|
| 269 |
+
losses = 0
|
| 270 |
+
if loss_config is not None:
|
| 271 |
+
for key, loss in loss_dict.items():
|
| 272 |
+
if loss_config.get(key, False):
|
| 273 |
+
if key == 'loss_classifier':
|
| 274 |
+
loss *= 3
|
| 275 |
+
losses += loss
|
| 276 |
+
else:
|
| 277 |
+
losses = sum(loss for key, loss in loss_dict.items())
|
| 278 |
+
|
| 279 |
+
# Collect individual losses
|
| 280 |
+
if loss_dict['loss_classifier']:
|
| 281 |
+
loss_classifier_list.append(loss_dict['loss_classifier'].item())
|
| 282 |
+
else:
|
| 283 |
+
loss_classifier_list.append(0)
|
| 284 |
+
|
| 285 |
+
if loss_dict['loss_box_reg']:
|
| 286 |
+
loss_box_reg_list.append(loss_dict['loss_box_reg'].item())
|
| 287 |
+
else:
|
| 288 |
+
loss_box_reg_list.append(0)
|
| 289 |
+
|
| 290 |
+
if loss_dict['loss_objectness']:
|
| 291 |
+
loss_objectness_list.append(loss_dict['loss_objectness'].item())
|
| 292 |
+
else:
|
| 293 |
+
loss_objectness_list.append(0)
|
| 294 |
+
|
| 295 |
+
if loss_dict['loss_rpn_box_reg']:
|
| 296 |
+
loss_rpn_box_reg_list.append(loss_dict['loss_rpn_box_reg'].item())
|
| 297 |
+
else:
|
| 298 |
+
loss_rpn_box_reg_list.append(0)
|
| 299 |
+
|
| 300 |
+
if 'loss_keypoint' in loss_dict:
|
| 301 |
+
loss_keypoints_list.append(loss_dict['loss_keypoint'].item())
|
| 302 |
+
else:
|
| 303 |
+
loss_keypoints_list.append(0)
|
| 304 |
+
|
| 305 |
+
|
| 306 |
+
losses.backward()
|
| 307 |
+
optimizer.step()
|
| 308 |
+
|
| 309 |
+
total_loss += losses.item()
|
| 310 |
+
|
| 311 |
+
# Update the description with the current loss
|
| 312 |
+
progress_bar.set_description(f'Epoch {epoch+1+start_epoch}, Loss: {losses.item():.4f}')
|
| 313 |
+
|
| 314 |
+
# Calculate average loss
|
| 315 |
+
avg_loss = total_loss / len(data_loader)
|
| 316 |
+
|
| 317 |
+
epoch_avg_losses.append(avg_loss)
|
| 318 |
+
epoch_avg_loss_classifier.append(np.mean(loss_classifier_list))
|
| 319 |
+
epoch_avg_loss_box_reg.append(np.mean(loss_box_reg_list))
|
| 320 |
+
epoch_avg_loss_objectness.append(np.mean(loss_objectness_list))
|
| 321 |
+
epoch_avg_loss_rpn_box_reg.append(np.mean(loss_rpn_box_reg_list))
|
| 322 |
+
epoch_avg_loss_keypoints.append(np.mean(loss_keypoints_list))
|
| 323 |
+
|
| 324 |
+
|
| 325 |
+
# Evaluate the model on the test set
|
| 326 |
+
if eval_metric != 'loss':
|
| 327 |
+
avg_test_loss = 0
|
| 328 |
+
labels_precision, precision, recall, f1_score, key_accuracy, reverted_accuracy = main_evaluation(model, subset_test_loader,score_threshold=0.5, iou_threshold=0.5, distance_threshold=10, key_correction=False, model_type=model_type)
|
| 329 |
+
print(f"Epoch {epoch+1+start_epoch}, Average Loss: {avg_loss:.4f}, Labels_precision: {labels_precision:.4f}, Precision: {precision:.4f}, Recall: {recall:.4f}, F1 Score: {f1_score:.4f} ", end=", ")
|
| 330 |
+
if eval_metric == 'all':
|
| 331 |
+
avg_test_loss = evaluate_loss(model, subset_test_loader, device, loss_config)
|
| 332 |
+
print(f"Epoch {epoch+1+start_epoch}, Average Test Loss: {avg_test_loss:.4f}", end=", ")
|
| 333 |
+
if eval_metric == 'loss':
|
| 334 |
+
labels_precision, precision, recall, f1_score, key_accuracy, reverted_accuracy = 0,0,0,0,0,0
|
| 335 |
+
avg_test_loss = evaluate_loss(model, subset_test_loader, device, loss_config)
|
| 336 |
+
print(f"Epoch {epoch+1+start_epoch}, Average Training Loss: {avg_loss:.4f}, Average Test Loss: {avg_test_loss:.4f}", end=", ")
|
| 337 |
+
|
| 338 |
+
print(f"Time: {time.time() - start:.2f} [s]")
|
| 339 |
+
|
| 340 |
+
|
| 341 |
+
if epoch>0 and (epoch)%start_key == 0:
|
| 342 |
+
print(f"Keypoints Accuracy: {key_accuracy:.4f}", end=", ")
|
| 343 |
+
|
| 344 |
+
if eval_metric == 'f1_score':
|
| 345 |
+
metric_used = f1_score
|
| 346 |
+
elif eval_metric == 'precision':
|
| 347 |
+
metric_used = precision
|
| 348 |
+
elif eval_metric == 'recall':
|
| 349 |
+
metric_used = recall
|
| 350 |
+
else:
|
| 351 |
+
metric_used = -avg_test_loss
|
| 352 |
+
|
| 353 |
+
# Check if this epoch's model has the lowest average loss
|
| 354 |
+
if metric_used > best_metrics:
|
| 355 |
+
best_metrics = metric_used
|
| 356 |
+
best_epoch = epoch+1+start_epoch
|
| 357 |
+
best_model_state = copy.deepcopy(model.state_dict())
|
| 358 |
+
|
| 359 |
+
if epoch>0 and f1_score>early_stop_f1_score:
|
| 360 |
+
same+=1
|
| 361 |
+
|
| 362 |
+
epoch_precision.append(precision)
|
| 363 |
+
epoch_recall.append(recall)
|
| 364 |
+
epoch_f1_score.append(f1_score)
|
| 365 |
+
epoch_test_loss.append(avg_test_loss)
|
| 366 |
+
|
| 367 |
+
name_model = f"model_{type(optimizer).__name__}_{epoch+1+start_epoch}ep_{batch_size}batch_trainval_blur0{int(blur_prob*10)}_crop0{int(crop_prob*10)}_flip0{int(h_flip_prob*10)}_rotate0{int(rotate_proba*10)}_{information_training}"
|
| 368 |
+
|
| 369 |
+
if same >=1 :
|
| 370 |
+
metrics_list = [epoch_avg_losses,epoch_avg_loss_classifier,epoch_avg_loss_box_reg,epoch_avg_loss_objectness,epoch_avg_loss_rpn_box_reg,epoch_avg_loss_keypoints,epoch_precision,epoch_recall,epoch_f1_score,epoch_test_loss]
|
| 371 |
+
torch.save(best_model_state, './models/'+ name_model +'.pth')
|
| 372 |
+
write_results(name_model,metrics_list,start_epoch)
|
| 373 |
+
break
|
| 374 |
+
|
| 375 |
+
if (epoch+1+start_epoch) % 5 == 0:
|
| 376 |
+
metrics_list = [epoch_avg_losses,epoch_avg_loss_classifier,epoch_avg_loss_box_reg,epoch_avg_loss_objectness,epoch_avg_loss_rpn_box_reg,epoch_avg_loss_keypoints,epoch_precision,epoch_recall,epoch_f1_score,epoch_test_loss]
|
| 377 |
+
torch.save(best_model_state, './models/'+ name_model +'.pth')
|
| 378 |
+
model.load_state_dict(best_model_state)
|
| 379 |
+
write_results(name_model,metrics_list,start_epoch)
|
| 380 |
+
|
| 381 |
+
if avg_test_loss > previous_test_loss:
|
| 382 |
+
bad_test_loss += 1
|
| 383 |
+
previous_test_loss = avg_test_loss
|
| 384 |
+
|
| 385 |
+
|
| 386 |
+
print(f"\n Total time: {(time.time() - start_tot)/60} minutes, Best Epoch is {best_epoch} with an f1_score of {best_metrics:.4f}")
|
| 387 |
+
if best_model_state:
|
| 388 |
+
metrics_list = [epoch_avg_losses,epoch_avg_loss_classifier,epoch_avg_loss_box_reg,epoch_avg_loss_objectness,epoch_avg_loss_rpn_box_reg,epoch_avg_loss_keypoints,epoch_precision,epoch_recall,epoch_f1_score,epoch_test_loss]
|
| 389 |
+
torch.save(best_model_state, './models/'+ name_model +'.pth')
|
| 390 |
+
model.load_state_dict(best_model_state)
|
| 391 |
+
write_results(name_model,metrics_list,start_epoch)
|
| 392 |
+
print(f"Name of the best model: model_{type(optimizer).__name__}_{epoch+1+start_epoch}ep_{batch_size}batch_trainval_blur0{int(blur_prob*10)}_crop0{int(crop_prob*10)}_flip0{int(h_flip_prob*10)}_rotate0{int(rotate_proba*10)}_{information_training}")
|
| 393 |
+
|
| 394 |
+
return model, metrics_list
|
utils.py
ADDED
|
@@ -0,0 +1,936 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from torchvision.models.detection import keypointrcnn_resnet50_fpn
|
| 2 |
+
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
|
| 3 |
+
from torchvision.models.detection.keypoint_rcnn import KeypointRCNNPredictor
|
| 4 |
+
from torchvision.models.detection import KeypointRCNN_ResNet50_FPN_Weights
|
| 5 |
+
import random
|
| 6 |
+
import torch
|
| 7 |
+
from torch.utils.data import Dataset
|
| 8 |
+
import torchvision.transforms.functional as F
|
| 9 |
+
import numpy as np
|
| 10 |
+
from torch.utils.data.dataloader import default_collate
|
| 11 |
+
import cv2
|
| 12 |
+
import matplotlib.pyplot as plt
|
| 13 |
+
from torch.utils.data import DataLoader, Subset, ConcatDataset
|
| 14 |
+
from tqdm import tqdm
|
| 15 |
+
from torch.optim import SGD
|
| 16 |
+
import time
|
| 17 |
+
from torch.optim import AdamW
|
| 18 |
+
import copy
|
| 19 |
+
from torchvision import transforms
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
object_dict = {
|
| 23 |
+
0: 'background',
|
| 24 |
+
1: 'task',
|
| 25 |
+
2: 'exclusiveGateway',
|
| 26 |
+
3: 'event',
|
| 27 |
+
4: 'parallelGateway',
|
| 28 |
+
5: 'messageEvent',
|
| 29 |
+
6: 'pool',
|
| 30 |
+
7: 'lane',
|
| 31 |
+
8: 'dataObject',
|
| 32 |
+
9: 'dataStore',
|
| 33 |
+
10: 'subProcess',
|
| 34 |
+
11: 'eventBasedGateway',
|
| 35 |
+
12: 'timerEvent',
|
| 36 |
+
}
|
| 37 |
+
|
| 38 |
+
arrow_dict = {
|
| 39 |
+
0: 'background',
|
| 40 |
+
1: 'sequenceFlow',
|
| 41 |
+
2: 'dataAssociation',
|
| 42 |
+
3: 'messageFlow',
|
| 43 |
+
}
|
| 44 |
+
|
| 45 |
+
class_dict = {
|
| 46 |
+
0: 'background',
|
| 47 |
+
1: 'task',
|
| 48 |
+
2: 'exclusiveGateway',
|
| 49 |
+
3: 'event',
|
| 50 |
+
4: 'parallelGateway',
|
| 51 |
+
5: 'messageEvent',
|
| 52 |
+
6: 'pool',
|
| 53 |
+
7: 'lane',
|
| 54 |
+
8: 'dataObject',
|
| 55 |
+
9: 'dataStore',
|
| 56 |
+
10: 'subProcess',
|
| 57 |
+
11: 'eventBasedGateway',
|
| 58 |
+
12: 'timerEvent',
|
| 59 |
+
13: 'sequenceFlow',
|
| 60 |
+
14: 'dataAssociation',
|
| 61 |
+
15: 'messageFlow',
|
| 62 |
+
}
|
| 63 |
+
|
| 64 |
+
def rescale_boxes(scale, boxes):
|
| 65 |
+
for i in range(len(boxes)):
|
| 66 |
+
boxes[i] = [boxes[i][0]*scale,
|
| 67 |
+
boxes[i][1]*scale,
|
| 68 |
+
boxes[i][2]*scale,
|
| 69 |
+
boxes[i][3]*scale]
|
| 70 |
+
return boxes
|
| 71 |
+
|
| 72 |
+
def iou(box1, box2):
|
| 73 |
+
# Calcule l'intersection des deux boîtes englobantes
|
| 74 |
+
inter_box = [max(box1[0], box2[0]), max(box1[1], box2[1]), min(box1[2], box2[2]), min(box1[3], box2[3])]
|
| 75 |
+
inter_area = max(0, inter_box[2] - inter_box[0]) * max(0, inter_box[3] - inter_box[1])
|
| 76 |
+
|
| 77 |
+
# Calcule l'union des deux boîtes englobantes
|
| 78 |
+
box1_area = (box1[2] - box1[0]) * (box1[3] - box1[1])
|
| 79 |
+
box2_area = (box2[2] - box2[0]) * (box2[3] - box2[1])
|
| 80 |
+
union_area = box1_area + box2_area - inter_area
|
| 81 |
+
|
| 82 |
+
return inter_area / union_area
|
| 83 |
+
|
| 84 |
+
def proportion_inside(box1, box2):
|
| 85 |
+
# Calculate the intersection of the two bounding boxes
|
| 86 |
+
inter_box = [max(box1[0], box2[0]), max(box1[1], box2[1]), min(box1[2], box2[2]), min(box1[3], box2[3])]
|
| 87 |
+
inter_area = max(0, inter_box[2] - inter_box[0]) * max(0, inter_box[3] - inter_box[1])
|
| 88 |
+
|
| 89 |
+
# Calculate the area of box1
|
| 90 |
+
box1_area = (box1[2] - box1[0]) * (box1[3] - box1[1])
|
| 91 |
+
|
| 92 |
+
# Calculate the proportion of box1 inside box2
|
| 93 |
+
if box1_area == 0:
|
| 94 |
+
return 0
|
| 95 |
+
proportion = inter_area / box1_area
|
| 96 |
+
|
| 97 |
+
# Ensure the proportion is at most 100%
|
| 98 |
+
return min(proportion, 1.0)
|
| 99 |
+
|
| 100 |
+
def resize_boxes(boxes, original_size, target_size):
|
| 101 |
+
"""
|
| 102 |
+
Resizes bounding boxes according to a new image size.
|
| 103 |
+
|
| 104 |
+
Parameters:
|
| 105 |
+
- boxes (np.array): The original bounding boxes as a numpy array of shape [N, 4].
|
| 106 |
+
- original_size (tuple): The original size of the image as (width, height).
|
| 107 |
+
- target_size (tuple): The desired size to resize the image to as (width, height).
|
| 108 |
+
|
| 109 |
+
Returns:
|
| 110 |
+
- np.array: The resized bounding boxes as a numpy array of shape [N, 4].
|
| 111 |
+
"""
|
| 112 |
+
orig_width, orig_height = original_size
|
| 113 |
+
target_width, target_height = target_size
|
| 114 |
+
|
| 115 |
+
# Calculate the ratios for width and height
|
| 116 |
+
width_ratio = target_width / orig_width
|
| 117 |
+
height_ratio = target_height / orig_height
|
| 118 |
+
|
| 119 |
+
# Apply the ratios to the bounding boxes
|
| 120 |
+
boxes[:, 0] *= width_ratio
|
| 121 |
+
boxes[:, 1] *= height_ratio
|
| 122 |
+
boxes[:, 2] *= width_ratio
|
| 123 |
+
boxes[:, 3] *= height_ratio
|
| 124 |
+
|
| 125 |
+
return boxes
|
| 126 |
+
|
| 127 |
+
def resize_keypoints(keypoints: np.ndarray, original_size: tuple, target_size: tuple) -> np.ndarray:
|
| 128 |
+
"""
|
| 129 |
+
Resize keypoints based on the original and target dimensions of an image.
|
| 130 |
+
|
| 131 |
+
Parameters:
|
| 132 |
+
- keypoints (np.ndarray): The array of keypoints, where each keypoint is represented by its (x, y) coordinates.
|
| 133 |
+
- original_size (tuple): The width and height of the original image (width, height).
|
| 134 |
+
- target_size (tuple): The width and height of the target image (width, height).
|
| 135 |
+
|
| 136 |
+
Returns:
|
| 137 |
+
- np.ndarray: The resized keypoints.
|
| 138 |
+
|
| 139 |
+
Explanation:
|
| 140 |
+
The function calculates the ratio of the target dimensions to the original dimensions.
|
| 141 |
+
It then applies these ratios to the x and y coordinates of each keypoint to scale them
|
| 142 |
+
appropriately to the target image size.
|
| 143 |
+
"""
|
| 144 |
+
|
| 145 |
+
orig_width, orig_height = original_size
|
| 146 |
+
target_width, target_height = target_size
|
| 147 |
+
|
| 148 |
+
# Calculate the ratios for width and height scaling
|
| 149 |
+
width_ratio = target_width / orig_width
|
| 150 |
+
height_ratio = target_height / orig_height
|
| 151 |
+
|
| 152 |
+
# Apply the scaling ratios to the x and y coordinates of each keypoint
|
| 153 |
+
keypoints[:, 0] *= width_ratio # Scale x coordinates
|
| 154 |
+
keypoints[:, 1] *= height_ratio # Scale y coordinates
|
| 155 |
+
|
| 156 |
+
return keypoints
|
| 157 |
+
|
| 158 |
+
|
| 159 |
+
|
| 160 |
+
class RandomCrop:
|
| 161 |
+
def __init__(self, new_size=(1333,800),crop_fraction=0.5, min_objects=4):
|
| 162 |
+
self.crop_fraction = crop_fraction
|
| 163 |
+
self.min_objects = min_objects
|
| 164 |
+
self.new_size = new_size
|
| 165 |
+
|
| 166 |
+
def __call__(self, image, target):
|
| 167 |
+
new_w1, new_h1 = self.new_size
|
| 168 |
+
w, h = image.size
|
| 169 |
+
new_w = int(w * self.crop_fraction)
|
| 170 |
+
new_h = int(new_w*new_h1/new_w1)
|
| 171 |
+
|
| 172 |
+
i=0
|
| 173 |
+
for i in range(4):
|
| 174 |
+
if new_h >= h:
|
| 175 |
+
i += 0.05
|
| 176 |
+
new_w = int(w * (self.crop_fraction - i))
|
| 177 |
+
new_h = int(new_w*new_h1/new_w1)
|
| 178 |
+
if new_h < h:
|
| 179 |
+
continue
|
| 180 |
+
|
| 181 |
+
if new_h >= h:
|
| 182 |
+
return image, target
|
| 183 |
+
|
| 184 |
+
boxes = target["boxes"]
|
| 185 |
+
if 'keypoints' in target:
|
| 186 |
+
keypoints = target["keypoints"]
|
| 187 |
+
else:
|
| 188 |
+
keypoints = []
|
| 189 |
+
for i in range(len(boxes)):
|
| 190 |
+
keypoints.append(torch.zeros((2,3)))
|
| 191 |
+
|
| 192 |
+
|
| 193 |
+
# Attempt to find a suitable crop region
|
| 194 |
+
success = False
|
| 195 |
+
for _ in range(100): # Max 100 attempts to find a valid crop
|
| 196 |
+
top = random.randint(0, h - new_h)
|
| 197 |
+
left = random.randint(0, w - new_w)
|
| 198 |
+
crop_region = [left, top, left + new_w, top + new_h]
|
| 199 |
+
|
| 200 |
+
# Check how many objects are fully contained in this region
|
| 201 |
+
contained_boxes = []
|
| 202 |
+
contained_keypoints = []
|
| 203 |
+
for box, kp in zip(boxes, keypoints):
|
| 204 |
+
if box[0] >= crop_region[0] and box[1] >= crop_region[1] and box[2] <= crop_region[2] and box[3] <= crop_region[3]:
|
| 205 |
+
# Adjust box and keypoints coordinates
|
| 206 |
+
new_box = box - torch.tensor([crop_region[0], crop_region[1], crop_region[0], crop_region[1]])
|
| 207 |
+
new_kp = kp - torch.tensor([crop_region[0], crop_region[1], 0])
|
| 208 |
+
contained_boxes.append(new_box)
|
| 209 |
+
contained_keypoints.append(new_kp)
|
| 210 |
+
|
| 211 |
+
if len(contained_boxes) >= self.min_objects:
|
| 212 |
+
success = True
|
| 213 |
+
break
|
| 214 |
+
|
| 215 |
+
if success:
|
| 216 |
+
# Perform the actual crop
|
| 217 |
+
image = F.crop(image, top, left, new_h, new_w)
|
| 218 |
+
target["boxes"] = torch.stack(contained_boxes) if contained_boxes else torch.zeros((0, 4))
|
| 219 |
+
if 'keypoints' in target:
|
| 220 |
+
target["keypoints"] = torch.stack(contained_keypoints) if contained_keypoints else torch.zeros((0, 2, 4))
|
| 221 |
+
|
| 222 |
+
return image, target
|
| 223 |
+
|
| 224 |
+
|
| 225 |
+
class RandomFlip:
|
| 226 |
+
def __init__(self, h_flip_prob=0.5, v_flip_prob=0.5):
|
| 227 |
+
"""
|
| 228 |
+
Initializes the RandomFlip with probabilities for flipping.
|
| 229 |
+
|
| 230 |
+
Parameters:
|
| 231 |
+
- h_flip_prob (float): Probability of applying a horizontal flip to the image.
|
| 232 |
+
- v_flip_prob (float): Probability of applying a vertical flip to the image.
|
| 233 |
+
"""
|
| 234 |
+
self.h_flip_prob = h_flip_prob
|
| 235 |
+
self.v_flip_prob = v_flip_prob
|
| 236 |
+
|
| 237 |
+
def __call__(self, image, target):
|
| 238 |
+
"""
|
| 239 |
+
Applies random horizontal and/or vertical flip to the image and updates target data accordingly.
|
| 240 |
+
|
| 241 |
+
Parameters:
|
| 242 |
+
- image (PIL Image): The image to be flipped.
|
| 243 |
+
- target (dict): The target dictionary containing 'boxes' and 'keypoints'.
|
| 244 |
+
|
| 245 |
+
Returns:
|
| 246 |
+
- PIL Image, dict: The flipped image and its updated target dictionary.
|
| 247 |
+
"""
|
| 248 |
+
if random.random() < self.h_flip_prob:
|
| 249 |
+
image = F.hflip(image)
|
| 250 |
+
w, _ = image.size # Get the new width of the image after flip for bounding box adjustment
|
| 251 |
+
# Adjust bounding boxes for horizontal flip
|
| 252 |
+
for i, box in enumerate(target['boxes']):
|
| 253 |
+
xmin, ymin, xmax, ymax = box
|
| 254 |
+
target['boxes'][i] = torch.tensor([w - xmax, ymin, w - xmin, ymax], dtype=torch.float32)
|
| 255 |
+
|
| 256 |
+
# Adjust keypoints for horizontal flip
|
| 257 |
+
if 'keypoints' in target:
|
| 258 |
+
new_keypoints = []
|
| 259 |
+
for keypoints_for_object in target['keypoints']:
|
| 260 |
+
flipped_keypoints_for_object = []
|
| 261 |
+
for kp in keypoints_for_object:
|
| 262 |
+
x, y = kp[:2]
|
| 263 |
+
new_x = w - x
|
| 264 |
+
flipped_keypoints_for_object.append(torch.tensor([new_x, y] + list(kp[2:])))
|
| 265 |
+
new_keypoints.append(torch.stack(flipped_keypoints_for_object))
|
| 266 |
+
target['keypoints'] = torch.stack(new_keypoints)
|
| 267 |
+
|
| 268 |
+
if random.random() < self.v_flip_prob:
|
| 269 |
+
image = F.vflip(image)
|
| 270 |
+
_, h = image.size # Get the new height of the image after flip for bounding box adjustment
|
| 271 |
+
# Adjust bounding boxes for vertical flip
|
| 272 |
+
for i, box in enumerate(target['boxes']):
|
| 273 |
+
xmin, ymin, xmax, ymax = box
|
| 274 |
+
target['boxes'][i] = torch.tensor([xmin, h - ymax, xmax, h - ymin], dtype=torch.float32)
|
| 275 |
+
|
| 276 |
+
# Adjust keypoints for vertical flip
|
| 277 |
+
if 'keypoints' in target:
|
| 278 |
+
new_keypoints = []
|
| 279 |
+
for keypoints_for_object in target['keypoints']:
|
| 280 |
+
flipped_keypoints_for_object = []
|
| 281 |
+
for kp in keypoints_for_object:
|
| 282 |
+
x, y = kp[:2]
|
| 283 |
+
new_y = h - y
|
| 284 |
+
flipped_keypoints_for_object.append(torch.tensor([x, new_y] + list(kp[2:])))
|
| 285 |
+
new_keypoints.append(torch.stack(flipped_keypoints_for_object))
|
| 286 |
+
target['keypoints'] = torch.stack(new_keypoints)
|
| 287 |
+
|
| 288 |
+
return image, target
|
| 289 |
+
|
| 290 |
+
|
| 291 |
+
class RandomRotate:
|
| 292 |
+
def __init__(self, max_rotate_deg=20, rotate_proba=0.3):
|
| 293 |
+
"""
|
| 294 |
+
Initializes the RandomRotate with a maximum rotation angle and probability of rotating.
|
| 295 |
+
|
| 296 |
+
Parameters:
|
| 297 |
+
- max_rotate_deg (int): Maximum degree to rotate the image.
|
| 298 |
+
- rotate_proba (float): Probability of applying rotation to the image.
|
| 299 |
+
"""
|
| 300 |
+
self.max_rotate_deg = max_rotate_deg
|
| 301 |
+
self.rotate_proba = rotate_proba
|
| 302 |
+
|
| 303 |
+
def __call__(self, image, target):
|
| 304 |
+
"""
|
| 305 |
+
Randomly rotates the image and updates the target data accordingly.
|
| 306 |
+
|
| 307 |
+
Parameters:
|
| 308 |
+
- image (PIL Image): The image to be rotated.
|
| 309 |
+
- target (dict): The target dictionary containing 'boxes', 'labels', and 'keypoints'.
|
| 310 |
+
|
| 311 |
+
Returns:
|
| 312 |
+
- PIL Image, dict: The rotated image and its updated target dictionary.
|
| 313 |
+
"""
|
| 314 |
+
if random.random() < self.rotate_proba:
|
| 315 |
+
angle = random.uniform(-self.max_rotate_deg, self.max_rotate_deg)
|
| 316 |
+
image = F.rotate(image, angle, expand=False, fill=200)
|
| 317 |
+
|
| 318 |
+
# Rotate bounding boxes
|
| 319 |
+
w, h = image.size
|
| 320 |
+
cx, cy = w / 2, h / 2
|
| 321 |
+
boxes = target["boxes"]
|
| 322 |
+
new_boxes = []
|
| 323 |
+
for box in boxes:
|
| 324 |
+
new_box = self.rotate_box(box, angle, cx, cy)
|
| 325 |
+
new_boxes.append(new_box)
|
| 326 |
+
target["boxes"] = torch.stack(new_boxes)
|
| 327 |
+
|
| 328 |
+
# Rotate keypoints
|
| 329 |
+
if 'keypoints' in target:
|
| 330 |
+
new_keypoints = []
|
| 331 |
+
for keypoints in target["keypoints"]:
|
| 332 |
+
new_kp = self.rotate_keypoints(keypoints, angle, cx, cy)
|
| 333 |
+
new_keypoints.append(new_kp)
|
| 334 |
+
target["keypoints"] = torch.stack(new_keypoints)
|
| 335 |
+
|
| 336 |
+
return image, target
|
| 337 |
+
|
| 338 |
+
def rotate_box(self, box, angle, cx, cy):
|
| 339 |
+
"""
|
| 340 |
+
Rotates a bounding box by a given angle around the center of the image.
|
| 341 |
+
"""
|
| 342 |
+
x1, y1, x2, y2 = box
|
| 343 |
+
corners = torch.tensor([
|
| 344 |
+
[x1, y1],
|
| 345 |
+
[x2, y1],
|
| 346 |
+
[x2, y2],
|
| 347 |
+
[x1, y2]
|
| 348 |
+
])
|
| 349 |
+
corners = torch.cat((corners, torch.ones(corners.shape[0], 1)), dim=1)
|
| 350 |
+
M = cv2.getRotationMatrix2D((cx, cy), angle, 1)
|
| 351 |
+
corners = torch.matmul(torch.tensor(M, dtype=torch.float32), corners.T).T
|
| 352 |
+
x_ = corners[:, 0]
|
| 353 |
+
y_ = corners[:, 1]
|
| 354 |
+
x_min, x_max = torch.min(x_), torch.max(x_)
|
| 355 |
+
y_min, y_max = torch.min(y_), torch.max(y_)
|
| 356 |
+
return torch.tensor([x_min, y_min, x_max, y_max], dtype=torch.float32)
|
| 357 |
+
|
| 358 |
+
def rotate_keypoints(self, keypoints, angle, cx, cy):
|
| 359 |
+
"""
|
| 360 |
+
Rotates keypoints by a given angle around the center of the image.
|
| 361 |
+
"""
|
| 362 |
+
new_keypoints = []
|
| 363 |
+
for kp in keypoints:
|
| 364 |
+
x, y, v = kp
|
| 365 |
+
point = torch.tensor([x, y, 1])
|
| 366 |
+
M = cv2.getRotationMatrix2D((cx, cy), angle, 1)
|
| 367 |
+
new_point = torch.matmul(torch.tensor(M, dtype=torch.float32), point)
|
| 368 |
+
new_keypoints.append(torch.tensor([new_point[0], new_point[1], v], dtype=torch.float32))
|
| 369 |
+
return torch.stack(new_keypoints)
|
| 370 |
+
|
| 371 |
+
def rotate_90_box(box, angle, w, h):
|
| 372 |
+
x1, y1, x2, y2 = box
|
| 373 |
+
if angle == 90:
|
| 374 |
+
return torch.tensor([y1,h-x2,y2,h-x1])
|
| 375 |
+
elif angle == 270 or angle == -90:
|
| 376 |
+
return torch.tensor([w-y2,x1,w-y1,x2])
|
| 377 |
+
else:
|
| 378 |
+
print("angle not supported")
|
| 379 |
+
|
| 380 |
+
def rotate_90_keypoints(kp, angle, w, h):
|
| 381 |
+
# Extract coordinates and visibility from each keypoint tensor
|
| 382 |
+
x1, y1, v1 = kp[0][0], kp[0][1], kp[0][2]
|
| 383 |
+
x2, y2, v2 = kp[1][0], kp[1][1], kp[1][2]
|
| 384 |
+
# Swap x and y coordinates for each keypoint
|
| 385 |
+
if angle == 90:
|
| 386 |
+
new = [[y1, h-x1, v1], [y2, h-x2, v2]]
|
| 387 |
+
elif angle == 270 or angle == -90:
|
| 388 |
+
new = [[w-y1, x1, v1], [w-y2, x2, v2]]
|
| 389 |
+
|
| 390 |
+
return torch.tensor(new, dtype=torch.float32)
|
| 391 |
+
|
| 392 |
+
|
| 393 |
+
def rotate_vertical(image, target):
|
| 394 |
+
# Rotate the image and target if the image is vertical
|
| 395 |
+
new_boxes = []
|
| 396 |
+
angle = random.choice([-90,90])
|
| 397 |
+
image = F.rotate(image, angle, expand=True, fill=200)
|
| 398 |
+
for box in target["boxes"]:
|
| 399 |
+
new_box = rotate_90_box(box, angle, image.size[0], image.size[1])
|
| 400 |
+
new_boxes.append(new_box)
|
| 401 |
+
target["boxes"] = torch.stack(new_boxes)
|
| 402 |
+
|
| 403 |
+
if 'keypoints' in target:
|
| 404 |
+
new_kp = []
|
| 405 |
+
for kp in target['keypoints']:
|
| 406 |
+
new_key = rotate_90_keypoints(kp, angle, image.size[0], image.size[1])
|
| 407 |
+
new_kp.append(new_key)
|
| 408 |
+
target['keypoints'] = torch.stack(new_kp)
|
| 409 |
+
return image, target
|
| 410 |
+
|
| 411 |
+
class BPMN_Dataset(Dataset):
|
| 412 |
+
def __init__(self, annotations, transform=None, crop_transform=None, crop_prob=0.3, rotate_90_proba=0.2, flip_transform=None, rotate_transform=None, new_size=(1333,800),keep_ratio=False,resize=True, model_type='object', rotate_vertical=False):
|
| 413 |
+
self.annotations = annotations
|
| 414 |
+
print(f"Loaded {len(self.annotations)} annotations.")
|
| 415 |
+
self.transform = transform
|
| 416 |
+
self.crop_transform = crop_transform
|
| 417 |
+
self.crop_prob = crop_prob
|
| 418 |
+
self.flip_transform = flip_transform
|
| 419 |
+
self.rotate_transform = rotate_transform
|
| 420 |
+
self.resize = resize
|
| 421 |
+
self.rotate_vertical = rotate_vertical
|
| 422 |
+
self.new_size = new_size
|
| 423 |
+
self.keep_ratio = keep_ratio
|
| 424 |
+
self.model_type = model_type
|
| 425 |
+
if model_type == 'object':
|
| 426 |
+
self.dict = object_dict
|
| 427 |
+
elif model_type == 'arrow':
|
| 428 |
+
self.dict = arrow_dict
|
| 429 |
+
self.rotate_90_proba = rotate_90_proba
|
| 430 |
+
|
| 431 |
+
def __len__(self):
|
| 432 |
+
return len(self.annotations)
|
| 433 |
+
|
| 434 |
+
def __getitem__(self, idx):
|
| 435 |
+
annotation = self.annotations[idx]
|
| 436 |
+
image = annotation.img.convert("RGB")
|
| 437 |
+
boxes = torch.tensor(np.array(annotation.boxes_ltrb), dtype=torch.float32)
|
| 438 |
+
labels_names = [ann for ann in annotation.categories]
|
| 439 |
+
|
| 440 |
+
#only keep the labels, boxes and keypoints that are in the class_dict
|
| 441 |
+
kept_indices = [i for i, ann in enumerate(annotation.categories) if ann in self.dict.values()]
|
| 442 |
+
boxes = boxes[kept_indices]
|
| 443 |
+
labels_names = [ann for i, ann in enumerate(labels_names) if i in kept_indices]
|
| 444 |
+
|
| 445 |
+
labels_id = torch.tensor([(list(self.dict.values()).index(ann)) for ann in labels_names], dtype=torch.int64)
|
| 446 |
+
|
| 447 |
+
# Initialize keypoints tensor
|
| 448 |
+
max_keypoints = 2
|
| 449 |
+
keypoints = torch.zeros((len(labels_id), max_keypoints, 3), dtype=torch.float32)
|
| 450 |
+
|
| 451 |
+
ii=0
|
| 452 |
+
for i, ann in enumerate(annotation.annotations):
|
| 453 |
+
#only keep the keypoints that are in the kept indices
|
| 454 |
+
if i not in kept_indices:
|
| 455 |
+
continue
|
| 456 |
+
if ann.category in ["sequenceFlow", "messageFlow", "dataAssociation"]:
|
| 457 |
+
# Fill the keypoints tensor for this annotation, mark as visible (1)
|
| 458 |
+
kp = np.array(ann.keypoints, dtype=np.float32).reshape(-1, 3)
|
| 459 |
+
kp = kp[:,:2]
|
| 460 |
+
visible = np.ones((kp.shape[0], 1), dtype=np.float32)
|
| 461 |
+
kp = np.hstack([kp, visible])
|
| 462 |
+
keypoints[ii, :kp.shape[0], :] = torch.tensor(kp, dtype=torch.float32)
|
| 463 |
+
ii += 1
|
| 464 |
+
|
| 465 |
+
area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
|
| 466 |
+
|
| 467 |
+
if self.model_type == 'object':
|
| 468 |
+
target = {
|
| 469 |
+
"boxes": boxes,
|
| 470 |
+
"labels": labels_id,
|
| 471 |
+
#"area": area,
|
| 472 |
+
#"keypoints": keypoints,
|
| 473 |
+
}
|
| 474 |
+
elif self.model_type == 'arrow':
|
| 475 |
+
target = {
|
| 476 |
+
"boxes": boxes,
|
| 477 |
+
"labels": labels_id,
|
| 478 |
+
#"area": area,
|
| 479 |
+
"keypoints": keypoints,
|
| 480 |
+
}
|
| 481 |
+
|
| 482 |
+
# Randomly apply flip transform
|
| 483 |
+
if self.flip_transform:
|
| 484 |
+
image, target = self.flip_transform(image, target)
|
| 485 |
+
|
| 486 |
+
# Randomly apply rotate transform
|
| 487 |
+
if self.rotate_transform:
|
| 488 |
+
image, target = self.rotate_transform(image, target)
|
| 489 |
+
|
| 490 |
+
# Randomly apply the custom cropping transform
|
| 491 |
+
if self.crop_transform and random.random() < self.crop_prob:
|
| 492 |
+
image, target = self.crop_transform(image, target)
|
| 493 |
+
|
| 494 |
+
# Rotate vertical image
|
| 495 |
+
if self.rotate_vertical and random.random() < self.rotate_90_proba:
|
| 496 |
+
image, target = rotate_vertical(image, target)
|
| 497 |
+
|
| 498 |
+
if self.resize:
|
| 499 |
+
if self.keep_ratio:
|
| 500 |
+
original_size = image.size
|
| 501 |
+
# Calculate scale to fit the new size while maintaining aspect ratio
|
| 502 |
+
scale = min(self.new_size[0] / original_size[0], self.new_size[1] / original_size[1])
|
| 503 |
+
new_scaled_size = (int(original_size[0] * scale), int(original_size[1] * scale))
|
| 504 |
+
|
| 505 |
+
target['boxes'] = resize_boxes(target['boxes'], (image.size[0],image.size[1]), (new_scaled_size))
|
| 506 |
+
if 'area' in target:
|
| 507 |
+
target['area'] = (target['boxes'][:, 3] - target['boxes'][:, 1]) * (target['boxes'][:, 2] - target['boxes'][:, 0])
|
| 508 |
+
|
| 509 |
+
if 'keypoints' in target:
|
| 510 |
+
for i in range(len(target['keypoints'])):
|
| 511 |
+
target['keypoints'][i] = resize_keypoints(target['keypoints'][i], (image.size[0],image.size[1]), (new_scaled_size))
|
| 512 |
+
|
| 513 |
+
# Resize image to new scaled size
|
| 514 |
+
image = F.resize(image, (new_scaled_size[1], new_scaled_size[0]))
|
| 515 |
+
|
| 516 |
+
# Pad the resized image to make it exactly the desired size
|
| 517 |
+
padding = [0, 0, self.new_size[0] - new_scaled_size[0], self.new_size[1] - new_scaled_size[1]]
|
| 518 |
+
image = F.pad(image, padding, fill=200, padding_mode='constant')
|
| 519 |
+
else:
|
| 520 |
+
target['boxes'] = resize_boxes(target['boxes'], (image.size[0],image.size[1]), self.new_size)
|
| 521 |
+
if 'area' in target:
|
| 522 |
+
target['area'] = (target['boxes'][:, 3] - target['boxes'][:, 1]) * (target['boxes'][:, 2] - target['boxes'][:, 0])
|
| 523 |
+
if 'keypoints' in target:
|
| 524 |
+
for i in range(len(target['keypoints'])):
|
| 525 |
+
target['keypoints'][i] = resize_keypoints(target['keypoints'][i], (image.size[0],image.size[1]), self.new_size)
|
| 526 |
+
image = F.resize(image, (self.new_size[1], self.new_size[0]))
|
| 527 |
+
|
| 528 |
+
return self.transform(image), target
|
| 529 |
+
|
| 530 |
+
def collate_fn(batch):
|
| 531 |
+
"""
|
| 532 |
+
Custom collation function for DataLoader that handles batches of images and targets.
|
| 533 |
+
|
| 534 |
+
This function ensures that images are properly batched together using PyTorch's default collation,
|
| 535 |
+
while keeping the targets (such as bounding boxes and labels) in a list of dictionaries,
|
| 536 |
+
as each image might have a different number of objects detected.
|
| 537 |
+
|
| 538 |
+
Parameters:
|
| 539 |
+
- batch (list): A list of tuples, where each tuple contains an image and its corresponding target dictionary.
|
| 540 |
+
|
| 541 |
+
Returns:
|
| 542 |
+
- Tuple containing:
|
| 543 |
+
- Tensor: Batched images.
|
| 544 |
+
- List of dicts: Targets corresponding to each image in the batch.
|
| 545 |
+
"""
|
| 546 |
+
images, targets = zip(*batch) # Unzip the batch into separate lists for images and targets.
|
| 547 |
+
|
| 548 |
+
# Batch images using the default collate function which handles tensors, numpy arrays, numbers, etc.
|
| 549 |
+
images = default_collate(images)
|
| 550 |
+
|
| 551 |
+
return images, targets
|
| 552 |
+
|
| 553 |
+
|
| 554 |
+
|
| 555 |
+
def create_loader(new_size,transformation, annotations1, annotations2=None,
|
| 556 |
+
batch_size=4, crop_prob=0.2, crop_fraction=0.7, min_objects=3,
|
| 557 |
+
h_flip_prob=0.3, v_flip_prob=0.3, max_rotate_deg=20, rotate_90_proba=0.2, rotate_proba=0.3,
|
| 558 |
+
seed=42, resize=True, rotate_vertical=False, keep_ratio=False, model_type = 'object'):
|
| 559 |
+
"""
|
| 560 |
+
Creates a DataLoader for BPMN datasets with optional transformations and concatenation of two datasets.
|
| 561 |
+
|
| 562 |
+
Parameters:
|
| 563 |
+
- transformation (callable): Transformation function to apply to each image (e.g., normalization).
|
| 564 |
+
- annotations1 (list): Primary list of annotations.
|
| 565 |
+
- annotations2 (list, optional): Secondary list of annotations to concatenate with the first.
|
| 566 |
+
- batch_size (int): Number of images per batch.
|
| 567 |
+
- crop_prob (float): Probability of applying the crop transformation.
|
| 568 |
+
- crop_fraction (float): Fraction of the original width to use when cropping.
|
| 569 |
+
- min_objects (int): Minimum number of objects required to be within the crop.
|
| 570 |
+
- h_flip_prob (float): Probability of applying horizontal flip.
|
| 571 |
+
- v_flip_prob (float): Probability of applying vertical flip.
|
| 572 |
+
- seed (int): Seed for random number generators for reproducibility.
|
| 573 |
+
- resize (bool): Flag indicating whether to resize images after transformations.
|
| 574 |
+
|
| 575 |
+
Returns:
|
| 576 |
+
- DataLoader: Configured data loader for the dataset.
|
| 577 |
+
"""
|
| 578 |
+
|
| 579 |
+
# Initialize custom transformations for cropping and flipping
|
| 580 |
+
custom_crop_transform = RandomCrop(new_size,crop_fraction, min_objects)
|
| 581 |
+
custom_flip_transform = RandomFlip(h_flip_prob, v_flip_prob)
|
| 582 |
+
custom_rotate_transform = RandomRotate(max_rotate_deg, rotate_proba)
|
| 583 |
+
|
| 584 |
+
# Create the primary dataset
|
| 585 |
+
dataset = BPMN_Dataset(
|
| 586 |
+
annotations=annotations1,
|
| 587 |
+
transform=transformation,
|
| 588 |
+
crop_transform=custom_crop_transform,
|
| 589 |
+
crop_prob=crop_prob,
|
| 590 |
+
rotate_90_proba=rotate_90_proba,
|
| 591 |
+
flip_transform=custom_flip_transform,
|
| 592 |
+
rotate_transform=custom_rotate_transform,
|
| 593 |
+
rotate_vertical=rotate_vertical,
|
| 594 |
+
new_size=new_size,
|
| 595 |
+
keep_ratio=keep_ratio,
|
| 596 |
+
model_type=model_type,
|
| 597 |
+
resize=resize
|
| 598 |
+
)
|
| 599 |
+
|
| 600 |
+
# Optionally concatenate a second dataset
|
| 601 |
+
if annotations2:
|
| 602 |
+
dataset2 = BPMN_Dataset(
|
| 603 |
+
annotations=annotations2,
|
| 604 |
+
transform=transformation,
|
| 605 |
+
crop_transform=custom_crop_transform,
|
| 606 |
+
crop_prob=crop_prob,
|
| 607 |
+
rotate_90_proba=rotate_90_proba,
|
| 608 |
+
flip_transform=custom_flip_transform,
|
| 609 |
+
rotate_vertical=rotate_vertical,
|
| 610 |
+
new_size=new_size,
|
| 611 |
+
keep_ratio=keep_ratio,
|
| 612 |
+
model_type=model_type,
|
| 613 |
+
resize=resize
|
| 614 |
+
)
|
| 615 |
+
dataset = ConcatDataset([dataset, dataset2]) # Concatenate the two datasets
|
| 616 |
+
|
| 617 |
+
# Set the seed for reproducibility in random operations within transformations and data loading
|
| 618 |
+
random.seed(seed)
|
| 619 |
+
torch.manual_seed(seed)
|
| 620 |
+
|
| 621 |
+
# Create the DataLoader with the dataset
|
| 622 |
+
data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True, collate_fn=collate_fn)
|
| 623 |
+
|
| 624 |
+
return data_loader
|
| 625 |
+
|
| 626 |
+
|
| 627 |
+
|
| 628 |
+
def write_results(name_model,metrics_list,start_epoch):
|
| 629 |
+
with open('./results/'+ name_model+ '.txt', 'w') as f:
|
| 630 |
+
for i in range(len(metrics_list[0])):
|
| 631 |
+
f.write(f"{i+1+start_epoch},{metrics_list[0][i]},{metrics_list[1][i]},{metrics_list[2][i]},{metrics_list[3][i]},{metrics_list[4][i]},{metrics_list[5][i]},{metrics_list[6][i]},{metrics_list[7][i]},{metrics_list[8][i]},{metrics_list[9][i]} \n")
|
| 632 |
+
|
| 633 |
+
|
| 634 |
+
def find_other_keypoint(idx, keypoints, boxes):
|
| 635 |
+
box = boxes[idx]
|
| 636 |
+
key1,key2 = keypoints[idx]
|
| 637 |
+
x1, y1, x2, y2 = box
|
| 638 |
+
center = ((x1 + x2) // 2, (y1 + y2) // 2)
|
| 639 |
+
average_keypoint = (key1 + key2) // 2
|
| 640 |
+
#find the opposite keypoint to the center
|
| 641 |
+
if average_keypoint[0] < center[0]:
|
| 642 |
+
x = center[0] + abs(center[0] - average_keypoint[0])
|
| 643 |
+
else:
|
| 644 |
+
x = center[0] - abs(center[0] - average_keypoint[0])
|
| 645 |
+
if average_keypoint[1] < center[1]:
|
| 646 |
+
y = center[1] + abs(center[1] - average_keypoint[1])
|
| 647 |
+
else:
|
| 648 |
+
y = center[1] - abs(center[1] - average_keypoint[1])
|
| 649 |
+
return x, y, average_keypoint[0], average_keypoint[1]
|
| 650 |
+
|
| 651 |
+
|
def filter_overlap_boxes(boxes, scores, labels, keypoints, iou_threshold=0.5):
    """
    Filters overlapping boxes based on the Intersection over Union (IoU) metric, keeping only the boxes with the highest scores.

    Parameters:
    - boxes (np.ndarray): Array of bounding boxes with shape (N, 4), where each row contains [x_min, y_min, x_max, y_max].
    - scores (np.ndarray): Array of scores for each box, reflecting the confidence of detection.
    - labels (np.ndarray): Array of labels corresponding to each box.
    - keypoints (np.ndarray): Array of keypoints associated with each box.
    - iou_threshold (float): Threshold for IoU above which a box is considered overlapping.

    Returns:
    - tuple: Filtered boxes, scores, labels, and keypoints.
    """
    # Calculate the area of each bounding box to use in IoU calculation.
    areas = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])

    # Sort the indices of the boxes based on their scores in descending order.
    order = scores.argsort()[::-1]

    keep = []  # List to store indices of boxes to keep.

    while order.size > 0:
        # Take the first index (highest score) from the sorted list.
        i = order[0]
        keep.append(i)  # Add this index to 'keep' list.

        # Compute the coordinates of the intersection rectangle.
        xx1 = np.maximum(boxes[i, 0], boxes[order[1:], 0])
        yy1 = np.maximum(boxes[i, 1], boxes[order[1:], 1])
        xx2 = np.minimum(boxes[i, 2], boxes[order[1:], 2])
        yy2 = np.minimum(boxes[i, 3], boxes[order[1:], 3])

        # Compute the area of the intersection rectangle.
        w = np.maximum(0.0, xx2 - xx1)
        h = np.maximum(0.0, yy2 - yy1)
        inter = w * h

        # Calculate IoU and find boxes with IoU less than the threshold to keep.
        iou = inter / (areas[i] + areas[order[1:]] - inter)
        inds = np.where(iou <= iou_threshold)[0]

        # Update the list of box indices to consider in the next iteration.
        order = order[inds + 1]  # Skip the first element since it's already included in 'keep'.

    # Use the indices in 'keep' to select the boxes, scores, labels, and keypoints to return.
    boxes = boxes[keep]
    scores = scores[keep]
    labels = labels[keep]
    keypoints = keypoints[keep]

    return boxes, scores, labels, keypoints

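# Illustrative sketch (not part of the original file): greedy IoU suppression with
# filter_overlap_boxes on made-up detections. The second box overlaps the first
# with an IoU of about 0.68 > 0.5, so it is dropped; the third box is kept.
#
#   import numpy as np
#   boxes = np.array([[0, 0, 10, 10], [1, 1, 11, 11], [50, 50, 60, 60]], dtype=float)
#   scores = np.array([0.9, 0.8, 0.7])
#   labels = np.array([1, 1, 2])
#   keypoints = np.zeros((3, 2, 3))
#   boxes, scores, labels, keypoints = filter_overlap_boxes(boxes, scores, labels, keypoints, iou_threshold=0.5)
#   # boxes.shape == (2, 4), keeping the detections with scores 0.9 and 0.7
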
def draw_annotations(image,
                     target=None,
                     prediction=None,
                     full_prediction=None,
                     text_predictions=None,
                     model_dict=class_dict,
                     draw_keypoints=False,
                     draw_boxes=False,
                     draw_text=False,
                     draw_links=False,
                     draw_twins=False,
                     write_class=False,
                     write_score=False,
                     write_text=False,
                     write_idx=False,
                     score_threshold=0.4,
                     keypoints_correction=False,
                     only_print=None,
                     axis=False,
                     return_image=False,
                     new_size=(1333, 800),
                     resize=False):
    """
    Draws annotations on images including bounding boxes, keypoints, links, and text.

    Parameters:
    - image (np.array): The image on which annotations will be drawn.
    - target (dict): Ground truth data containing boxes, labels, etc.
    - prediction (dict): Prediction data from a model.
    - full_prediction (dict): Additional detailed prediction data, potentially including relationships.
    - text_predictions (tuple): OCR text predictions containing bounding boxes and texts.
    - model_dict (dict): Mapping from class IDs to class names.
    - draw_keypoints (bool): Flag to draw keypoints.
    - draw_boxes (bool): Flag to draw bounding boxes.
    - draw_text (bool): Flag to draw text annotations.
    - draw_links (bool): Flag to draw links between annotations.
    - draw_twins (bool): Flag to highlight twin keypoints.
    - write_class (bool): Flag to write class names near the annotations.
    - write_score (bool): Flag to write scores near the annotations.
    - write_text (bool): Flag to write OCR-recognized text.
    - score_threshold (float): Threshold for scores above which annotations will be drawn.
    - only_print (str): Specific class name to filter annotations by.
    - resize (bool): Whether to resize annotations to fit the image size.
    """

    # Convert image to RGB (if not already in that format)
    if prediction is None:
        image = image.squeeze(0).permute(1, 2, 0).cpu().numpy()

    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image_copy = image.copy()
    scale = max(image.shape[0], image.shape[1]) / 1000

    # Helper function to draw bounding boxes and keypoints
    def draw(data, is_prediction=False):
        """Helper function to draw annotations based on provided data."""
        for i in range(len(data['boxes'])):
            if is_prediction:
                box = data['boxes'][i].tolist()
                x1, y1, x2, y2 = box
                if resize:
                    x1, y1, x2, y2 = resize_boxes(np.array([box]), new_size, (image_copy.shape[1], image_copy.shape[0]))[0]
                score = data['scores'][i].item()
                if score < score_threshold:
                    continue
            else:
                box = data['boxes'][i].tolist()
                x1, y1, x2, y2 = box
            if draw_boxes:
                if only_print is not None:
                    if data['labels'][i] != list(model_dict.values()).index(only_print):
                        continue
                cv2.rectangle(image_copy, (int(x1), int(y1)), (int(x2), int(y2)), (0, 0, 0), int(2 * scale))
            if is_prediction and write_score:
                cv2.putText(image_copy, str(round(score, 2)), (int(x1), int(y1) + int(15 * scale)), cv2.FONT_HERSHEY_SIMPLEX, scale / 2, (100, 100, 255), 2)

            if write_class and 'labels' in data:
                class_id = data['labels'][i].item()
                cv2.putText(image_copy, model_dict[class_id], (int(x1), int(y1) - int(2 * scale)), cv2.FONT_HERSHEY_SIMPLEX, scale / 2, (255, 100, 100), 2)

            if write_idx:
                cv2.putText(image_copy, str(i), (int(x1) + int(15 * scale), int(y1) + int(15 * scale)), cv2.FONT_HERSHEY_SIMPLEX, 2 * scale, (0, 0, 0), 2)

        # Draw keypoints if available
        if draw_keypoints and 'keypoints' in data:
            if is_prediction and keypoints_correction:
                for idx, (key1, key2) in enumerate(data['keypoints']):
                    if data['labels'][idx] not in [list(model_dict.values()).index('sequenceFlow'),
                                                   list(model_dict.values()).index('messageFlow'),
                                                   list(model_dict.values()).index('dataAssociation')]:
                        continue
                    # Calculate the Euclidean distance between the two keypoints
                    distance = np.linalg.norm(key1[:2] - key2[:2])

                    if distance < 5:
                        x_new, y_new, x, y = find_other_keypoint(idx, data['keypoints'], data['boxes'])
                        data['keypoints'][idx][0] = torch.tensor([x_new, y_new, 1])
                        data['keypoints'][idx][1] = torch.tensor([x, y, 1])
                        print("keypoint has been changed")
            for i in range(len(data['keypoints'])):
                kp = data['keypoints'][i]
                for j in range(kp.shape[0]):
                    if is_prediction and data['labels'][i] not in [list(model_dict.values()).index('sequenceFlow'),
                                                                   list(model_dict.values()).index('messageFlow'),
                                                                   list(model_dict.values()).index('dataAssociation')]:
                        continue
                    if is_prediction:
                        score = data['scores'][i]
                        if score < score_threshold:
                            continue
                    x, y, v = np.array(kp[j])
                    if resize:
                        x, y, v = resize_keypoints(np.array([kp[j]]), new_size, (image_copy.shape[1], image_copy.shape[0]))[0]
                    if j == 0:
                        cv2.circle(image_copy, (int(x), int(y)), int(5 * scale), (0, 0, 255), -1)
                    else:
                        cv2.circle(image_copy, (int(x), int(y)), int(5 * scale), (255, 0, 0), -1)

        # Draw text predictions if available
        if (draw_text or write_text) and text_predictions is not None:
            for i in range(len(text_predictions[0])):
                x1, y1, x2, y2 = text_predictions[0][i]
                text = text_predictions[1][i]
                if resize:
                    x1, y1, x2, y2 = resize_boxes(np.array([[float(x1), float(y1), float(x2), float(y2)]]), new_size, (image_copy.shape[1], image_copy.shape[0]))[0]
                if draw_text:
                    cv2.rectangle(image_copy, (int(x1), int(y1)), (int(x2), int(y2)), (0, 255, 0), int(2 * scale))
                if write_text:
                    cv2.putText(image_copy, text, (int(x1 + int(2 * scale)), int((y1 + y2) / 2)), cv2.FONT_HERSHEY_SIMPLEX, scale / 2, (0, 0, 0), 2)

    def draw_with_links(full_prediction):
        """Draws links between objects based on the full prediction data."""
        # Check whether the two keypoints of a link were detected at (almost) the same position
        if draw_twins and full_prediction is not None:
            circle_color = (0, 255, 0)  # Green color for the circle
            circle_radius = int(10 * scale)  # Circle radius scaled by image scale

            for idx, (key1, key2) in enumerate(full_prediction['keypoints']):
                if full_prediction['labels'][idx] not in [list(model_dict.values()).index('sequenceFlow'),
                                                          list(model_dict.values()).index('messageFlow'),
                                                          list(model_dict.values()).index('dataAssociation')]:
                    continue
                # Calculate the Euclidean distance between the two keypoints
                distance = np.linalg.norm(key1[:2] - key2[:2])
                if distance < 10:
                    x_new, y_new, x, y = find_other_keypoint(idx, full_prediction['keypoints'], full_prediction['boxes'])
                    cv2.circle(image_copy, (int(x), int(y)), circle_radius, circle_color, -1)
                    cv2.circle(image_copy, (int(x_new), int(y_new)), circle_radius, (0, 0, 0), -1)

        # Draw links between objects
        if draw_links and full_prediction is not None:
            for i, (start_idx, end_idx) in enumerate(full_prediction['links']):
                if start_idx is None or end_idx is None:
                    continue
                start_box = full_prediction['boxes'][start_idx]
                end_box = full_prediction['boxes'][end_idx]
                current_box = full_prediction['boxes'][i]
                # Calculate the center of each bounding box
                start_center = ((start_box[0] + start_box[2]) // 2, (start_box[1] + start_box[3]) // 2)
                end_center = ((end_box[0] + end_box[2]) // 2, (end_box[1] + end_box[3]) // 2)
                current_center = ((current_box[0] + current_box[2]) // 2, (current_box[1] + current_box[3]) // 2)
                # Draw a line between the centers of the connected objects
                cv2.line(image_copy, (int(start_center[0]), int(start_center[1])), (int(current_center[0]), int(current_center[1])), (0, 0, 255), int(2 * scale))
                cv2.line(image_copy, (int(current_center[0]), int(current_center[1])), (int(end_center[0]), int(end_center[1])), (255, 0, 0), int(2 * scale))

    # Draw ground-truth annotations
    if target is not None:
        draw(target, is_prediction=False)
    # Draw predictions
    if prediction is not None:
        # prediction = prediction[0]
        draw(prediction, is_prediction=True)
    # Draw links with full predictions
    if full_prediction is not None:
        draw_with_links(full_prediction)

    # Display the image
    image_copy = cv2.cvtColor(image_copy, cv2.COLOR_BGR2RGB)
    plt.figure(figsize=(12, 12))
    plt.imshow(image_copy)
    if not axis:
        plt.axis('off')
    plt.show()

    if return_image:
        return image_copy

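# Illustrative call (not part of the original file): draw a prediction with its
# links and class names and get the annotated image back. `image`, `pred` and
# `full_pred` are placeholders for values produced elsewhere in the pipeline.
#
#   annotated = draw_annotations(image,
#                                prediction=pred,
#                                full_prediction=full_pred,
#                                draw_boxes=True,
#                                draw_links=True,
#                                write_class=True,
#                                score_threshold=0.5,
#                                return_image=True)
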
def find_closest_object(keypoint, boxes, labels):
    """
    Find the closest object to a keypoint based on their proximity.

    Parameters:
    - keypoint (numpy.ndarray): The coordinates of the keypoint.
    - boxes (numpy.ndarray): The bounding boxes of the objects.
    - labels (numpy.ndarray): The class labels of the objects; flow and lane elements are skipped.

    Returns:
    - tuple: (index of the closest object or None, closest attachment point or None).
    """
    min_distance = float('inf')
    closest_object_idx = None
    best_point = None
    # Iterate over each bounding box
    for i, box in enumerate(boxes):
        if labels[i] in [list(class_dict.values()).index('sequenceFlow'),
                         list(class_dict.values()).index('messageFlow'),
                         list(class_dict.values()).index('dataAssociation'),
                         # list(class_dict.values()).index('pool'),
                         list(class_dict.values()).index('lane')]:
            continue
        x1, y1, x2, y2 = box

        top = ((x1 + x2) / 2, y1)
        bottom = ((x1 + x2) / 2, y2)
        left = (x1, (y1 + y2) / 2)
        right = (x2, (y1 + y2) / 2)
        points = [left, top, right, bottom]

        # Calculate the distance between the keypoint and the midpoint of each side of the bounding box
        for point in points:
            distance = np.linalg.norm(keypoint[:2] - point)
            # Update the closest object index if this object is closer
            if distance < min_distance:
                min_distance = distance
                closest_object_idx = i
                best_point = point

    return closest_object_idx, best_point
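
# Illustrative sketch (not part of the original file): snap an arrow endpoint to
# the nearest side midpoint of a non-flow object with find_closest_object.
# Dummy values; the label value below is hypothetical and just assumed not to be
# one of the flow/lane classes.
#
#   import numpy as np
#   boxes = np.array([[0, 0, 100, 50]])
#   labels = np.array([5])
#   idx, point = find_closest_object(np.array([105.0, 25.0]), boxes, labels)
#   # idx == 0 and point == (100, 25.0), the midpoint of the box's right side
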