Spaces:

Tzktz
/

Dit-document-layout-analysis

Sleeping

App Files Files Community

Dit-document-layout-analysis / unilm /kosmos-2 /demo /decode_string.py

Tzktz

Upload 7664 files

6fc683c verified over 1 year ago

raw

history blame contribute delete

4.02 kB

	import re
	import numpy as np

	def find_patch_index_combinations(s):
	# The regular expression pattern for matching the required formats
	pattern = r'(?:(<phrase>([^<]+)</phrase>))?<object>((?:<patch_index_\d+><patch_index_\d+></delimiter_of_multi_objects/>)*<patch_index_\d+><patch_index_\d+>)</object>'

	# Find all matches in the given string
	matches = re.findall(pattern, s)

	# Initialize an empty list to store the valid patch_index combinations
	valid_combinations = []

	for match in matches:
	phrase_tag, phrase, match_content = match
	if not phrase_tag:
	phrase = None

	# Split the match_content by the delimiter to get individual patch_index pairs
	patch_index_pairs = match_content.split('</delimiter_of_multi_objects/>')

	for pair in patch_index_pairs:
	# Extract the xxxx and yyyy values from the patch_index pair
	x = re.search(r'<patch_index_(\d+)>', pair)
	y = re.search(r'<patch_index_(\d+)>', pair[1:])

	if x and y:
	if phrase:
	valid_combinations.append((phrase, int(x.group(1)), int(y.group(1))))
	else:
	valid_combinations.append((f"<{x.group(1)}><{y.group(1)}>", int(x.group(1)), int(y.group(1))))

	return valid_combinations

	def get_box_coords_from_index(P, ul_idx, lr_idx):
	"""
	Given a grid of length P and the indices of the upper-left and lower-right corners of a bounding box,
	returns the normalized coordinates of the bounding box, in the form [x1, y1, x2, y2].

	Args:
	- P (int): the length of the grid
	- ul_idx (int): the index of the grid cell that corresponds to the upper-left corner of the bounding box
	- lr_idx (int): the index of the grid cell that corresponds to the lower-right corner of the bounding box

	Returns:
	- box_coords (np.array of shape (4,)): the normalized coordinates of the bounding box, in the form [x1, y1, x2, y2]
	"""
	# Compute the size of each cell in the grid
	cell_size = 1.0 / P

	# Compute the x and y indices of the upper-left and lower-right corners of the bounding box
	ul_x = ul_idx % P
	ul_y = ul_idx // P

	lr_x = lr_idx % P
	lr_y = lr_idx // P

	# Compute the normalized coordinates of the bounding box
	if ul_idx == lr_idx:
	x1 = ul_x * cell_size
	y1 = ul_y * cell_size
	x2 = lr_x * cell_size + cell_size
	y2 = lr_y * cell_size + cell_size
	elif ul_x == lr_x or ul_y == lr_y:
	x1 = ul_x * cell_size
	y1 = ul_y * cell_size
	x2 = lr_x * cell_size + cell_size
	y2 = lr_y * cell_size + cell_size
	else:
	x1 = ul_x * cell_size + cell_size / 2
	y1 = ul_y * cell_size + cell_size / 2
	x2 = lr_x * cell_size + cell_size / 2
	y2 = lr_y * cell_size + cell_size / 2

	return np.array([x1, y1, x2, y2])

	def decode_bbox_from_caption(caption, quantized_size=32, **kwargs):

	valid_combinations = find_patch_index_combinations(caption)
	entity_names = list(map(lambda x: x[0], valid_combinations))
	patch_index_coords = list(map(lambda pair: get_box_coords_from_index(quantized_size, pair[1], pair[2]), valid_combinations))
	collect_entity_location = []
	for entity_name, patch_index_coord in zip(entity_names, patch_index_coords):
	collect_entity_location.append([entity_name,] + patch_index_coord.tolist())

	# print(collect_entity_location)
	return collect_entity_location

	if __name__ == "__main__":

	caption = "a wet suit is at <object><patch_index_0003><patch_index_0004></delimiter_of_multi_objects/><patch_index_0005><patch_index_0006></object> in the picture"
	print(find_patch_index_combinations(caption))
	print(decode_bbox_from_caption(caption))