Spaces:
Sleeping
Sleeping
File size: 5,104 Bytes
cfc8c5a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 |
import numpy as np
from datasets import load_metric
from PIL import ImageDraw, ImageFont
import pandas as pd
# Load the "seqeval" sequence-labeling metric once, at import time.
# NOTE(review): `metric` is not referenced by any function in the visible part
# of this file — confirm whether another module imports it before removing.
# NOTE(review): `load_metric` is reportedly deprecated in newer `datasets`
# releases in favor of the separate `evaluate` package — confirm against the
# pinned `datasets` version.
metric = load_metric("seqeval")
def unnormalize_box(bbox, width, height):
    """Scale a box from the 0-1000 normalized grid to image coordinates.

    Args:
        bbox: Indexable holding at least four numbers ``(x0, y0, x1, y1)``
            expressed on a 0-1000 scale.
        width: Image width, used to scale the two x coordinates.
        height: Image height, used to scale the two y coordinates.

    Returns:
        A four-element list ``[x0, y0, x1, y1]`` of scaled coordinates.
    """
    # Positions 0 and 2 are x values (scaled by width); 1 and 3 are y values.
    extents = (width, height, width, height)
    return [extent * (bbox[idx] / 1000) for idx, extent in enumerate(extents)]
def normalize_box(bbox, width, height):
    """Map a box from image coordinates onto the integer 0-1000 grid.

    Args:
        bbox: Indexable holding at least four numbers ``(x0, y0, x1, y1)``
            in image coordinates.
        width: Image width; divisor for the two x coordinates.
        height: Image height; divisor for the two y coordinates.

    Returns:
        A four-element list of ints. Fractions are truncated by ``int()``,
        not rounded.
    """
    # Positions 0 and 2 are x values (divided by width); 1 and 3 are y values.
    extents = (width, height, width, height)
    return [int((bbox[idx] / extent) * 1000) for idx, extent in enumerate(extents)]
def draw_output(image, true_predictions, true_boxes):
    """Draw each predicted label and its bounding box onto ``image``.

    Predictions and boxes are paired positionally with ``zip``, so any extra
    items in the longer sequence are ignored.

    Args:
        image: Image handed to ``ImageDraw.Draw``; it is drawn on in place.
        true_predictions: Label for each box. A falsy label (``None``, ``''``)
            is rendered as ``'other'``.
        true_boxes: One box per prediction, passed straight to
            ``draw.rectangle``; indices 0 and 1 anchor the caption.
            NOTE(review): presumably pixel coordinates such as those produced
            by ``unnormalize_box`` — confirm with the caller.

    Returns:
        The same ``image`` object, now annotated.
    """
    canvas = ImageDraw.Draw(image)
    caption_font = ImageFont.load_default()

    for raw_label, bbox in zip(true_predictions, true_boxes):
        # Substitute 'other' for a missing label, then lower-case for display.
        caption = (raw_label or 'other').lower()
        canvas.rectangle(bbox, outline='red')
        # Caption sits 10 units right of and 10 units above the box's first corner.
        caption_xy = (bbox[0] + 10, bbox[1] - 10)
        canvas.text(caption_xy, text=caption, fill='red', font=caption_font)

    return image
# Labels whose text is passed through `reformat` before being stored.
_NUMERIC_LABELS = frozenset({
    'AMOUNT', 'UNIT', 'UDISCOUNT', 'UPRICE', 'SUB_TPRICE',
    'TAMOUNT', 'TPRICE', 'FPRICE', 'TDISCOUNT',
    'RECEMONEY', 'REMAMONEY',
})

# Labels whose rows are tagged with the id of the current product line.
_PRODUCT_LINKED_LABELS = frozenset({
    'AMOUNT', 'SUB_TPRICE', 'UPRICE', 'PRODUCT_NAME',
})


def create_df(true_texts,
              true_predictions,
              chosen_labels=('SHOP_NAME', 'ADDR', 'TITLE', 'PHONE',
                             'PRODUCT_NAME', 'AMOUNT', 'UNIT', 'UPRICE',
                             'SUB_TPRICE', 'UDISCOUNT',
                             'TAMOUNT', 'TPRICE', 'FPRICE', 'TDISCOUNT',
                             'RECEMONEY', 'REMAMONEY',
                             'BILLID', 'DATETIME', 'CASHIER')
              ):
    """Build a table of the recognised fields from paired texts and labels.

    Texts and predictions are paired positionally with ``zip``. Pairs whose
    label is not in ``chosen_labels`` are dropped. A running product counter
    starts at -1 and is incremented each time a ``'PRODUCT_NAME'`` label is
    kept, so the first product gets id 0.

    Args:
        true_texts: Iterable of recognised text snippets.
        true_predictions: Iterable of labels, one per text snippet.
        chosen_labels: Container of labels to keep. The default is an
            immutable tuple so it cannot be altered between calls.

    Returns:
        A ``pandas.DataFrame`` with columns ``text``, ``class_label`` and
        ``product_id``. ``product_id`` holds the current product counter for
        PRODUCT_NAME / AMOUNT / UPRICE / SUB_TPRICE rows and ``''`` for every
        other row. Text of numeric labels is converted by ``reformat``.
    """
    texts, labels, product_ids = [], [], []
    product_id = -1

    for text, prediction in zip(true_texts, true_predictions):
        if prediction not in chosen_labels:
            continue

        # Each product name opens a new product line.
        if prediction == 'PRODUCT_NAME':
            product_id += 1

        if prediction in _NUMERIC_LABELS:
            text = reformat(text)

        product_ids.append(
            product_id if prediction in _PRODUCT_LINKED_LABELS else ''
        )
        labels.append(prediction)
        texts.append(text)

    return pd.DataFrame(
        {'text': texts, 'class_label': labels, 'product_id': product_ids}
    )
# Deletes the punctuation that appears inside printed amounts.
_PUNCTUATION_TABLE = str.maketrans('', '', '.,:/|\\)(-;_')


def reformat(text: str):
    """Convert a printed amount such as ``'12.000'`` to an ``int`` if possible.

    The characters ``. , : / | \\ ) ( - ; _`` are removed first. Because
    ``-`` is among them, any sign is discarded (``'-7'`` becomes ``7``).

    Args:
        text: Raw recognised text.

    Returns:
        * the parsed ``int`` when the cleaned text is a valid integer;
        * the cleaned (punctuation-stripped) text when it is not;
        * ``text`` itself, unchanged, when it is not a ``str``.
    """
    if not isinstance(text, str):
        return text

    cleaned = text.translate(_PUNCTUATION_TABLE)
    try:
        return int(cleaned)
    except ValueError:
        # Not a plain integer (e.g. has letters or a currency sign); keep the text.
        return cleaned
def _first_text_for_label(rows, label, missing_message):
    """Return the first ``text`` value in ``rows`` tagged ``label``, else ``''``.

    Prints ``missing_message`` when no row carries the label.
    """
    values = rows.loc[rows['class_label'] == label, 'text']
    if values.empty:
        print(missing_message)
        return ''
    return values.iloc[0]


def find_product(product_name, df):
    """Look up a product by name and describe its quantity and prices.

    The search is a case-insensitive *literal* substring match against the
    ``text`` of rows labelled ``'PRODUCT_NAME'``. Characters such as
    ``( ) + * ?`` in ``product_name`` are matched as-is, not as a regular
    expression. The first matching product wins.

    Args:
        product_name: Search string.
        df: DataFrame with ``text``, ``class_label`` and ``product_id``
            columns (the layout built by ``create_df``).

    Returns:
        A four-line summary string (name, amount, unit price, sub-total).
        Any field absent for the product is left blank and a message is
        printed. If no product matches, a fixed "not found" message is
        returned.
    """
    not_found = "Không tìm thấy item nào phù hợp."

    needle = product_name.lower()
    product_rows = df[df['class_label'] == 'PRODUCT_NAME']
    if product_rows.empty:
        # No product rows: skip the .str accessor, which fails on non-text columns.
        return not_found

    # regex=False: a query like "c++" or "milk (1l)" must be matched literally.
    matches = product_rows['text'].str.lower().str.contains(
        needle, regex=False, na=False
    )
    if not matches.any():
        return not_found

    product_id = product_rows.loc[matches, 'product_id'].iloc[0]
    product_info = df[df['product_id'] == product_id]

    prod_name = product_info.loc[
        product_info['class_label'] == 'PRODUCT_NAME', 'text'
    ].iloc[0]
    amount = _first_text_for_label(
        product_info, 'AMOUNT', "Error: cannot find amount")
    uprice = _first_text_for_label(
        product_info, 'UPRICE', "Error: cannot find unit price")
    sub_tprice = _first_text_for_label(
        product_info, 'SUB_TPRICE', "Error: cannot find sub total price")

    return f"Sản phẩm: {prod_name}\n Số lượng: {amount}\n Đơn giá: {uprice}\n Thành tiền: {sub_tprice}"
|