Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -23,15 +23,15 @@ def extract_invoice_data(pdf_file):
|
|
23 |
extracted_words = page.extract_words()
|
24 |
for word in extracted_words:
|
25 |
words.append(word['text']) # Pobieramy tekst słowa
|
26 |
-
bbox = [word['x0'], word['top'], word['x1'], word['bottom']]
|
27 |
boxes.append(bbox) # Pobieramy bounding box (pozycję słowa na stronie)
|
28 |
|
29 |
# Tokenizacja tekstu + dodanie bounding boxes
|
30 |
-
|
31 |
|
32 |
# Predykcja modelu
|
33 |
with torch.no_grad():
|
34 |
-
outputs = model(**
|
35 |
predictions = outputs.logits.argmax(-1).squeeze().tolist()
|
36 |
|
37 |
# Przetwarzanie wyników
|
|
|
23 |
extracted_words = page.extract_words()
|
24 |
for word in extracted_words:
|
25 |
words.append(word['text']) # Pobieramy tekst słowa
|
26 |
+
bbox = [int(word['x0']), int(word['top']), int(word['x1']), int(word['bottom'])] # Zaokrąglamy wartości
|
27 |
boxes.append(bbox) # Pobieramy bounding box (pozycję słowa na stronie)
|
28 |
|
29 |
# Tokenizacja tekstu + dodanie bounding boxes
|
30 |
+
encoding = tokenizer.encode_plus(words, boxes=boxes, return_tensors="pt", truncation=True) # Poprawiona linia
|
31 |
|
32 |
# Predykcja modelu
|
33 |
with torch.no_grad():
|
34 |
+
outputs = model(**encoding)
|
35 |
predictions = outputs.logits.argmax(-1).squeeze().tolist()
|
36 |
|
37 |
# Przetwarzanie wyników
|