kryman27 committed on
Commit
a5152aa
verified
1 Parent(s): b82e672

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -3
app.py CHANGED
@@ -23,15 +23,15 @@ def extract_invoice_data(pdf_file):
23
  extracted_words = page.extract_words()
24
  for word in extracted_words:
25
  words.append(word['text']) # Pobieramy tekst słowa
26
- bbox = [word['x0'], word['top'], word['x1'], word['bottom']]
27
  boxes.append(bbox) # Pobieramy bounding box (pozycję słowa na stronie)
28
 
29
  # Tokenizacja tekstu + dodanie bounding boxes
30
- tokens = tokenizer(words, boxes=boxes, is_split_into_words=True, return_tensors="pt", truncation=True)
31
 
32
  # Predykcja modelu
33
  with torch.no_grad():
34
- outputs = model(**tokens)
35
  predictions = outputs.logits.argmax(-1).squeeze().tolist()
36
 
37
  # Przetwarzanie wyników
 
23
  extracted_words = page.extract_words()
24
  for word in extracted_words:
25
  words.append(word['text']) # Pobieramy tekst słowa
26
+ bbox = [int(word['x0']), int(word['top']), int(word['x1']), int(word['bottom'])] # Zaokrąglamy wartości
27
  boxes.append(bbox) # Pobieramy bounding box (pozycję słowa na stronie)
28
 
29
  # Tokenizacja tekstu + dodanie bounding boxes
30
+ encoding = tokenizer.encode_plus(words, boxes=boxes, return_tensors="pt", truncation=True) # Poprawiona linia
31
 
32
  # Predykcja modelu
33
  with torch.no_grad():
34
+ outputs = model(**encoding)
35
  predictions = outputs.logits.argmax(-1).squeeze().tolist()
36
 
37
  # Przetwarzanie wyników