João Pedro committed on
Commit
d73399c
·
1 Parent(s): 61dba08

set labels, id2label and label2id manually

Browse files
Files changed (1) hide show
  1. app.py +24 -4
app.py CHANGED
@@ -3,11 +3,31 @@ from transformers import LayoutLMv3Processor, LayoutLMv3ForSequenceClassificatio
3
  from pdf2image import convert_from_bytes
4
  from PIL import Image
5
 
6
- # Load model and processor
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
  processor = LayoutLMv3Processor.from_pretrained("microsoft/layoutlmv3-base")
8
- model = LayoutLMv3ForSequenceClassification.from_pretrained("microsoft/layoutlmv3-base")
9
- id2label = model.config.id2label
10
- print(id2label)
 
 
 
11
 
12
  st.title("Document Classification with LayoutLMv3")
13
 
 
3
  from pdf2image import convert_from_bytes
4
  from PIL import Image
5
 
6
+ labels = [
7
+ 'budget',
8
+ 'email',
9
+ 'form',
10
+ 'handwritten',
11
+ 'invoice',
12
+ 'language',
13
+ 'letter',
14
+ 'memo',
15
+ 'news article',
16
+ 'questionnaire',
17
+ 'resume',
18
+ 'scientific publication',
19
+ 'specification',
20
+ ]
21
+ id2label = {i: label for i, label in enumerate(labels)}
22
+ label2id = {v: k for k, v in id2label.items()}
23
+
24
  processor = LayoutLMv3Processor.from_pretrained("microsoft/layoutlmv3-base")
25
+ model = LayoutLMv3ForSequenceClassification.from_pretrained(
26
+ "microsoft/layoutlmv3-base",
27
+ num_classes=len(labels),
28
+ id2label=id2label,
29
+ label2id=label2id,
30
+ )
31
 
32
  st.title("Document Classification with LayoutLMv3")
33