update readme
Browse files
README.md
CHANGED
@@ -15,6 +15,41 @@ widget:
|
|
15 |
|
16 |
Trained for 4 epochs.
|
17 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
18 |
```
|
19 |
model = BeitForSemanticSegmentation.from_pretrained("microsoft/dit-base", num_labels=11)
|
20 |
ds = load_dataset("ds4sd/DocLayNet-v1.1")
|
|
|
15 |
|
16 |
Trained for 4 epochs.
|
17 |
|
18 |
+
Usage:
|
19 |
+
|
20 |
+
```
|
21 |
+
image_processor = AutoImageProcessor.from_pretrained("microsoft/dit-large")
|
22 |
+
model = BeitForSemanticSegmentation.from_pretrained("jzju/dit-doclaynet")
|
23 |
+
image = Image.open('img.png').convert('RGB')
|
24 |
+
inputs = image_processor(images=image, return_tensors="pt")
|
25 |
+
outputs = model(**inputs)
|
26 |
+
# logits are of shape (batch_size, num_labels, height, width)
|
27 |
+
logits = outputs.logits
|
28 |
+
out = logits[0].detach()
|
29 |
+
out.size()
|
30 |
+
for i in range(11):
|
31 |
+
plt.imshow(out[i])
|
32 |
+
plt.show()
|
33 |
+
```
|
34 |
+
|
35 |
+
Labels:
|
36 |
+
|
37 |
+
```
|
38 |
+
1: Caption
|
39 |
+
2: Footnote
|
40 |
+
3: Formula
|
41 |
+
4: List-item
|
42 |
+
5: Page-footer
|
43 |
+
6: Page-header
|
44 |
+
7: Picture
|
45 |
+
8: Section-header
|
46 |
+
9: Table
|
47 |
+
10: Text
|
48 |
+
11: Title
|
49 |
+
```
|
50 |
+
|
51 |
+
Dataset label conversion:
|
52 |
+
|
53 |
```
|
54 |
model = BeitForSemanticSegmentation.from_pretrained("microsoft/dit-base", num_labels=11)
|
55 |
ds = load_dataset("ds4sd/DocLayNet-v1.1")
|