File size: 1,600 Bytes
056ccc3
 
c1d4001
056ccc3
 
 
 
 
 
 
 
 
fd98f6f
056ccc3
 
 
fd98f6f
 
 
548ee28
fd98f6f
 
056ccc3
fd98f6f
 
180d132
fd98f6f
 
 
 
056ccc3
fd98f6f
 
056ccc3
fd98f6f
 
 
 
 
 
 
056ccc3
fd98f6f
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
import streamlit as st
import torch
from pdf2image import convert_from_bytes
from PIL import Image
from transformers import LayoutLMv3Processor, LayoutLMv3ForTokenClassification

@st.cache_resource
def _load_layoutlmv3():
    """Load the LayoutLMv3 processor and token-classification model once.

    Streamlit re-executes this script from top to bottom on every widget
    interaction. ``st.cache_resource`` keeps the loaded objects alive across
    those reruns instead of re-instantiating them from the checkpoint each
    time the user clicks something.

    Returns:
        A ``(processor, model)`` tuple, both loaded from the
        ``microsoft/layoutlmv3-base`` checkpoint.
    """
    checkpoint = "microsoft/layoutlmv3-base"
    loaded_processor = LayoutLMv3Processor.from_pretrained(checkpoint)
    loaded_model = LayoutLMv3ForTokenClassification.from_pretrained(checkpoint)
    return loaded_processor, loaded_model


# Keep the module-level names the rest of the script relies on.
processor, model = _load_layoutlmv3()

# Page heading.
st.title("Document Classification with LayoutLMv3")

# Single-document upload widget; only these file extensions are offered.
ACCEPTED_EXTENSIONS = ["pdf", "jpg", "png"]
uploaded_file = st.file_uploader(
    label="Upload Document",
    type=ACCEPTED_EXTENSIONS,
    accept_multiple_files=False,
)

if uploaded_file:
    # A PDF is rasterised into a list of page images; any other accepted
    # upload is opened directly and treated as a one-image list.
    if uploaded_file.type == "application/pdf":
        images = convert_from_bytes(uploaded_file.getvalue())
    else:
        images = [Image.open(uploaded_file)]

    # Run the model on every page/image and ask for feedback on each one.
    for i, image in enumerate(images):
        st.image(image, caption=f'Uploaded Image {i}', use_container_width=True)

        # Uploaded PNGs may be RGBA, palette or greyscale. The original image
        # is displayed as-is, but the processor is given an RGB copy, matching
        # the RGB input used in the LayoutLMv3 usage examples.
        encoding = processor(image.convert("RGB"), return_tensors="pt")

        # Inference only: no gradients are needed, so skip autograd tracking.
        with torch.no_grad():
            outputs = model(**encoding)
        predictions = outputs.logits.argmax(-1)

        # Display raw predicted indices (you may want to map indices to labels).
        st.write(f"Predictions: {predictions}")

        # User feedback section. Each widget gets a per-image key: without it,
        # the identical widgets created for the second and later pages of a
        # PDF collide and Streamlit raises a duplicate-widget error.
        feedback = st.radio(
            "Is the classification correct?",
            ("Yes", "No"),
            key=f"feedback_{i}",
        )
        if feedback == "No":
            correct_label = st.text_input(
                "Please provide the correct label:",
                key=f"correct_label_{i}",
            )
            # TODO: store or process `correct_label`; it is currently unused.