Spaces:

kh-CHEUNG
/

test_img_text-streamlit

Sleeping

File size: 2,055 Bytes

248b38d
ffec207
 
6ddafa8
3e6e09c
248b38d
961f6bd
 
 
 
 
a385b9c
961f6bd
a6ff839
d9e2f7d
 
092ff29
a6ff839
d9e2f7d
092ff29
d9e2f7d
 
6ddafa8
 
d9e2f7d
 
a6ff839
db0d96f
 
a385b9c
db0d96f
 
43b044b
 
db0d96f
 
a385b9c
092ff29
db0d96f
7bfd7cf
db0d96f
 
 
a385b9c
961f6bd
a6ff839
248b38d

import streamlit as st
import torch
from transformers import AutoProcessor, UdopForConditionalGeneration
from PIL import Image
# from datasets import load_dataset

# Streamlit demo: upload files, pick one, preview it, and ask the UDOP model a
# fixed question about the selected image.

# Prefer the GPU when one is available. The model and its inputs are both
# placed on this device so generation does not hit a device mismatch.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

MODEL_NAME = "microsoft/udop-large"
# Extensions (lower-case, without the dot) that can be opened as an image and
# passed to the model; other accepted uploads are only listed by name.
IMAGE_EXTENSIONS = ("jpg", "jpeg", "png")


@st.cache_resource
def load_udop():
    """Load the UDOP processor and model and move the model to ``device``.

    Streamlit re-executes this script on every widget interaction; caching
    the result keeps the weights from being reloaded on each rerun.

    Returns:
        A ``(processor, model)`` tuple.
    """
    loaded_processor = AutoProcessor.from_pretrained(MODEL_NAME, apply_ocr=True)
    loaded_model = UdopForConditionalGeneration.from_pretrained(MODEL_NAME)
    loaded_model.to(device)
    return loaded_processor, loaded_model


processor, model = load_udop()

st.title("CIC Demo (by ITT)")
st.write("Upload and Select a document (/an image) to test the model.")

# File selection
uploaded_files = st.file_uploader(
    "Upload document(s) [/image(s)]:",
    type=["docx", "pdf", "pptx", "jpg", "jpeg", "png"],
    accept_multiple_files=True,
)
selected_file = st.selectbox(
    "Select a document (/an image):",
    uploaded_files,
    format_func=lambda file: file.name if file else "None",
)

# Display selected file.
# `image` stays None unless the selection is a readable image, so the testing
# step below can tell "nothing usable selected" apart from "ready to run".
image = None
if selected_file is not None:
    # Lower-case the extension so names such as "photo.JPG" are recognised.
    file_extension = selected_file.name.split(".")[-1].lower()
    if file_extension in IMAGE_EXTENSIONS:
        try:
            image = Image.open(selected_file).convert("RGB")
        except OSError:
            # Covers files PIL cannot identify or decode.
            st.error(f"Could not read image: {selected_file.name}")
        else:
            st.image(selected_file, caption="Selected Image")
    else:
        st.write("Selected file: ", selected_file.name)

# Model Testing
## Test button
testButton = st.button("Test Model")

## Question (/Prompt)
# Alternative prompt:
#   "Question answering. How many unsafe practice of Lifting Operation?"
question = "Question answering. Is it a scene of Lifting Operation?"

## Run the model only when the button is pressed AND a usable image is selected
if testButton:
    if selected_file is None:
        st.write("Please upload and select a document (/an image).")
    elif image is None:
        # A non-image document (or an unreadable image) is selected; only
        # images are passed to the model here.
        st.write("The selected file is not a readable image (jpg, jpeg or png).")
    else:
        st.write("Testing the model with the selected image...")
        model_encoding = processor(images=image, text=question, return_tensors="pt")
        # Inputs must live on the same device as the model.
        model_encoding = model_encoding.to(device)
        model_output = model.generate(**model_encoding)
        output_text = processor.batch_decode(model_output, skip_special_tokens=True)[0]
        st.write(output_text)