"""Streamlit demo for the UDOP document-understanding model.

The left column lets the user upload files and pick one of them; the right
column builds a task-prefixed prompt and, when the "Test Model" button is
pressed, runs ``microsoft/udop-large`` on the selected image and prints the
decoded output.
"""

import streamlit as st
import torch
from PIL import Image
from transformers import AutoProcessor, UdopForConditionalGeneration

MODEL_NAME = "microsoft/udop-large"
# Extensions that can be opened with PIL and fed to the processor as an image.
IMAGE_EXTENSIONS = frozenset({"jpg", "jpeg", "png"})
DEFAULT_QUESTION = "Is this a Lifting Operation scene?"

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


@st.cache_resource
def _load_processor_and_model():
    """Load the UDOP processor and model once and reuse them across reruns.

    Streamlit re-executes the whole script on every widget interaction;
    without caching, the checkpoint would be reloaded each time.
    """
    loaded_processor = AutoProcessor.from_pretrained(MODEL_NAME, apply_ocr=True)
    loaded_model = UdopForConditionalGeneration.from_pretrained(MODEL_NAME)
    loaded_model.to(device)
    return loaded_processor, loaded_model


processor, model = _load_processor_and_model()

st.title("CIC Demo (by ITT)")
st.write("Upload and Select a document (/an image) to test the model.")

# Two-column layout: file selection on the left, model testing on the right.
col1, col2 = st.columns(2)

# Stays None unless the selected file is an image that could be opened.
image = None

with col1:
    # File selection
    uploaded_files = st.file_uploader(
        "Upload document(s) [/image(s)]:",
        type=["docx", "pdf", "pptx", "jpg", "jpeg", "png"],
        accept_multiple_files=True,
    )
    selected_file = st.selectbox(
        "Select a document (/an image):",
        uploaded_files,
        format_func=lambda file: file.name if file else "None",
    )

    # Display selected file. The selectbox yields None (not the string
    # "None") when nothing has been uploaded.
    if selected_file is not None:
        # Compare case-insensitively so e.g. "photo.JPG" counts as an image.
        file_extension = selected_file.name.rsplit(".", 1)[-1].lower()
        if file_extension in IMAGE_EXTENSIONS:
            image = Image.open(selected_file).convert("RGB")
            st.image(selected_file, caption="Selected Image")
        else:
            st.write("Selected file: ", selected_file.name)

# Model testing
with col2:
    # Question (/prompt): the task type is used as a prefix of the prompt.
    task_type = st.selectbox(
        "Question Type:",
        ("Classification", "Question Answering", "Layout Analysis"),
    )
    question_text = st.text_input("Prompt:", placeholder=DEFAULT_QUESTION)
    # An untouched text input yields an empty string, so test for emptiness
    # to fall back to the default question shown as the placeholder.
    prompt_text = question_text.strip() if question_text else ""
    question = task_type + ". " + (prompt_text or DEFAULT_QUESTION)

    test_button = st.button("Test Model")

    # Run the model only when the button is pressed and an image is selected.
    if test_button:
        if selected_file is None:
            st.write("Please upload and select a document (/an image).")
        elif image is None:
            # Only image files are converted to model input by this script.
            st.write("The selected file is not an image. Please select a jpg, jpeg or png file.")
        else:
            st.write("Testing the model with the selected image...")
            model_encoding = processor(images=image, text=question, return_tensors="pt")
            # Inputs must live on the same device as the model.
            model_encoding = model_encoding.to(device)
            model_output = model.generate(**model_encoding)
            # Layout analysis keeps special tokens in the decoded text;
            # the other task types strip them.
            skip_special_tokens = task_type != "Layout Analysis"
            output_text = processor.batch_decode(
                model_output, skip_special_tokens=skip_special_tokens
            )[0]
            st.write(output_text)