import streamlit as st
import torch
from transformers import AutoProcessor, UdopForConditionalGeneration
from PIL import Image
# from datasets import load_dataset

# Streamlit demo: upload a document/image, pick a task type, and query the
# UDOP model about the selected image.

# Use the GPU when one is available; the model and its inputs are both placed
# on this device before generation.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

MODEL_NAME = "microsoft/udop-large"
IMAGE_EXTENSIONS = ("jpg", "jpeg", "png")


@st.cache_resource
def _load_udop(model_name, device_name):
    """Load the UDOP processor and model once and reuse them across reruns.

    Streamlit re-executes the whole script on every widget interaction, so
    without caching the checkpoint would be reloaded on each click.

    Args:
        model_name: Hugging Face model identifier to load.
        device_name: Device string (e.g. "cpu" or "cuda") to place the model on.

    Returns:
        A ``(processor, model)`` tuple.
    """
    loaded_processor = AutoProcessor.from_pretrained(model_name, apply_ocr=True)
    loaded_model = UdopForConditionalGeneration.from_pretrained(model_name)
    loaded_model.to(device_name)
    return loaded_processor, loaded_model


# The device is passed as a plain string so it can be used as a cache key.
processor, model = _load_udop(MODEL_NAME, str(device))

st.title("CIC Demo (by ITT)")
st.write("Upload and Select a document (/an image) to test the model.")

# Two-column layout: file selection on the left, model testing on the right.
col1, col2 = st.columns(2)

# Stays None unless the selected upload is a readable, supported image, so the
# testing column can tell whether there is anything to run the model on.
image = None

with col1:
    # File selection
    uploaded_files = st.file_uploader(
        "Upload document(s) [/image(s)]:",
        type=["docx", "pdf", "pptx", "jpg", "jpeg", "png"],
        accept_multiple_files=True,
    )
    # The selectbox yields None when nothing has been uploaded yet.
    selected_file = st.selectbox(
        "Select a document (/an image):",
        uploaded_files,
        format_func=lambda file: file.name if file else "None",
    )

    # Display selected file
    if selected_file is not None:
        # Lower-case the extension so "photo.JPG" is treated as an image too.
        file_extension = selected_file.name.rsplit(".", 1)[-1].lower()
        if file_extension in IMAGE_EXTENSIONS:
            try:
                image = Image.open(selected_file).convert("RGB")
            except OSError:
                # Corrupt or mislabeled upload: report it instead of crashing.
                image = None
                st.error("The selected image could not be read.")
            else:
                st.image(image, caption="Selected Image")
        else:
            st.write("Selected file: ", selected_file.name)

# Model Testing
with col2:
    ## Question (/Prompt)
    default_question = "Is this a Lifting Operation scene?"
    task_type = st.selectbox(
        "Question Type:",
        ("Classification", "Question Answering", "Layout Analysis"),
    )
    question_text = st.text_input("Prompt:", placeholder=default_question)
    # A blank text input is an empty string, not None, so fall back to the
    # default question on any empty/whitespace-only prompt.
    prompt_body = (question_text or "").strip() or default_question
    question = f"{task_type}. {prompt_body}"

    ## Test button
    test_button = st.button("Test Model")

    ## Run the model only when the button is pressed and an image is available
    if test_button:
        if selected_file is None:
            st.write("Please upload and select a document (/an image).")
        elif image is None:
            # A non-image document (or unreadable image) is selected; the
            # model call below needs a decoded image.
            st.write("The selected file is not a supported image; please select a JPG or PNG file.")
        else:
            st.write("Testing the model with the selected image...")
            model_encoding = processor(images=image, text=question, return_tensors="pt")
            # Inputs must be on the same device as the model weights.
            model_encoding = model_encoding.to(device)
            model_output = model.generate(**model_encoding)
            # Special tokens are kept only for layout analysis, as before.
            strip_special_tokens = task_type != "Layout Analysis"
            output_text = processor.batch_decode(
                model_output, skip_special_tokens=strip_special_tokens
            )[0]
            st.write(output_text)