import os
import tempfile

import cv2
import numpy as np
import streamlit as st
import torch
from doclayout_yolo import YOLOv10
from PIL import Image

@st.cache_resource
def _load_model():
    """Load the pre-trained layout detection model once per server process.

    Streamlit re-executes this script from top to bottom on every user
    interaction; caching the model as a resource avoids re-reading the
    weights file on each rerun.
    """
    return YOLOv10("doclayout_yolo_docstructbench_imgsz1024.pt")


# Load the pre-trained model (served from Streamlit's resource cache after
# the first run)
model = _load_model()

# Automatically select device
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Page header, followed by the upload widget whose result drives the rest of
# the script
st.title("Document Layout Detection")
st.subheader("Upload an image to detect and annotate document layout")

uploaded_file = st.file_uploader(
    label="Upload Image",
    type=["jpg", "jpeg", "png"],
)

if uploaded_file is not None:
    # Decode the upload and normalise it to 3-channel RGB so it can be
    # written out as a JPEG regardless of the source mode
    image = Image.open(uploaded_file).convert("RGB")

    with st.spinner("Processing..."):
        # The model is given a file path, so the image has to exist on disk.
        # A per-run temporary directory keeps concurrent sessions from
        # overwriting each other's input and removes the file afterwards.
        with tempfile.TemporaryDirectory() as tmp_dir:
            image_path = os.path.join(tmp_dir, "input.jpg")
            image.save(image_path)

            # Perform prediction
            det_res = model.predict(
                image_path,
                imgsz=1024,
                conf=0.2,
                device=device,
            )

        # Annotate the first (and only) result
        annotated_frame = det_res[0].plot(pil=True, line_width=5, font_size=20)

        # Convert the annotated frame to an array Streamlit can display
        annotated_image = np.asarray(annotated_frame)

        # NOTE(review): plot() is expected to draw on the frame that was read
        # from disk in OpenCV's BGR channel order, so Streamlit is told the
        # order explicitly; without it red and blue would be swapped.
        st.image(
            annotated_image,
            caption="Annotated Image",
            channels="BGR",
            use_container_width=True,
        )
        st.success("Detection completed!")


# Footer: author credit, then links to the paper and the upstream project
st.markdown("**Application Created By Shubham Mhaske**")
for footer_line in (
    "Do have a look on Papers 📄 : - https://arxiv.org/pdf/2410.12628",
    "Thanks to https://github.com/opendatalab",
):
    st.write(footer_line)