File size: 2,411 Bytes
79fc11d
 
 
 
 
 
 
 
96ea338
79fc11d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cc310bf
79fc11d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
import streamlit as st
from layout_extractor import convert_pdf_to_images, analyze_layout, extract_text_from_blocks, extract_key_values
from processor import load_images, analyze_layout, extract_text_from_blocks, rule_based_kv_extraction
import json

# --- Extractor view -------------------------------------------------------
# Configures the page, accepts an uploaded document, and for every page image
# returned by load_images(): shows the image, runs layout analysis + block text
# extraction + rule-based key/value extraction, lets the user edit the
# extracted fields, and offers the edited result as a JSON download.
st.set_page_config(page_title="Document AI", layout="wide")
st.title("🧠 AI-Driven Document Layout & Info Extractor")

uploaded_file = st.file_uploader("Upload a PDF or Image", type=["pdf", "png", "jpg", "jpeg"], key="upload1")

if uploaded_file:
    images = load_images(uploaded_file)
    for i, image in enumerate(images):
        st.subheader(f"Page {i+1}")
        # NOTE(review): use_column_width is reported as deprecated by recent
        # Streamlit releases; confirm the pinned version before switching.
        st.image(image, use_column_width=True)

        with st.spinner("Analyzing layout..."):
            layout = analyze_layout(image)
            blocks = extract_text_from_blocks(image, layout)
            kv_data = rule_based_kv_extraction(blocks)

        st.success("Done! Here's what we found:")
        st.json(kv_data)

        st.subheader("✏️ Edit Extracted Fields")
        # Widgets are created once per page inside this loop. Without an
        # explicit key, two pages yielding the same field name and value would
        # produce identical widgets and Streamlit would raise a duplicate
        # element ID error, so every key includes the page index.
        edited_data = {
            key: st.text_input(f"{key}", value, key=f"extractor_field_{i}_{key}")
            for key, value in kv_data.items()
        }

        st.download_button(
            "⬇️ Download JSON",
            data=json.dumps(edited_data, indent=2),
            file_name="extracted_data.json",
            mime="application/json",
            key=f"extractor_download_{i}",  # one distinct button per page
        )

        with st.expander("🔍 All Detected Segments"):
            for b in blocks:
                # Preview only: the text is cut to its first 150 characters.
                st.markdown(f"**{b['type']}**: {b['text'][:150]}...")

# --- Analyzer view --------------------------------------------------------
# Second upload flow rendered on the same page: converts the upload to page
# images, runs layout analysis and block text extraction, shows the extracted
# key data and every detected segment, and offers the key data as JSON.
#
# NOTE(review): analyze_layout and extract_text_from_blocks are imported from
# both layout_extractor and processor at the top of the file; the later import
# rebinds the names, so the calls below use the processor versions.
st.title("📄 AI-Driven Document Layout Analyzer")

# Widget keys must be unique within one script run. "upload1" is already used
# by the first uploader in this file, so this one needs its own key.
uploaded_file = st.file_uploader("Upload a PDF or Image", type=["pdf", "png", "jpg", "jpeg"], key="upload2")

if uploaded_file:
    # Compare the extension case-insensitively so "SCAN.PDF" is still routed
    # to the PDF converter instead of being handed to PIL.
    if uploaded_file.name.lower().endswith(".pdf"):
        images = convert_pdf_to_images(uploaded_file)
    else:
        from PIL import Image
        images = [Image.open(uploaded_file)]

    for i, image in enumerate(images):
        st.image(image, caption=f"Page {i+1}", use_column_width=True)
        layout = analyze_layout(image)
        blocks = extract_text_from_blocks(image, layout)
        key_values = extract_key_values(blocks)

        st.subheader("Extracted Key Data")
        st.json(key_values)

        st.subheader("All Segments")
        for block in blocks:
            # Preview only: the text is cut to its first 200 characters.
            st.markdown(f"**{block['type']}**: {block['text'][:200]}...")

        # One button is rendered per page; the page index in the key keeps the
        # otherwise identical buttons from colliding on multi-page documents.
        st.download_button(
            "Download JSON",
            data=json.dumps(key_values, indent=2),
            file_name="extracted_data.json",
            key=f"analyzer_download_{i}",
        )