Midterm_Task_5 / app.py
resolverkatla's picture
Update
79fc11d
raw
history blame
2.38 kB
import streamlit as st
from layout_extractor import convert_pdf_to_images, analyze_layout, extract_text_from_blocks, extract_key_values
from processor import load_images, analyze_layout, extract_text_from_blocks, rule_based_kv_extraction
import json
# --- Flow 1: key/value extraction with editable fields ----------------------
# Configures the page, accepts an uploaded document, and for every page image
# runs layout analysis -> per-block text extraction -> rule-based key/value
# extraction, then lets the user edit the extracted fields and download them.
# NOTE(review): indentation was reconstructed from data flow (kv_data and
# blocks are computed per image, so the widgets that use them are assumed to
# live inside the per-page loop) — confirm against the original file.
st.set_page_config(page_title="Document AI", layout="wide")
st.title("🧠 AI-Driven Document Layout & Info Extractor")

# An explicit key is required: this script creates a second uploader with the
# same label and accepted types further down, and Streamlit rejects two
# keyless widgets whose parameters are identical.
uploaded_file = st.file_uploader(
    "Upload a PDF or Image",
    type=["pdf", "png", "jpg", "jpeg"],
    key="kv_uploader",
)

if uploaded_file:
    images = load_images(uploaded_file)

    for i, image in enumerate(images):
        st.subheader(f"Page {i+1}")
        st.image(image, use_column_width=True)

        with st.spinner("Analyzing layout..."):
            layout = analyze_layout(image)
            blocks = extract_text_from_blocks(image, layout)
            kv_data = rule_based_kv_extraction(blocks)

        st.success("Done! Here's what we found:")
        st.json(kv_data)

        st.subheader("✏️ Edit Extracted Fields")
        edited_data = {}
        for key, value in kv_data.items():
            # The page index is part of the widget key so that two pages
            # yielding the same field name do not produce duplicate widgets.
            edited_data[key] = st.text_input(
                f"{key}", value, key=f"kv_field_{i}_{key}"
            )

        # Per-page key for the same reason: the label and file name are
        # identical on every page.
        st.download_button(
            "⬇️ Download JSON",
            data=json.dumps(edited_data, indent=2),
            file_name="extracted_data.json",
            mime="application/json",
            key=f"kv_download_{i}",
        )

        with st.expander("🔍 All Detected Segments"):
            for b in blocks:
                text = b['text']
                # Only show an ellipsis when the preview was actually cut.
                preview = text if len(text) <= 150 else f"{text[:150]}..."
                st.markdown(f"**{b['type']}**: {preview}")
# --- Flow 2: layout analysis with read-only results -------------------------
# Accepts an uploaded document, converts it to one image per page (PDF) or a
# single image (anything else), and for every page shows the extracted
# key/value data, a preview of each detected segment, and a JSON download.
# NOTE(review): indentation was reconstructed from data flow (key_values and
# blocks are computed per image, so the widgets that use them are assumed to
# live inside the per-page loop) — confirm against the original file.
# NOTE: analyze_layout and extract_text_from_blocks are imported from both
# layout_extractor and processor at the top of the file; the later import
# (processor) is the binding in effect here.
st.title("📄 AI-Driven Document Layout Analyzer")

# An explicit key is required: this script already created an uploader with
# the same label and accepted types earlier, and Streamlit rejects two
# keyless widgets whose parameters are identical.
uploaded_file = st.file_uploader(
    "Upload a PDF or Image",
    type=["pdf", "png", "jpg", "jpeg"],
    key="layout_uploader",
)

if uploaded_file:
    # Compare the extension case-insensitively so "SCAN.PDF" is still routed
    # to the PDF converter instead of being handed to PIL as an image.
    if uploaded_file.name.lower().endswith(".pdf"):
        images = convert_pdf_to_images(uploaded_file)
    else:
        from PIL import Image
        images = [Image.open(uploaded_file)]

    for i, image in enumerate(images):
        st.image(image, caption=f"Page {i+1}", use_column_width=True)

        layout = analyze_layout(image)
        blocks = extract_text_from_blocks(image, layout)
        key_values = extract_key_values(blocks)

        st.subheader("Extracted Key Data")
        st.json(key_values)

        st.subheader("All Segments")
        for block in blocks:
            text = block['text']
            # Only show an ellipsis when the preview was actually cut.
            preview = text if len(text) <= 200 else f"{text[:200]}..."
            st.markdown(f"**{block['type']}**: {preview}")

        # Per-page key: the label and file name are identical on every page,
        # so keyless buttons can collide. The MIME type matches the JSON
        # payload, consistent with the other download button in this script.
        st.download_button(
            "Download JSON",
            data=json.dumps(key_values, indent=2),
            file_name="extracted_data.json",
            mime="application/json",
            key=f"layout_download_{i}",
        )