Spaces:
Sleeping
Sleeping
File size: 2,411 Bytes
79fc11d 96ea338 79fc11d cc310bf 79fc11d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 |
import streamlit as st
from layout_extractor import convert_pdf_to_images, analyze_layout, extract_text_from_blocks, extract_key_values
from processor import load_images, analyze_layout, extract_text_from_blocks, rule_based_kv_extraction
import json
# --- Section 1: layout + key/value extraction with editable fields ---------
# Page-level configuration followed by an upload -> analyze -> edit -> download
# flow that runs once per page of the uploaded document.
st.set_page_config(page_title="Document AI", layout="wide")
st.title("🧠 AI-Driven Document Layout & Info Extractor")

uploaded_file = st.file_uploader(
    "Upload a PDF or Image",
    type=["pdf", "png", "jpg", "jpeg"],
    key="upload1",
)

if uploaded_file:
    images = load_images(uploaded_file)

    for i, image in enumerate(images):
        st.subheader(f"Page {i+1}")
        st.image(image, use_column_width=True)

        # Run the three processing steps while the spinner is displayed.
        with st.spinner("Analyzing layout..."):
            layout = analyze_layout(image)
            blocks = extract_text_from_blocks(image, layout)
            kv_data = rule_based_kv_extraction(blocks)

        st.success("Done! Here's what we found:")
        st.json(kv_data)

        st.subheader("✏️ Edit Extracted Fields")
        edited_data = {}
        for key, value in kv_data.items():
            # An explicit per-page key is required: without it, two pages that
            # yield the same field name and value create identical widgets and
            # Streamlit aborts the run with a duplicate-widget error.
            edited_data[key] = st.text_input(
                f"{key}", value, key=f"upload1_page{i}_field_{key}"
            )

        # Same reasoning as above: one download button is created per page, so
        # each needs its own key.
        st.download_button(
            "⬇️ Download JSON",
            data=json.dumps(edited_data, indent=2),
            file_name="extracted_data.json",
            mime="application/json",
            key=f"upload1_page{i}_download",
        )

        with st.expander("🔍 All Detected Segments"):
            for b in blocks:
                # Show only the first 150 characters of each segment's text.
                st.markdown(f"**{b['type']}**: {b['text'][:150]}...")
# --- Section 2: layout analysis with read-only key/value output -------------
# NOTE(review): `analyze_layout` and `extract_text_from_blocks` are imported
# from both `layout_extractor` and `processor` at the top of the file; the
# later import wins, so this section uses the `processor` versions — confirm
# that is intended.
st.title("📄 AI-Driven Document Layout Analyzer")

# The key must differ from the first uploader's ("upload1"); Streamlit rejects
# two widgets that share the same key.
uploaded_file = st.file_uploader(
    "Upload a PDF or Image",
    type=["pdf", "png", "jpg", "jpeg"],
    key="upload2",
)

if uploaded_file:
    # Compare the extension case-insensitively so "SCAN.PDF" is still routed
    # to the PDF converter instead of being handed to PIL as an image.
    if uploaded_file.name.lower().endswith(".pdf"):
        images = convert_pdf_to_images(uploaded_file)
    else:
        from PIL import Image

        images = [Image.open(uploaded_file)]

    for i, image in enumerate(images):
        st.image(image, caption=f"Page {i+1}", use_column_width=True)

        layout = analyze_layout(image)
        blocks = extract_text_from_blocks(image, layout)
        key_values = extract_key_values(blocks)

        st.subheader("Extracted Key Data")
        st.json(key_values)

        st.subheader("All Segments")
        for block in blocks:
            # Show only the first 200 characters of each segment's text.
            st.markdown(f"**{block['type']}**: {block['text'][:200]}...")

        # One button per page, so give each a distinct key to avoid a
        # duplicate-widget error on multi-page documents. The MIME type matches
        # the JSON payload (and the first section's download button).
        st.download_button(
            "Download JSON",
            data=json.dumps(key_values, indent=2),
            file_name="extracted_data.json",
            mime="application/json",
            key=f"upload2_page{i}_download",
        )