bLoOd_AI / app.py
erayman09's picture
Update app.py
9e4acbd verified
raw
history blame
1.67 kB
import streamlit as st
from PIL import Image
import pytesseract
import re
import pandas as pd
# Streamlit App
# Upload an image of a blood test report, OCR it with Tesseract and show the
# rows shaped like "<component> <value> <min> - <max> <units> [L|H]" as a table.

# Unsigned decimal that float() always accepts: "13", "13.5", "13." or ".5".
# (A looser class such as [\d.]+ would also accept OCR noise like "1.2.3",
# which makes float() raise ValueError.)
_NUMBER = r"(?:\d+\.?\d*|\.\d+)"

# One report row. The pattern is applied to a single line at a time, so the
# \s classes can never pull text from a neighbouring line into a row.
_ROW_PATTERN = re.compile(
    r"(?P<component>[A-Za-z\s%]+?)\s+"
    r"(?P<your_value>" + _NUMBER + r")\s+"
    # The range separator may be a hyphen, en dash or em dash, with or
    # without surrounding spaces ("12 - 16", "12-16", "12–16").
    r"(?P<range_min>" + _NUMBER + r")\s*[-\u2013\u2014]\s*"
    r"(?P<range_max>" + _NUMBER + r")\s+"
    r"(?P<units>[\w/%]+)"
    r"(?:\s+(?P<flag>[LH]))?"
)


def parse_blood_report(text: str) -> list[dict[str, object]]:
    """Extract blood test rows from OCR text.

    Each line of *text* is scanned independently for rows of the form
    ``<component> <value> <min> - <max> <units> [L|H]``. Lines that do not
    fit that shape (titles, headers, malformed numbers) are skipped.

    Args:
        text: Raw text, typically the output of Tesseract OCR.

    Returns:
        One dict per matched row with the keys ``Component``, ``Your Value``,
        ``Min``, ``Max``, ``Units`` and ``Flag``. The three numeric fields are
        floats; ``Flag`` is ``"L"``, ``"H"`` or ``"Normal"`` when the row
        carries no flag. The list is empty when nothing matches.
    """
    rows = []
    # splitlines() also breaks on "\r" and form feeds, so every row is matched
    # strictly within its own line.
    for line in text.splitlines():
        for match in _ROW_PATTERN.finditer(line):
            rows.append({
                "Component": match.group("component").strip(),
                # _NUMBER only matches float()-compatible text, so these
                # conversions cannot raise.
                "Your Value": float(match.group("your_value")),
                "Min": float(match.group("range_min")),
                "Max": float(match.group("range_max")),
                "Units": match.group("units"),
                "Flag": match.group("flag") or "Normal",
            })
    return rows


def main() -> None:
    """Render the uploader, run OCR on the uploaded image and show the result."""
    st.title("Blood Test Report Parser")

    # File uploader
    uploaded_file = st.file_uploader(
        "Upload a blood test report image", type=["jpg", "jpeg", "png"]
    )
    if uploaded_file is None:
        return

    # Display uploaded image
    image = Image.open(uploaded_file)
    st.image(image, caption="Uploaded Image", use_container_width=True)

    # Extract text using Tesseract OCR
    with st.spinner("Extracting text from image..."):
        extracted_text = pytesseract.image_to_string(image)
    st.text_area("Extracted Text", extracted_text, height=300)

    # Parse the extracted text and show it as a table
    data = parse_blood_report(extracted_text)
    if data:
        df = pd.DataFrame(data)
        st.success("Parsed Data Successfully!")
        st.dataframe(df)
    else:
        st.error("No valid data found in the extracted text.")


# `streamlit run` executes this script as the `__main__` module, so the app
# still starts normally, while a plain import (e.g. from a test) only defines
# the functions above.
if __name__ == "__main__":
    main()