|
import streamlit as st |
|
from PIL import Image |
|
import pytesseract |
|
import re |
|
import pandas as pd |
|
|
|
|
|
# A numeric token that ``float()`` is guaranteed to accept: "13", "13.5",
# "13." or ".5".  Tokens such as "1.4.0" or a lone "." (common OCR noise) do
# not match, so a garbled row is skipped instead of raising ValueError.
_NUMBER = r"(?:\d+\.?\d*|\.\d+)"

# One result row: component name, measured value, reference range, units and
# an optional Low/High flag.  Only spaces and tabs separate the fields, so a
# match can never run across a line break: header lines are not absorbed into
# the component name, and the first letter of the *next* line ("Hematocrit",
# "Lymphocytes", ...) is not mistaken for an H/L flag.
REPORT_ROW_RE = re.compile(
    r"(?P<component>[A-Za-z% \t]+?)[ \t]+"
    rf"(?P<your_value>{_NUMBER})[ \t]+"
    # The separator may be a hyphen, en dash or em dash, with or without
    # surrounding spaces ("13.0 - 17.0", "13.0-17.0").
    rf"(?P<range_min>{_NUMBER})[ \t]*[-\u2013\u2014][ \t]*"
    rf"(?P<range_max>{_NUMBER})[ \t]+"
    r"(?P<units>[\w/%]+)"
    # The flag must be a standalone token on the same line: "H", "L", or the
    # spelled-out "High"/"Low".
    r"(?:[ \t]+(?P<flag>H(?:igh|IGH)?|L(?:ow|OW)?)\b)?"
)


def parse_report_text(text: str) -> list:
    """Extract blood-test result rows from OCR'd report text.

    Args:
        text: Raw text of the report.  ``None`` or an empty string yields an
            empty list.

    Returns:
        A list of dicts, one per recognised row, in order of appearance, with
        the keys ``"Component"`` (str), ``"Your Value"``, ``"Min"``, ``"Max"``
        (floats), ``"Units"`` (str) and ``"Flag"`` (``"L"``, ``"H"`` or
        ``"Normal"`` when the row carries no flag).  Rows that do not fit the
        expected layout are skipped.
    """
    rows = []
    for match in REPORT_ROW_RE.finditer(text or ""):
        flag = match.group("flag")
        rows.append({
            "Component": match.group("component").strip(),
            "Your Value": float(match.group("your_value")),
            "Min": float(match.group("range_min")),
            "Max": float(match.group("range_max")),
            "Units": match.group("units"),
            # Normalise "High"/"Low" to the single-letter form.
            "Flag": flag[0] if flag else "Normal",
        })
    return rows


def main() -> None:
    """Render the app: upload an image, OCR it, and show the parsed table."""
    st.title("Blood Test Report Parser")

    uploaded_file = st.file_uploader(
        "Upload a blood test report image",
        type=["jpg", "jpeg", "png"],
    )
    if uploaded_file is None:
        return

    try:
        image = Image.open(uploaded_file)
    except OSError:
        # Pillow's UnidentifiedImageError is an OSError subclass; it is raised
        # when the upload has an image extension but is not a decodable image.
        st.error("The uploaded file could not be read as an image.")
        return

    st.image(image, caption="Uploaded Image", use_container_width=True)

    with st.spinner("Extracting text from image..."):
        extracted_text = pytesseract.image_to_string(image)
    st.text_area("Extracted Text", extracted_text, height=300)

    data = parse_report_text(extracted_text)
    if data:
        st.success("Parsed Data Successfully!")
        st.dataframe(pd.DataFrame(data))
    else:
        st.error("No valid data found in the extracted text.")


# ``streamlit run`` executes the script with ``__name__ == "__main__"``, so the
# app still renders; the guard only keeps a plain import (e.g. to unit-test
# ``parse_report_text``) free of UI side effects.
if __name__ == "__main__":
    main()
|
|