File size: 3,197 Bytes
cd6b2f5 1138794 cd6b2f5 4fd374a cd6b2f5 4fd374a cd6b2f5 9e4acbd 4fd374a cd6b2f5 4fd374a 47e5b46 4fd374a 47e5b46 4fd374a cd6b2f5 4fd374a 1138794 cd6b2f5 1138794 cd6b2f5 1138794 cd6b2f5 1138794 cd6b2f5 1138794 cd6b2f5 1138794 cd6b2f5 1138794 cd6b2f5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 |
import streamlit as st
from PIL import Image
import pytesseract
import pandas as pd
import re
def extract_text(image):
    """Run Tesseract OCR on an image and return the recognised text.

    Args:
        image: The image to read; it is handed unchanged to
            ``pytesseract.image_to_string``.

    Returns:
        Whatever ``pytesseract.image_to_string`` returns for the image.
    """
    recognised_text = pytesseract.image_to_string(image)
    return recognised_text
# Column order of the DataFrame built by clean_and_parse_extracted_text.
_RESULT_COLUMNS = ["Component", "Your Value", "Min", "Max", "Units", "Flag"]

# One result row: component name, measured value, optional reference range,
# optional unit and optional flag printed on the report.
# Groups used below: 1 = component, 2 = value, 4 = range text, 7 = unit,
# 8 = printed flag.
_RESULT_ROW = re.compile(
    r"^(.*?)(\d+(\.\d+)?)(\s*-?\s*\d+(\.\d+)?\s*-?\s*\d+(\.\d+)?)?\s*([a-zA-Z/%]+)?\s*(H|L|Normal)?$"
)

# Typographic dashes (hyphen variants, en/em dash, minus sign) are mapped to
# the ASCII hyphen so that a range such as "12.0–15.5" is still recognised.
_DASH_TO_HYPHEN = str.maketrans(
    dict.fromkeys("\u2010\u2011\u2012\u2013\u2014\u2212", "-")
)

# Known misread component names; applied only when the whole name matches.
_COMPONENT_CORRECTIONS = {
    "emoglobin": "Hemoglobin",
    "ematocrit": "Hematocrit",
    "% Platelet Count": "Platelet Count",
    "ymphocyte %": "Lymphocyte %",
    "L Differential Type Automated": "Differential Type",
}


def clean_and_parse_extracted_text(raw_text):
    """Parse raw OCR text into a table of blood-test results.

    Every non-empty line is matched against a pattern of the form
    ``<component> <value> [<min>-<max>] [<unit>] [H|L|Normal]``. Lines that
    do not fit the pattern are ignored.

    Args:
        raw_text: Text to parse, one report row per line. ``None`` or an
            empty string yields an empty table.

    Returns:
        A ``pandas.DataFrame`` with the columns ``Component``,
        ``Your Value``, ``Min``, ``Max``, ``Units`` and ``Flag``.
        ``Flag`` is ``"L"``/``"H"`` when the value lies below/above a
        complete reference range and ``"Normal"`` when it lies inside it.
        Without a complete range, the flag printed on the row is used, and
        ``"Normal"`` is the fallback when none is printed.
    """
    data = []
    for raw_line in (raw_text or "").split("\n"):
        # Dashes are normalised before matching, so a component name that
        # contains a typographic dash is returned with an ASCII hyphen.
        line = raw_line.strip().translate(_DASH_TO_HYPHEN)
        match = _RESULT_ROW.match(line)
        if match is None:
            continue

        component = match.group(1).strip()
        value = float(match.group(2))

        min_val = None
        max_val = None
        range_text = match.group(4)
        if range_text:
            try:
                bounds = [
                    float(token)
                    for token in re.findall(r"[\d.]+", range_text)
                ]
            except ValueError:
                # Two numbers fused by OCR (e.g. "12.015.5") are not a
                # usable range; skip the range instead of aborting the
                # whole report.
                bounds = []
            if bounds:
                min_val = bounds[0]
            if len(bounds) > 1:
                max_val = bounds[1]

        unit = match.group(7)

        if min_val is not None and max_val is not None:
            # A complete range always decides the flag.
            if value < min_val:
                flag = "L"
            elif value > max_val:
                flag = "H"
            else:
                flag = "Normal"
        else:
            # No usable range: trust the flag printed on the report.
            flag = match.group(8) or "Normal"

        data.append([component, value, min_val, max_val, unit, flag])

    df = pd.DataFrame(data, columns=_RESULT_COLUMNS)
    df["Component"] = df["Component"].replace(_COMPONENT_CORRECTIONS)
    return df
def display_results(df):
    """Render the parsed results as a Streamlit table.

    Args:
        df: The table to show; it is passed unchanged to ``st.dataframe``.
    """
    table_options = {"use_container_width": True}
    st.dataframe(df, **table_options)
# Streamlit app: upload a report image, run OCR on it and show the parsed rows.
st.title("Blood Report Analyzer")
st.write("Upload an image of a blood test report to analyze.")

uploaded_file = st.file_uploader("Upload Image", type=["png", "jpg", "jpeg"])

if uploaded_file is not None:
    try:
        # Load the image and echo it back so the user can check the upload.
        image = Image.open(uploaded_file)
        st.image(image, caption="Uploaded Image", use_container_width=True)

        # OCR the image, then turn the raw text into a structured table.
        extracted_text = extract_text(image)
        parsed_data = clean_and_parse_extracted_text(extracted_text)

        st.subheader("Parsed Blood Test Results")
        if parsed_data.empty:
            # An empty table on its own gives no hint that nothing was
            # recognised, so say it explicitly.
            st.warning(
                "No test results could be recognized in the uploaded image."
            )
        else:
            display_results(parsed_data)
    except Exception as e:
        # Top-level boundary of the app: report any failure to the user
        # instead of showing a traceback.
        st.error(f"An error occurred: {e}")
|