File size: 3,197 Bytes
cd6b2f5
 
 
1138794
cd6b2f5
 
 
 
 
 
 
 
 
4fd374a
 
 
 
 
cd6b2f5
4fd374a
 
 
cd6b2f5
9e4acbd
4fd374a
cd6b2f5
4fd374a
 
 
 
 
 
 
 
 
 
47e5b46
 
4fd374a
 
 
 
47e5b46
 
 
 
 
 
 
 
4fd374a
 
 
 
 
 
 
cd6b2f5
 
 
 
 
 
 
 
 
4fd374a
 
1138794
cd6b2f5
 
1138794
cd6b2f5
1138794
cd6b2f5
1138794
 
cd6b2f5
 
 
1138794
cd6b2f5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1138794
cd6b2f5
 
 
1138794
cd6b2f5
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
import streamlit as st
from PIL import Image
import pytesseract
import pandas as pd
import re


def extract_text(image):
    """
    Run Tesseract OCR over ``image`` and return whatever text it recognises.
    """
    recognised_text = pytesseract.image_to_string(image)
    return recognised_text


# One report row: "<component> <value> [<range>] [<unit>] [<flag>]".
# Group 1 = component, 2 = value, 4 = reference-range text, 7 = unit,
# 8 = flag printed on the report. Compiled once instead of per line.
_RESULT_LINE_RE = re.compile(
    r"^(.*?)(\d+(\.\d+)?)(\s*-?\s*\d+(\.\d+)?\s*-?\s*\d+(\.\d+)?)?\s*([a-zA-Z/%]+)?\s*(H|L|Normal)?$"
)

# A single well-formed decimal number. Deliberately stricter than "[\d.]+",
# which can produce tokens such as "2.34.5" that float() rejects.
_NUMBER_RE = re.compile(r"\d+(?:\.\d+)?")

# Column order of the DataFrame returned by clean_and_parse_extracted_text.
_RESULT_COLUMNS = ["Component", "Your Value", "Min", "Max", "Units", "Flag"]

# Known OCR misreads; applied only when the whole component name matches a key.
_COMPONENT_CORRECTIONS = {
    "emoglobin": "Hemoglobin",
    "ematocrit": "Hematocrit",
    "% Platelet Count": "Platelet Count",
    "ymphocyte %": "Lymphocyte %",
    "L Differential Type Automated": "Differential Type",
}


def clean_and_parse_extracted_text(raw_text):
    """
    Parse raw OCR text into a table of blood-test results.

    Each non-blank line is stripped and matched against the row pattern;
    lines that do not match are ignored.

    Args:
        raw_text: Text to parse, one report row per line. ``None`` or an
            empty string yields an empty table.

    Returns:
        A pandas DataFrame with the columns "Component", "Your Value",
        "Min", "Max", "Units" and "Flag". "Flag" is "L" or "H" when the value
        lies outside a fully parsed Min/Max range and "Normal" when it lies
        inside. When no complete range was parsed, the flag printed on the
        row (H, L or Normal) is used if present, otherwise "Normal".
    """
    rows = []
    for line in (raw_text or "").split("\n"):
        line = line.strip()
        if not line:
            continue

        match = _RESULT_LINE_RE.match(line)
        if match is None:
            continue

        component = match.group(1).strip()
        value = float(match.group(2))

        # Re-tokenise the range text with a strict number pattern so every
        # token is guaranteed to be convertible by float().
        bounds = [float(tok) for tok in _NUMBER_RE.findall(match.group(4) or "")]
        min_val = bounds[0] if len(bounds) > 0 else None
        max_val = bounds[1] if len(bounds) > 1 else None

        unit = match.group(7)
        printed_flag = match.group(8)

        flag = "Normal"  # Default flag
        if min_val is not None and max_val is not None:
            # A complete range is available: derive the flag from it.
            if value < min_val:
                flag = "L"
            elif value > max_val:
                flag = "H"
        elif printed_flag:
            # Without a usable range, trust the flag printed on the report
            # rather than silently reporting an abnormal row as "Normal".
            flag = printed_flag

        rows.append([component, value, min_val, max_val, unit, flag])

    df = pd.DataFrame(rows, columns=_RESULT_COLUMNS)

    # Fix known misspellings; Series.replace with a dict substitutes only
    # cells whose entire value equals a key.
    df["Component"] = df["Component"].replace(_COMPONENT_CORRECTIONS)

    return df


def display_results(df):
    """
    Render ``df`` as a Streamlit dataframe stretched to the container width.
    """
    st.dataframe(data=df, use_container_width=True)


# Streamlit app: page header, upload widget, then the OCR -> parse -> display pipeline.
st.title("Blood Report Analyzer")
st.write("Upload an image of a blood test report to analyze.")

uploaded_file = st.file_uploader("Upload Image", type=["png", "jpg", "jpeg"])

# Nothing below runs until the user has supplied a file.
if uploaded_file is not None:
    try:
        # Decode the upload and echo it back to the user.
        image = Image.open(uploaded_file)
        st.image(image, caption="Uploaded Image", use_container_width=True)

        # OCR the image, then turn the raw text into a structured table.
        parsed_data = clean_and_parse_extracted_text(extract_text(image))

        # Show the structured results under their own heading.
        st.subheader("Parsed Blood Test Results")
        display_results(parsed_data)

    except Exception as exc:
        # UI boundary: report any failure in the page instead of a traceback.
        st.error(f"An error occurred: {exc}")