File size: 2,237 Bytes
cd6b2f5
1138794
cd6b2f5
 
 
a8da724
 
efbd192
 
 
 
cd6b2f5
 
 
 
 
4fd374a
efbd192
4fd374a
 
 
 
efbd192
4fd374a
 
 
efbd192
9e4acbd
4fd374a
efbd192
4fd374a
 
 
 
47e5b46
 
efbd192
47e5b46
 
 
 
 
4fd374a
efbd192
b97a1bd
 
4fd374a
efbd192
4fd374a
efbd192
 
cd6b2f5
 
 
4fd374a
1138794
cd6b2f5
a8da724
 
 
 
efbd192
 
 
 
 
 
 
 
 
 
 
 
 
 
cd6b2f5
1138794
efbd192
1138794
efbd192
 
 
 
 
 
cd6b2f5
1138794
 
 
cd6b2f5
 
efbd192
 
 
cd6b2f5
efbd192
cd6b2f5
 
 
1138794
efbd192
 
cd6b2f5
1138794
efbd192
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
import pytesseract
import pandas as pd
import re








def extract_text(image):
    """
    Extract text from the image using Tesseract.

    Args:
        image: The image to run OCR on; it is passed unchanged to
            ``pytesseract.image_to_string``.

    Returns:
        Whatever ``pytesseract.image_to_string`` returns for ``image``.
    """
    return pytesseract.image_to_string(image)



def clean_and_parse_extracted_text(raw_text):
    """
    Parse and clean the raw text to extract structured data.

    Every non-empty line is matched against a row pattern of the shape
    ``<component> <value> [<min> - <max>] [<unit>] [H|L|Normal]``. A row is
    kept only when it carries a reference range and the value lies outside
    that range; the flag is computed from the numbers, not read from the text.

    Args:
        raw_text: Text to parse, one candidate row per line. ``None`` or an
            empty string yields an empty result.

    Returns:
        A pandas DataFrame with the columns "Component", "Your Value",
        "Min", "Max", "Units" and "Flag", holding only rows flagged "L"
        (value below Min) or "H" (value above Max).
    """
    # Split the text into lines and clean up
    stripped = (line.strip() for line in (raw_text or "").split("\n"))
    lines = [line for line in stripped if line]

    # Compiled once up front instead of on every loop iteration.
    row_pattern = re.compile(
        r"^(.*?)(\d+(\.\d+)?)(\s*-?\s*\d+(\.\d+)?\s*-?\s*\d+(\.\d+)?)?\s*([a-zA-Z/%]+)?\s*(H|L|Normal)?$"
    )
    number_pattern = re.compile(r"\d+(?:\.\d+)?")

    # Identify and extract rows with valid components
    data = []
    for line in lines:
        # Match rows containing numeric ranges and values
        match = row_pattern.match(line)
        if match is None:
            continue

        # NOTE(review): the assignments of component/value/min_val/max_val were
        # missing from the source; they are reconstructed here from the regex
        # groups (1 = name, 2 = value, 4 = range) -- confirm against history.
        component = match.group(1).strip()
        value = float(match.group(2))

        # Groups 5 and 6 only capture the fractional parts, so the two bounds
        # are re-read from the whole range text (group 4).
        min_val = max_val = None
        range_text = match.group(4)
        if range_text:
            bounds = number_pattern.findall(range_text)
            if len(bounds) == 2:
                min_val, max_val = (float(bound) for bound in bounds)

        unit = match.group(7)
        flag = "Normal"  # Default flag

        # Determine the flag based on value and range
        if min_val is not None and max_val is not None:
            if value < min_val:
                flag = "L"
            elif value > max_val:
                flag = "H"

        # Only append the data if the flag is abnormal (L or H)
        if flag != "Normal":
            data.append([component, value, min_val, max_val, unit, flag])

    # Create a DataFrame
    df = pd.DataFrame(data, columns=["Component", "Your Value", "Min", "Max", "Units", "Flag"])

    # Fix misspellings and inconsistencies (if any known issues exist)
    correction_map = {
        "emoglobin": "Hemoglobin",
        "ematocrit": "Hematocrit",
    }
    # NOTE(review): how the map was applied is not visible in the source.
    # Whole-value replacement is used so that a correctly spelled "Hemoglobin"
    # is left alone (a substring replace would turn it into "HHemoglobin").
    df["Component"] = df["Component"].replace(correction_map)
    return df




















def display_results(df):
    """
    Show the given results table in the Streamlit page.

    The frame is handed to ``st.dataframe`` with ``use_container_width=True``.
    """
    st.dataframe(data=df, use_container_width=True)





# Streamlit app
# Page header: a title call followed by a one-line usage hint.
st.title("Blood Report Analyzer")
st.write("Upload an image of a blood test report to analyze.")

# File-upload widget; `type` lists the accepted extensions (png, jpg, jpeg).
# The widget's return value is kept in the module-level name `uploaded_file`.
uploaded_file = st.file_uploader("Upload Image", type=["png", "jpg", "jpeg"])

        # Parse the extracted text into a structured format
        parsed_data = clean_and_parse_extracted_text(extracted_text)

        # Display the structured data (only abnormalities)
        st.subheader("Flagged Abnormalities")
        display_results(parsed_data)

    except Exception as e: