Spaces:

erayman09
/

bLoOd_AI

Sleeping

App Files Files Community

erayman09 committed on Dec 9, 2024

Commit

b5e2857

verified ·

1 Parent(s): 4e5da36

Update app.py

Browse files

Files changed (1) hide show

app.py +30 -60

app.py CHANGED Viewed

@@ -1,29 +1,34 @@
-import streamlit as st
-from PIL import Image
-import pytesseract
-import pandas as pd
-import re
-def extract_text(image):
-    """
-    Extract text from the image using Tesseract.
-    """
-    return pytesseract.image_to_string(image)
 def clean_and_parse_extracted_text(raw_text):
     """
     Parse and clean the raw text to extract structured data.
     """
-    # Split the text into lines and clean up
     lines = raw_text.split("\n")
     lines = [line.strip() for line in lines if line.strip()]
-    # Identify and extract rows with valid components
     data = []
     for line in lines:
-        # Match rows containing numeric ranges and values
         match = re.match(
             r"^(.*?)(\d+(\.\d+)?)(\s*-?\s*\d+(\.\d+)?\s*-?\s*\d+(\.\d+)?)?\s*([a-zA-Z/%]+)?\s*(H|L|Normal)?$",
             line,
@@ -39,9 +44,16 @@ def clean_and_parse_extracted_text(raw_text):
             else:
                 min_val = None
                 max_val = None
             unit = match.group(7)
             flag = "Normal"  # Default flag
             # Determine the flag based on value and range
             if min_val is not None and max_val is not None:
                 if value < min_val:
@@ -55,49 +67,7 @@ def clean_and_parse_extracted_text(raw_text):
     # Create a DataFrame
     df = pd.DataFrame(data, columns=["Component", "Your Value", "Min", "Max", "Units", "Flag"])
-    # Fix misspellings and inconsistencies (if any known issues exist)
-    correction_map = {
-        "emoglobin": "Hemoglobin",
-        "ematocrit": "Hematocrit",
-        "% Platelet Count": "Platelet Count",
-        "ymphocyte %": "Lymphocyte %",
-        "L Differential Type Automated": "Differential Type",
-    }
-    df["Component"] = df["Component"].replace(correction_map)
     return df
-def display_results(df):
-    """
-    Display the parsed data in a table format.
-    """
-    st.dataframe(df, use_container_width=True)
-# Streamlit app
-st.title("Blood Report Analyzer")
-st.write("Upload an image of a blood test report to analyze.")
-uploaded_file = st.file_uploader("Upload Image", type=["png", "jpg", "jpeg"])
-if uploaded_file is not None:
-    try:
-        # Load the image
-        image = Image.open(uploaded_file)
-        # Display the uploaded image
-        st.image(image, caption="Uploaded Image", use_container_width=True)
-        # Extract text from the image
-        extracted_text = extract_text(image)
-        # Parse the extracted text into a structured format
-        parsed_data = clean_and_parse_extracted_text(extracted_text)
-        # Display the structured data
-        st.subheader("Parsed Blood Test Results")
-        display_results(parsed_data)
-    except Exception as e:
-        st.error(f"An error occurred: {e}")

+# Default reference ranges for common blood components
+DEFAULT_RANGES = {
+    "White Blood Cell Count": (4, 11),
+    "Red Blood Cell Count": (4.4, 6),
+    "Hemoglobin": (13.5, 18),
+    "Hematocrit": (40, 52),
+    "MCV": (80, 100),
+    "MCH": (27, 33),
+    "MCHC": (31, 36),
+    "Neutrophil %": (49, 74),
+    "Lymphocyte %": (26, 46),
+    "Monocyte %": (2, 12),
+    "Eosinophil %": (0, 5),
+    "Basophil %": (0, 2),
+    "Abs. Neutrophil": (2.0, 8.0),
+    "Abs. Lymphocyte": (1.2, 4.8),
+    "Abs. Monocyte": (0.0, 0.8),
+    "Abs. Eosinophil": (0.0, 0.5),
+    "Abs. Basophil": (0.0, 0.2),
+}
 def clean_and_parse_extracted_text(raw_text):
     """
     Parse and clean the raw text to extract structured data.
     """
     lines = raw_text.split("\n")
     lines = [line.strip() for line in lines if line.strip()]
     data = []
     for line in lines:
+        # Match rows with numeric values and optional ranges
         match = re.match(
             r"^(.*?)(\d+(\.\d+)?)(\s*-?\s*\d+(\.\d+)?\s*-?\s*\d+(\.\d+)?)?\s*([a-zA-Z/%]+)?\s*(H|L|Normal)?$",
             line,
             else:
                 min_val = None
                 max_val = None
             unit = match.group(7)
             flag = "Normal"  # Default flag
+            # Use default ranges if OCR fails to extract them
+            if min_val is None or max_val is None:
+                default_range = DEFAULT_RANGES.get(component)
+                if default_range:
+                    min_val, max_val = default_range
             # Determine the flag based on value and range
             if min_val is not None and max_val is not None:
                 if value < min_val:
     # Create a DataFrame
     df = pd.DataFrame(data, columns=["Component", "Your Value", "Min", "Max", "Units", "Flag"])
+    # Clean up component names further if necessary
+    df["Component"] = df["Component"].str.replace("Sir 2.0", "", regex=False).str.strip()
     return df