Spaces:

erayman09
/

bLoOd_AI

Sleeping

App Files Community

erayman09 committed on Dec 9, 2024

Commit

cd6b2f5

verified ·

1 Parent(s): 1138794

Update app.py

Browse files

Files changed (1) hide show

app.py +51 -58

app.py CHANGED Viewed

@@ -1,38 +1,29 @@
-import re
 import pandas as pd
-import gradio as gr
-# Default reference ranges for common blood components
-DEFAULT_RANGES = {
-    "White Blood Cell Count": (4, 11),
-    "Red Blood Cell Count": (4.4, 6),
-    "Hemoglobin": (13.5, 18),
-    "Hematocrit": (40, 52),
-    "MCV": (80, 100),
-    "MCH": (27, 33),
-    "MCHC": (31, 36),
-    "Neutrophil %": (49, 74),
-    "Lymphocyte %": (26, 46),
-    "Monocyte %": (2, 12),
-    "Eosinophil %": (0, 5),
-    "Basophil %": (0, 2),
-    "Abs. Neutrophil": (2.0, 8.0),
-    "Abs. Lymphocyte": (1.2, 4.8),
-    "Abs. Monocyte": (0.0, 0.8),
-    "Abs. Eosinophil": (0.0, 0.5),
-    "Abs. Basophil": (0.0, 0.2),
-}
 def clean_and_parse_extracted_text(raw_text):
     """
     Parse and clean the raw text to extract structured data.
     """
     lines = raw_text.split("\n")
     lines = [line.strip() for line in lines if line.strip()]
     data = []
     for line in lines:
-        # Match rows with numeric values and optional ranges
         match = re.match(
             r"^(.*?)(\d+(\.\d+)?)(\s*-?\s*\d+(\.\d+)?\s*-?\s*\d+(\.\d+)?)?\s*([a-zA-Z/%]+)?\s*(H|L|Normal)?$",
             line,
@@ -48,16 +39,9 @@ def clean_and_parse_extracted_text(raw_text):
             else:
                 min_val = None
                 max_val = None
             unit = match.group(7)
             flag = "Normal"  # Default flag
-            # Use default ranges if OCR fails to extract them
-            if min_val is None or max_val is None:
-                default_range = DEFAULT_RANGES.get(component)
-                if default_range:
-                    min_val, max_val = default_range
             # Determine the flag based on value and range
             if min_val is not None and max_val is not None:
                 if value < min_val:
@@ -71,40 +55,49 @@ def clean_and_parse_extracted_text(raw_text):
     # Create a DataFrame
     df = pd.DataFrame(data, columns=["Component", "Your Value", "Min", "Max", "Units", "Flag"])
-    # Clean up component names further if necessary
-    df["Component"] = df["Component"].str.replace("Sir 2.0", "", regex=False).str.strip()
     return df
-def process_blood_test(image):
     """
-    Process the uploaded blood test report and return the analyzed data.
     """
-    # Step 1: Extract text from the image
-    import pytesseract
-    raw_text = pytesseract.image_to_string(image)
-    # Step 2: Parse and analyze the extracted text
-    df = clean_and_parse_extracted_text(raw_text)
-    # Step 3: Convert the DataFrame to a readable format
-    return df
-# Gradio Interface
-def analyze_blood_report(image):
-    """
-    Analyze the blood test report and return a table with results.
-    """
-    df = process_blood_test(image)
-    return df
-interface = gr.Interface(
-    fn=analyze_blood_report,
-    inputs=gr.Image(type="pil"),
-    outputs=gr.DataFrame(label="Blood Test Analysis"),
-    title="Blood Test Analyzer",
-    description="Upload an image of your blood test report to analyze the values and flag abnormalities.",
-)
-if __name__ == "__main__":
-    interface.launch()

+import streamlit as st
+from PIL import Image
+import pytesseract
 import pandas as pd
+import re
+def extract_text(image):
+    """
+    Extract text from the image using Tesseract.
+    """
+    return pytesseract.image_to_string(image)
 def clean_and_parse_extracted_text(raw_text):
     """
     Parse and clean the raw text to extract structured data.
     """
+    # Split the text into lines and clean up
     lines = raw_text.split("\n")
     lines = [line.strip() for line in lines if line.strip()]
+    # Identify and extract rows with valid components
     data = []
     for line in lines:
+        # Match rows containing numeric ranges and values
         match = re.match(
             r"^(.*?)(\d+(\.\d+)?)(\s*-?\s*\d+(\.\d+)?\s*-?\s*\d+(\.\d+)?)?\s*([a-zA-Z/%]+)?\s*(H|L|Normal)?$",
             line,
             else:
                 min_val = None
                 max_val = None
             unit = match.group(7)
             flag = "Normal"  # Default flag
             # Determine the flag based on value and range
             if min_val is not None and max_val is not None:
                 if value < min_val:
     # Create a DataFrame
     df = pd.DataFrame(data, columns=["Component", "Your Value", "Min", "Max", "Units", "Flag"])
+    # Fix misspellings and inconsistencies (if any known issues exist)
+    correction_map = {
+        "emoglobin": "Hemoglobin",
+        "ematocrit": "Hematocrit",
+        "% Platelet Count": "Platelet Count",
+        "ymphocyte %": "Lymphocyte %",
+        "L Differential Type Automated": "Differential Type",
+    }
+    df["Component"] = df["Component"].replace(correction_map)
     return df
+def display_results(df):
     """
+    Display the parsed data in a table format.
     """
+    st.dataframe(df, use_container_width=True)
+# Streamlit app
+st.title("Blood Report Analyzer")
+st.write("Upload an image of a blood test report to analyze.")
+uploaded_file = st.file_uploader("Upload Image", type=["png", "jpg", "jpeg"])
+if uploaded_file is not None:
+    try:
+        # Load the image
+        image = Image.open(uploaded_file)
+        # Display the uploaded image
+        st.image(image, caption="Uploaded Image", use_container_width=True)
+        # Extract text from the image
+        extracted_text = extract_text(image)
+        # Parse the extracted text into a structured format
+        parsed_data = clean_and_parse_extracted_text(extracted_text)
+        # Display the structured data
+        st.subheader("Parsed Blood Test Results")
+        display_results(parsed_data)
+    except Exception as e:
+        st.error(f"An error occurred: {e}")