Spaces:

erayman09
/

bLoOd_AI

Sleeping

App Files Files Community

erayman09 committed on Dec 9, 2024

Commit

4fd374a

verified ·

1 Parent(s): 45352c6

Update app.py

Browse files

Files changed (1) hide show

app.py +88 -41

app.py CHANGED Viewed

@@ -1,49 +1,96 @@
 import streamlit as st
 from PIL import Image
 import pytesseract
-import re
 import pandas as pd
-# Streamlit App
-st.title("Blood Test Report Parser")
-# File uploader
-uploaded_file = st.file_uploader("Upload a blood test report image", type=["jpg", "jpeg", "png"])
-if uploaded_file is not None:
-    # Display uploaded image
-    image = Image.open(uploaded_file)
-    st.image(image, caption="Uploaded Image", use_container_width=True)
-    # Extract text using Tesseract OCR
-    with st.spinner("Extracting text from image..."):
-        extracted_text = pytesseract.image_to_string(image)
-    st.text_area("Extracted Text", extracted_text, height=300)
-    # Regex Pattern for Parsing
-    pattern = r"(?P<component>[A-Za-z\s%]+?)\s+(?P<your_value>[\d.]+)\s+(?P<range>[\d.]+ - [\d.]+)\s+(?P<units>[\w/%]+)(?:\s+(?P<flag>[LH]))?"
-    # Parse Extracted Text
     data = []
-    for match in re.finditer(pattern, extracted_text):
-        component = match.group("component").strip()
-        your_value = float(match.group("your_value"))
-        range_min, range_max = map(float, match.group("range").split(" - "))
-        units = match.group("units")
-        flag = match.group("flag") if match.group("flag") else "Normal"
-        data.append({
-            "Component": component,
-            "Your Value": your_value,
-            "Min": range_min,
-            "Max": range_max,
-            "Units": units,
-            "Flag": flag
-        })
-    # Convert to DataFrame
-    if data:
-        df = pd.DataFrame(data)
-        st.success("Parsed Data Successfully!")
-        st.dataframe(df)
-    else:
-        st.error("No valid data found in the extracted text.")

 import streamlit as st
 from PIL import Image
 import pytesseract
 import pandas as pd
+import re
def extract_text(image):
    """
    Run Tesseract OCR on the given image and return the recognised text.
    """
    ocr_output = pytesseract.image_to_string(image)
    return ocr_output
def clean_and_parse_extracted_text(raw_text):
    """
    Parse raw OCR text from a blood report into a structured table.

    Every non-blank line is matched against a row pattern of the form
    ``<component> <value> [<min> - <max>] [<units>] [H|L|Normal]``.
    Lines that do not match the pattern are ignored.

    Args:
        raw_text: Text produced by OCR, one report row per line.

    Returns:
        A pandas DataFrame with the columns "Component", "Your Value",
        "Min", "Max", "Units" and "Flag". "Min"/"Max" are missing when the
        line carries no usable reference range, "Units" is missing when the
        line has none, and "Flag" defaults to "Normal".
    """
    # Compiled once instead of on every loop iteration; the pattern itself
    # is unchanged.
    row_pattern = re.compile(
        r"^(.*?)(\d+(\.\d+)?)(\s*-?\s*\d+(\.\d+)?\s*-?\s*\d+(\.\d+)?)?\s*([a-zA-Z/%]+)?\s*(H|L|Normal)?$"
    )
    flag_tokens = {"H", "L", "Normal"}
    # Known OCR misreads of component names, mapped to their proper spelling.
    correction_map = {
        "emoglobin": "Hemoglobin",
        "ematocrit": "Hematocrit",
        "% Platelet Count": "Platelet Count",
        "ymphocyte %": "Lymphocyte %",
        "L Differential Type Automated": "Differential Type",
    }

    # Split the text into lines, dropping blank ones.
    lines = [line.strip() for line in raw_text.split("\n") if line.strip()]

    data = []
    for line in lines:
        match = row_pattern.match(line)
        if not match:
            continue

        component = match.group(1).strip()
        value = float(match.group(2))

        min_val = None
        max_val = None
        range_text = match.group(4)
        if range_text:
            bounds = re.findall(r"[\d.]+", range_text)
            # Every separator in the range group is optional, so the regex
            # can satisfy it by splitting one number ("100" -> "10" + "0").
            # findall then yields a single token; treat that as "no range"
            # instead of raising IndexError on bounds[1].
            if len(bounds) >= 2:
                min_val = float(bounds[0])
                max_val = float(bounds[1])

        unit = match.group(7)
        flag = match.group(8)
        if flag is None and unit in flag_tokens:
            # With no units on the line, the greedy units group swallows the
            # trailing H/L/Normal marker. Move it back so an abnormal result
            # is not reported as "Normal".
            flag, unit = unit, None
        if not flag:
            flag = "Normal"

        data.append([component, value, min_val, max_val, unit, flag])

    df = pd.DataFrame(
        data,
        columns=["Component", "Your Value", "Min", "Max", "Units", "Flag"],
    )
    # Whole-value replacement of the known misspellings.
    df["Component"] = df["Component"].replace(correction_map)
    return df
def display_results(df):
    """
    Render the parsed results as a Streamlit dataframe, with
    ``use_container_width`` enabled.
    """
    st.dataframe(data=df, use_container_width=True)
# Streamlit app: upload a report image, OCR it, parse the text and show the
# resulting table.
st.title("Blood Report Analyzer")
st.write("Upload an image of a blood test report to analyze.")
uploaded_file = st.file_uploader("Upload Image", type=["png", "jpg", "jpeg"])
if uploaded_file is not None:
    try:
        # Load and echo back the uploaded image.
        image = Image.open(uploaded_file)
        st.image(image, caption="Uploaded Image", use_container_width=True)
        # OCR the image, then parse the text into a structured table.
        extracted_text = extract_text(image)
        parsed_data = clean_and_parse_extracted_text(extracted_text)
        if parsed_data.empty:
            # Without this branch the user only sees a heading over an empty
            # table and cannot tell that nothing was recognised.
            st.warning("No valid data found in the extracted text.")
        else:
            st.subheader("Parsed Blood Test Results")
            display_results(parsed_data)
    except Exception as e:
        # Top-level boundary: report any failure in the UI instead of
        # letting the traceback reach the user.
        st.error(f"An error occurred: {e}")