Spaces:

erayman09
/

bLoOd_AI

Sleeping

File size: 3,502 Bytes

import re
import pandas as pd
import gradio as gr

# Default reference ranges for common blood components
DEFAULT_RANGES = {
    # component name -> (min, max); a value equal to either bound is not flagged.
    "White Blood Cell Count": (4, 11),
    "Red Blood Cell Count": (4.4, 6),
    "Hemoglobin": (13.5, 18),
    "Hematocrit": (40, 52),
    "MCV": (80, 100),
    "MCH": (27, 33),
    "MCHC": (31, 36),
    "Neutrophil %": (49, 74),
    "Lymphocyte %": (26, 46),
    "Monocyte %": (2, 12),
    "Eosinophil %": (0, 5),
    "Basophil %": (0, 2),
    "Abs. Neutrophil": (2.0, 8.0),
    "Abs. Lymphocyte": (1.2, 4.8),
    "Abs. Monocyte": (0.0, 0.8),
    "Abs. Eosinophil": (0.0, 0.5),
    "Abs. Basophil": (0.0, 0.2),
}

# OCR artifact that leaks into component names. It has to be removed BEFORE the
# name is used as a DEFAULT_RANGES key, otherwise the lookup silently misses.
_OCR_NAME_ARTIFACT = "Sir 2.0"

# One report row: component name, measured value, optional "min max" range,
# optional unit, optional flag printed on the report. Compiled once because it
# is applied to every line of the OCR output.
_ROW_PATTERN = re.compile(
    r"^(.*?)(\d+(\.\d+)?)(\s*-?\s*\d+(\.\d+)?\s*-?\s*\d+(\.\d+)?)?\s*([a-zA-Z/%]+)?\s*(H|L|Normal)?$"
)

_RESULT_COLUMNS = ["Component", "Your Value", "Min", "Max", "Units", "Flag"]


def _parse_row(line):
    """
    Parse one stripped line of OCR text into a result row.

    Returns a list ``[component, value, min, max, unit, flag]`` or ``None``
    when the line does not look like a result row.
    """
    match = _ROW_PATTERN.match(line)
    if match is None:
        return None

    name, value_text, _, range_text, _, _, unit, printed_flag = match.groups()

    # Clean the name first so that the default-range lookup below sees the
    # same name that ends up in the output table.
    component = name.replace(_OCR_NAME_ARTIFACT, "").strip()
    value = float(value_text)

    min_val = None
    max_val = None
    if range_text:
        bounds = re.findall(r"[\d.]+", range_text)
        min_val = float(bounds[0]) if len(bounds) > 0 else None
        max_val = float(bounds[1]) if len(bounds) > 1 else None

    # Use default ranges if OCR fails to extract them
    if min_val is None or max_val is None:
        default_range = DEFAULT_RANGES.get(component)
        if default_range:
            min_val, max_val = default_range

    if min_val is not None and max_val is not None:
        # A known range always wins over whatever flag was printed.
        if value < min_val:
            flag = "L"
        elif value > max_val:
            flag = "H"
        else:
            flag = "Normal"
    else:
        # No range to compare against: trust the flag printed on the report
        # instead of discarding it; rows without one stay "Normal".
        flag = printed_flag or "Normal"

    return [component, value, min_val, max_val, unit, flag]


def clean_and_parse_extracted_text(raw_text):
    """
    Parse raw OCR text from a blood test report into a structured table.

    Each non-empty line is matched against the row pattern; lines that do not
    match are ignored. For matching lines the reference range is taken from
    the line itself when present, otherwise from ``DEFAULT_RANGES`` (looked up
    by the cleaned component name).

    Args:
        raw_text: Text extracted from the report. ``None`` or an empty string
            yields an empty table.

    Returns:
        A pandas DataFrame with the columns ``Component``, ``Your Value``,
        ``Min``, ``Max``, ``Units`` and ``Flag``. ``Flag`` is ``"L"`` or
        ``"H"`` when the value lies outside the range and ``"Normal"``
        otherwise; when no range is known, the flag printed on the report
        (if any) is used.
    """
    rows = []
    for raw_line in (raw_text or "").split("\n"):
        row = _parse_row(raw_line.strip())
        if row is not None:
            rows.append(row)

    return pd.DataFrame(rows, columns=_RESULT_COLUMNS)

def process_blood_test(image):
    """
    Run OCR on a blood test report image and parse the recognized text.

    The image is passed to pytesseract unchanged; the text it returns is
    handed to ``clean_and_parse_extracted_text`` and that function's result
    is returned as-is.
    """
    # Function-scope import: pytesseract is only loaded when a report is
    # actually processed, not when this module is imported.
    import pytesseract

    return clean_and_parse_extracted_text(pytesseract.image_to_string(image))

# Gradio Interface
def analyze_blood_report(image):
    """
    Gradio callback: analyze an uploaded blood test report.

    Args:
        image: The uploaded report image, or ``None`` when the form was
            submitted without an image.

    Returns:
        The table produced by ``process_blood_test`` for the image.

    Raises:
        gr.Error: If no image was provided, so the user sees an actionable
            message instead of an opaque OCR failure.
    """
    if image is None:
        raise gr.Error("Please upload an image of your blood test report.")
    return process_blood_test(image)

interface = gr.Interface(
    # Page text shown above the form.
    title="Blood Test Analyzer",
    description="Upload an image of your blood test report to analyze the values and flag abnormalities.",
    # One image upload in, one results table out.
    fn=analyze_blood_report,
    inputs=gr.Image(type="pil"),
    outputs=gr.DataFrame(label="Blood Test Analysis"),
)

# Start the app only when this file is executed directly, not when imported.
if __name__ == "__main__":
    interface.launch()