# bLoOd_AI / app.py
# erayman09's picture
# Update app.py
# 1138794 verified
# raw
# history blame
# 3.5 kB
import re
import pandas as pd
import gradio as gr
# Fallback (min, max) reference ranges keyed by component name. The parser
# consults this table only when a report row does not yield its own range.
# NOTE(review): units and the population these ranges apply to are not stated
# anywhere in this file -- confirm against the lab reference being targeted.
DEFAULT_RANGES = {
    # Cell counts and red-cell indices
    "White Blood Cell Count": (4, 11),
    "Red Blood Cell Count": (4.4, 6),
    "Hemoglobin": (13.5, 18),
    "Hematocrit": (40, 52),
    "MCV": (80, 100),
    "MCH": (27, 33),
    "MCHC": (31, 36),
    # White-cell differential, percentage entries
    "Neutrophil %": (49, 74),
    "Lymphocyte %": (26, 46),
    "Monocyte %": (2, 12),
    "Eosinophil %": (0, 5),
    "Basophil %": (0, 2),
    # White-cell differential, "Abs." entries (presumably absolute counts)
    "Abs. Neutrophil": (2.0, 8.0),
    "Abs. Lymphocyte": (1.2, 4.8),
    "Abs. Monocyte": (0.0, 0.8),
    "Abs. Eosinophil": (0.0, 0.5),
    "Abs. Basophil": (0.0, 0.2),
}
# One report row: component label, measured value, optional "min - max"
# reference range, optional unit token, optional trailing H/L/Normal marker.
# Compiled once here instead of being re-specified for every line.
_ROW_PATTERN = re.compile(
    r"^(.*?)(\d+(\.\d+)?)(\s*-?\s*\d+(\.\d+)?\s*-?\s*\d+(\.\d+)?)?\s*([a-zA-Z/%]+)?\s*(H|L|Normal)?$"
)

# Stray text that turns up inside component names and has to be removed.
_COMPONENT_NOISE = "Sir 2.0"

_RESULT_COLUMNS = ["Component", "Your Value", "Min", "Max", "Units", "Flag"]


def _to_float(token):
    """Convert a numeric token to float, or return None if it is malformed."""
    try:
        return float(token)
    except ValueError:
        return None


def _default_range_for(component):
    """Return the DEFAULT_RANGES entry for a component name, or None."""
    exact = DEFAULT_RANGES.get(component)
    if exact is not None:
        return exact
    # Extracted text varies in letter case and spacing, so retry the lookup
    # with both normalised before giving up.
    wanted = " ".join(component.split()).casefold()
    for name, bounds in DEFAULT_RANGES.items():
        if name.casefold() == wanted:
            return bounds
    return None


def clean_and_parse_extracted_text(raw_text):
    """
    Parse raw report text into a table of components, values and flags.

    Each non-blank line is matched against the row pattern; lines that do
    not match are skipped. For a matching line the component name is cleaned
    of known noise, the value is converted to float, and the reference range
    is taken from the line itself or, failing that, from DEFAULT_RANGES.

    Args:
        raw_text: Newline-separated text of the report. ``None`` or an empty
            string is treated as a report with no rows.

    Returns:
        A pandas DataFrame with the columns "Component", "Your Value",
        "Min", "Max", "Units" and "Flag". "Flag" is "L" when the value is
        below Min, "H" when it is above Max, and "Normal" otherwise.
    """
    rows = []
    for raw_line in (raw_text or "").split("\n"):
        line = raw_line.strip()
        if not line:
            continue

        match = _ROW_PATTERN.match(line)
        if match is None:
            continue

        # Strip the noise *before* the default-range lookup below; a name
        # that still carries it would never be found in DEFAULT_RANGES.
        component = match.group(1).replace(_COMPONENT_NOISE, "").strip()
        value = float(match.group(2))
        unit = match.group(7)

        min_val = None
        max_val = None
        range_text = match.group(4)
        if range_text:
            bounds = re.findall(r"[\d.]+", range_text)
            # A garbled token such as "1.52.3" satisfies the row pattern but
            # is not a number; treat it as missing rather than letting one
            # bad line abort the whole report.
            if len(bounds) > 0:
                min_val = _to_float(bounds[0])
            if len(bounds) > 1:
                max_val = _to_float(bounds[1])

        # Fall back to the built-in range when the line did not yield both
        # bounds.
        if min_val is None or max_val is None:
            fallback = _default_range_for(component)
            if fallback:
                min_val, max_val = fallback

        # NOTE: "Normal" is also what a row gets when no range is known at
        # all; the marker printed on the report itself is not consulted.
        flag = "Normal"
        if min_val is not None and max_val is not None:
            if value < min_val:
                flag = "L"
            elif value > max_val:
                flag = "H"

        rows.append([component, value, min_val, max_val, unit, flag])

    return pd.DataFrame(rows, columns=_RESULT_COLUMNS)
def process_blood_test(image):
    """
    Run OCR on a blood test report image and return the parsed results.

    Args:
        image: The report image, passed straight to
            ``pytesseract.image_to_string``. The Gradio input in this file
            is configured with ``type="pil"``. ``None`` (no image supplied)
            is treated as an empty report instead of being sent to OCR.

    Returns:
        The DataFrame produced by ``clean_and_parse_extracted_text``.
    """
    if image is None:
        # Nothing to read: return the same table shape with zero rows
        # rather than letting the OCR call fail on a missing image.
        return clean_and_parse_extracted_text("")

    # Imported inside the function, as before, so the module can be imported
    # without the OCR dependency being loaded up front.
    import pytesseract

    # Step 1: extract the text from the image.
    raw_text = pytesseract.image_to_string(image)

    # Step 2: parse the text into the structured results table.
    return clean_and_parse_extracted_text(raw_text)
# Gradio Interface
def analyze_blood_report(image):
    """
    Callback for the Gradio interface: analyze an uploaded report image.

    Delegates to ``process_blood_test`` and returns its result table
    unchanged.
    """
    return process_blood_test(image)
# Image-in, table-out UI: the uploaded image is handed to
# analyze_blood_report as a PIL image and the returned DataFrame is rendered
# in the output table.
interface = gr.Interface(
    title="Blood Test Analyzer",
    description="Upload an image of your blood test report to analyze the values and flag abnormalities.",
    fn=analyze_blood_report,
    inputs=gr.Image(type="pil"),
    outputs=gr.DataFrame(label="Blood Test Analysis"),
)

# Launch the UI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    interface.launch()