|
import re |
|
import pandas as pd |
|
import gradio as gr |
|
|
|
|
|
# Fallback (min, max) reference ranges, keyed by the component name exactly as
# it is expected to appear on a report line.  They are consulted only when a
# report line does not supply a complete range of its own.
DEFAULT_RANGES = {
    # Cell counts and red-cell indices
    "White Blood Cell Count": (4, 11),
    "Red Blood Cell Count": (4.4, 6),
    "Hemoglobin": (13.5, 18),
    "Hematocrit": (40, 52),
    "MCV": (80, 100),
    "MCH": (27, 33),
    "MCHC": (31, 36),
    # Differential, as percentages
    "Neutrophil %": (49, 74),
    "Lymphocyte %": (26, 46),
    "Monocyte %": (2, 12),
    "Eosinophil %": (0, 5),
    "Basophil %": (0, 2),
    # Differential, as absolute values
    "Abs. Neutrophil": (2.0, 8.0),
    "Abs. Lymphocyte": (1.2, 4.8),
    "Abs. Monocyte": (0.0, 0.8),
    "Abs. Eosinophil": (0.0, 0.5),
    "Abs. Basophil": (0.0, 0.2),
}
|
|
|
# One report line: "<name> <value> [<min> <max>] [<unit>] [<flag>]".
# Groups used below: 1 = component name, 2 = measured value,
# 4 = optional printed range, 7 = optional unit.  Group 8 (the flag printed
# on the report) is matched but deliberately not read; the flag is recomputed.
_RESULT_LINE_RE = re.compile(
    r"^(.*?)(\d+(\.\d+)?)(\s*-?\s*\d+(\.\d+)?\s*-?\s*\d+(\.\d+)?)?\s*([a-zA-Z/%]+)?\s*(H|L|Normal)?$"
)

# Column order of the returned DataFrame.
_RESULT_COLUMNS = ["Component", "Your Value", "Min", "Max", "Units", "Flag"]

# Stray text that is removed from component names.
_OCR_ARTIFACT = "Sir 2.0"


def _parse_result_line(line):
    """Turn one stripped report line into a result row, or None if it does not match."""
    match = _RESULT_LINE_RE.match(line)
    if match is None:
        return None

    # Clean the name *before* the default-range lookup so that a line carrying
    # the artifact still finds its entry in DEFAULT_RANGES.
    component = match.group(1).replace(_OCR_ARTIFACT, "").strip()
    value = float(match.group(2))
    unit = match.group(7)

    min_val = None
    max_val = None
    printed_range = match.group(4)
    if printed_range:
        bounds = re.findall(r"[\d.]+", printed_range)
        if len(bounds) > 0:
            min_val = float(bounds[0])
        if len(bounds) > 1:
            max_val = float(bounds[1])

    # An absent or incomplete printed range falls back to the module-level
    # defaults when the component is known there.
    if min_val is None or max_val is None:
        default_range = DEFAULT_RANGES.get(component)
        if default_range:
            min_val, max_val = default_range

    # Without both bounds the value cannot be judged and stays "Normal".
    flag = "Normal"
    if min_val is not None and max_val is not None:
        if value < min_val:
            flag = "L"
        elif value > max_val:
            flag = "H"

    return [component, value, min_val, max_val, unit, flag]


def clean_and_parse_extracted_text(raw_text):
    """
    Parse raw report text into a table of blood test results.

    The text is split on newlines; blank lines and lines that do not match
    the result-line pattern are skipped.  For every remaining line the
    component name, measured value, optional range and optional unit are
    extracted.  When the line has no complete range, the range is taken from
    ``DEFAULT_RANGES`` if the component is listed there.  The flag is always
    recomputed from the value and the range: ``"L"`` below the minimum,
    ``"H"`` above the maximum, otherwise ``"Normal"``.

    Args:
        raw_text: Text of the report.  ``None`` or an empty string yields an
            empty table.

    Returns:
        A ``pandas.DataFrame`` with the columns ``Component``, ``Your Value``,
        ``Min``, ``Max``, ``Units`` and ``Flag``, one row per parsed line.
    """
    text = raw_text or ""
    stripped_lines = (line.strip() for line in text.split("\n"))

    rows = []
    for line in stripped_lines:
        if not line:
            continue
        row = _parse_result_line(line)
        if row is not None:
            rows.append(row)

    return pd.DataFrame(rows, columns=_RESULT_COLUMNS)
|
|
|
def process_blood_test(image):
    """
    Run OCR on a blood test report image and parse the recognised text.

    The image is handed to ``pytesseract.image_to_string`` and the resulting
    text to ``clean_and_parse_extracted_text``, whose DataFrame is returned
    unchanged.
    """
    # pytesseract is imported on first use rather than at module import time.
    from pytesseract import image_to_string

    return clean_and_parse_extracted_text(image_to_string(image))
|
|
|
|
|
def analyze_blood_report(image):
    """
    Entry point used by the Gradio interface.

    Delegates to ``process_blood_test`` and returns its result table as is.
    """
    return process_blood_test(image)
|
|
|
# Web front end: a single image upload (passed to the handler as a PIL image)
# mapped to a single results table.
interface = gr.Interface(
    title="Blood Test Analyzer",
    description="Upload an image of your blood test report to analyze the values and flag abnormalities.",
    fn=analyze_blood_report,
    inputs=gr.Image(type="pil"),
    outputs=gr.DataFrame(label="Blood Test Analysis"),
)


# Launch the UI only when this file is executed directly, not when imported.
if __name__ == "__main__":
    interface.launch()
|
|