Spaces:

erayman09
/

bLoOd_AI

Sleeping

App Files Files Community

bLoOd_AI / app.py

erayman09

Update app.py

1138794 verified 9 months ago

raw

history blame

3.5 kB

	import re
	import pandas as pd
	import gradio as gr

	# Fallback reference ranges, keyed by component name. Each value is a
	# (min, max) tuple; a result is flagged when it falls outside that pair.
	# NOTE(review): units are not recorded here - confirm they match the
	# units used by the reports being parsed.
	DEFAULT_RANGES = {
	    # General panel
	    "White Blood Cell Count": (4, 11),
	    "Red Blood Cell Count": (4.4, 6),
	    "Hemoglobin": (13.5, 18),
	    "Hematocrit": (40, 52),
	    "MCV": (80, 100),
	    "MCH": (27, 33),
	    "MCHC": (31, 36),

	    # Differential, as percentages
	    "Neutrophil %": (49, 74),
	    "Lymphocyte %": (26, 46),
	    "Monocyte %": (2, 12),
	    "Eosinophil %": (0, 5),
	    "Basophil %": (0, 2),

	    # Differential, as absolute values
	    "Abs. Neutrophil": (2.0, 8.0),
	    "Abs. Lymphocyte": (1.2, 4.8),
	    "Abs. Monocyte": (0.0, 0.8),
	    "Abs. Eosinophil": (0.0, 0.5),
	    "Abs. Basophil": (0.0, 0.2),
	}

	def clean_and_parse_extracted_text(raw_text):
	    """
	    Parse OCR text from a blood test report into a table of results.

	    Every non-blank line is matched against one row pattern:
	    ``<component> <value> [<min> - <max>] [<unit>] [H|L|Normal]``.
	    Lines that do not fit the pattern are skipped. A flag printed on the
	    report is accepted by the pattern but ignored; the flag in the output
	    is always recomputed from the value and the range.

	    When a line carries no complete range, the range is looked up in
	    ``DEFAULT_RANGES`` by the cleaned component name.

	    Args:
	        raw_text: Text extracted from the report, one result per line.
	            ``None`` or an empty string yields an empty table.

	    Returns:
	        A pandas DataFrame with the columns "Component", "Your Value",
	        "Min", "Max", "Units" and "Flag". "Flag" is "L" when the value is
	        below Min, "H" when it is above Max, and "Normal" otherwise
	        (including when no range is known).
	    """
	    # Compiled once, outside the loop. The component is matched lazily so
	    # the first number on the line becomes the value; whitespace between
	    # the fields is optional, and the trailing flag is a real alternation.
	    row_pattern = re.compile(
	        r"^(?P<component>.*?)"
	        r"(?P<value>\d+(?:\.\d+)?)"
	        r"(?P<range>\s*-?\s*\d+(?:\.\d+)?\s*-?\s*\d+(?:\.\d+)?)?"
	        r"\s*(?P<unit>[a-zA-Z/%]+)?"
	        r"\s*(?:H|L|Normal)?$"
	    )
	    # Strict number token: never yields something float() would reject
	    # (a loose "[\d.]+" could return "1.52.5").
	    number_pattern = re.compile(r"\d+(?:\.\d+)?")

	    rows = []
	    for line in (raw_text or "").split("\n"):
	        line = line.strip()
	        if not line:
	            continue

	        match = row_pattern.match(line)
	        if match is None:
	            continue

	        # Drop the "Sir 2.0" artifact before the name is used as a lookup
	        # key, so the default ranges are found for the cleaned name.
	        component = match.group("component").replace("Sir 2.0", "").strip()
	        value = float(match.group("value"))
	        unit = match.group("unit")

	        min_val = None
	        max_val = None
	        range_text = match.group("range")
	        if range_text:
	            bounds = number_pattern.findall(range_text)
	            if len(bounds) > 0:
	                min_val = float(bounds[0])
	            if len(bounds) > 1:
	                max_val = float(bounds[1])

	        # Fall back to the built-in range when the line did not supply
	        # both bounds.
	        if min_val is None or max_val is None:
	            default_range = DEFAULT_RANGES.get(component)
	            if default_range:
	                min_val, max_val = default_range

	        # Values exactly on a bound count as normal.
	        flag = "Normal"
	        if min_val is not None and max_val is not None:
	            if value < min_val:
	                flag = "L"
	            elif value > max_val:
	                flag = "H"

	        rows.append([component, value, min_val, max_val, unit, flag])

	    return pd.DataFrame(
	        rows,
	        columns=["Component", "Your Value", "Min", "Max", "Units", "Flag"],
	    )

	def process_blood_test(image):
	    """
	    Run OCR on an uploaded blood test report and parse the extracted text.

	    Args:
	        image: The report image, passed straight to
	            ``pytesseract.image_to_string``, or ``None`` when nothing was
	            uploaded.

	    Returns:
	        The DataFrame produced by ``clean_and_parse_extracted_text``. It
	        has no rows when ``image`` is ``None``.
	    """
	    # With no image there is nothing to OCR. Parsing empty text still
	    # returns the table with its usual columns instead of failing inside
	    # pytesseract.
	    if image is None:
	        return clean_and_parse_extracted_text("")

	    # Function-scope import: pytesseract is only loaded when OCR is
	    # actually requested.
	    import pytesseract

	    # Step 1: extract the text from the image.
	    raw_text = pytesseract.image_to_string(image)

	    # Step 2: parse the text into the structured results table.
	    return clean_and_parse_extracted_text(raw_text)

	# Gradio Interface
	def analyze_blood_report(image):
	    """
	    Callback used by the Gradio interface for an uploaded report image.

	    Delegates to ``process_blood_test`` and returns its DataFrame unchanged.
	    """
	    return process_blood_test(image)

	# Web UI: a single image input and a single table output, both wired to
	# analyze_blood_report.
	interface = gr.Interface(
	    title="Blood Test Analyzer",
	    description="Upload an image of your blood test report to analyze the values and flag abnormalities.",
	    fn=analyze_blood_report,
	    inputs=gr.Image(type="pil"),
	    outputs=gr.DataFrame(label="Blood Test Analysis"),
	)

	# Launch the app only when this file is executed as a script.
	if __name__ == "__main__":
	    interface.launch()