# bLoOd_AI / app.py
# Hugging Face Space by erayman09 -- "Update app.py" (commit efbd192, verified, 2.24 kB)
import pytesseract
import pandas as pd
import re
def extract_text(image):
    """Extract text from the image using Tesseract.

    Args:
        image: The image to run OCR on; it is handed unchanged to
            ``pytesseract.image_to_string``.

    Returns:
        Whatever ``pytesseract.image_to_string`` returns for ``image``.
    """
    return pytesseract.image_to_string(image)
def clean_and_parse_extracted_text(raw_text):
    """Parse raw report text and return only the out-of-range results.

    Every non-blank line is matched against a pattern of the form
    ``<component> <value> [<min> - <max>] [<unit>] [H|L|Normal]``. The flag
    is recomputed from the value and the reference range; any flag printed
    on the line itself is not used. Rows whose value lies inside the range,
    or that carry no usable range, are dropped.

    Args:
        raw_text: Text to parse, one report row per line. ``None`` or an
            empty string yields an empty table.

    Returns:
        A ``pandas.DataFrame`` with the columns ``Component``,
        ``Your Value``, ``Min``, ``Max``, ``Units`` and ``Flag``, holding
        only rows flagged ``"L"`` (below range) or ``"H"`` (above range).
    """
    # Compiled once, outside the loop. Group layout: 1 = component text,
    # 2 = measured value, 4 = reference range, 7 = unit, 8 = printed flag.
    row_pattern = re.compile(
        r"^(.*?)(\d+(\.\d+)?)(\s*-?\s*\d+(\.\d+)?\s*-?\s*\d+(\.\d+)?)?\s*([a-zA-Z/%]+)?\s*(H|L|Normal)?$"
    )
    number_pattern = re.compile(r"\d+(?:\.\d+)?")

    # Fix misspellings and inconsistencies (if any known issues exist).
    # NOTE(review): applied as an exact match on the component name --
    # confirm this is the intended use of the map.
    correction_map = {
        "emoglobin": "Hemoglobin",
        "ematocrit": "Hematocrit",
    }

    data = []
    for line in (raw_text or "").split("\n"):
        line = line.strip()
        if not line:
            continue

        match = row_pattern.match(line)
        if match is None:
            # Not a result row (headings, names, free text, ...).
            continue

        component = match.group(1).strip()
        component = correction_map.get(component, component)
        value = float(match.group(2))

        # NOTE(review): the range is read as the first two unsigned numbers
        # in group 4; dashes are treated as separators, not minus signs.
        min_val = max_val = None
        range_text = match.group(4)
        if range_text:
            bounds = number_pattern.findall(range_text)
            if len(bounds) >= 2:
                min_val, max_val = float(bounds[0]), float(bounds[1])

        unit = match.group(7)
        flag = "Normal"  # Default flag

        # Determine the flag based on value and range
        if min_val is not None and max_val is not None:
            if value < min_val:
                flag = "L"
            elif value > max_val:
                flag = "H"

        # Only append the data if the flag is abnormal (L or H)
        if flag != "Normal":
            data.append([component, value, min_val, max_val, unit, flag])

    # Create a DataFrame; the explicit columns keep the schema stable even
    # when no row was flagged.
    df = pd.DataFrame(
        data,
        columns=["Component", "Your Value", "Min", "Max", "Units", "Flag"],
    )
    return df
def display_results(df):
    """Show the flagged abnormalities as a table.

    Args:
        df: The table to show; passed straight to ``st.dataframe``.
    """
    table_options = {"use_container_width": True}
    st.dataframe(df, **table_options)
# Streamlit app: page heading, short instructions, and the upload widget.
ACCEPTED_IMAGE_TYPES = ["png", "jpg", "jpeg"]

st.title("Blood Report Analyzer")
st.write("Upload an image of a blood test report to analyze.")
uploaded_file = st.file_uploader("Upload Image", type=ACCEPTED_IMAGE_TYPES)
# Parse the extracted text into a structured format
parsed_data = clean_and_parse_extracted_text(extracted_text)
# Display the structured data (only abnormalities)
st.subheader("Flagged Abnormalities")
display_results(parsed_data)
except Exception as e: