|
import pytesseract |
|
import pandas as pd |
|
import re |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def extract_text(image):
    """Extract text from the image using Tesseract.

    Args:
        image: The image to run OCR on. It is handed unchanged to
            ``pytesseract.image_to_string``.

    Returns:
        The value returned by ``pytesseract.image_to_string`` for ``image``.
    """
    return pytesseract.image_to_string(image)
|
|
|
|
|
|
|
def clean_and_parse_extracted_text(raw_text):
    """Parse raw OCR text and return only the out-of-range results.

    Every non-blank line is matched against a pattern of the shape
    ``<component> <value> [<min> - <max>] [<unit>] [H|L|Normal]``. A row is
    kept only when both range bounds are present and the value lies outside
    them: ``"L"`` when below the minimum, ``"H"`` when above the maximum.

    NOTE(review): the statements that derived ``component``, ``value``,
    ``min_val`` and ``max_val`` from the match were absent from this
    function, and neither the ``re.match`` call nor ``correction_map`` was
    closed. Those parts are reconstructed here from the pattern's capture
    groups -- confirm against the intended behaviour.

    Args:
        raw_text: Text to parse, one candidate result per line. ``None`` or
            an empty string yields an empty table.

    Returns:
        A ``pandas.DataFrame`` with the columns ``Component``,
        ``Your Value``, ``Min``, ``Max``, ``Units`` and ``Flag``. It has no
        rows when nothing is out of range.
    """
    columns = ["Component", "Your Value", "Min", "Max", "Units", "Flag"]

    # Known OCR misreads (a dropped leading "H"), matched on the whole name.
    correction_map = {
        "emoglobin": "Hemoglobin",
        "ematocrit": "Hematocrit",
    }

    # Groups: 1 = component, 2 = value, 4 = reference range, 7 = unit,
    # 8 = flag printed on the report (not used; the flag is recomputed).
    row_pattern = re.compile(
        r"^(.*?)(\d+(\.\d+)?)(\s*-?\s*\d+(\.\d+)?\s*-?\s*\d+(\.\d+)?)?\s*([a-zA-Z/%]+)?\s*(H|L|Normal)?$"
    )
    number_pattern = re.compile(r"\d+(?:\.\d+)?")

    data = []
    for line in (raw_text or "").splitlines():
        line = line.strip()
        if not line:
            continue

        match = row_pattern.match(line)
        if match is None:
            continue

        component = match.group(1).strip()
        component = correction_map.get(component, component)
        value = float(match.group(2))
        unit = match.group(7)

        # The range group holds both bounds; without exactly two numbers
        # there is nothing to compare against, so the row stays "Normal".
        bounds = number_pattern.findall(match.group(4) or "")
        if len(bounds) != 2:
            continue
        min_val, max_val = (float(bound) for bound in bounds)

        # Determine the flag based on value and range.
        if value < min_val:
            flag = "L"
        elif value > max_val:
            flag = "H"
        else:
            continue  # Only abnormal rows (L or H) are reported.

        data.append([component, value, min_val, max_val, unit, flag])

    return pd.DataFrame(data, columns=columns)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def display_results(df):
    """Display the flagged abnormalities in a table format.

    Args:
        df: The table to show. It is passed unchanged to ``st.dataframe``,
            which is asked to use the full container width.
    """
    # NOTE(review): the source had an empty span between the docstring and
    # this call; if statements were lost there they are not restored here.
    st.dataframe(df, use_container_width=True)
|
|
|
|
|
|
|
|
|
|
|
# Streamlit app: page header and the upload control for the report image.
st.title("Blood Report Analyzer")
st.write("Upload an image of a blood test report to analyze.")

# Only PNG and JPEG uploads are accepted by the widget.
uploaded_file = st.file_uploader("Upload Image", type=["png", "jpg", "jpeg"])
|
|
|
# Parse the extracted text into a structured format |
|
parsed_data = clean_and_parse_extracted_text(extracted_text) |
|
|
|
# Display the structured data (only abnormalities) |
|
st.subheader("Flagged Abnormalities") |
|
display_results(parsed_data) |
|
|
|
except Exception as e: |