erayman09 committed on
Commit
b5e2857
·
verified ·
1 Parent(s): 4e5da36

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +30 -60
app.py CHANGED
@@ -1,29 +1,34 @@
1
- import streamlit as st
2
- from PIL import Image
3
- import pytesseract
4
- import pandas as pd
5
- import re
6
-
7
-
8
- def extract_text(image):
9
- """
10
- Extract text from the image using Tesseract.
11
- """
12
- return pytesseract.image_to_string(image)
13
-
 
 
 
 
 
 
 
14
 
15
  def clean_and_parse_extracted_text(raw_text):
16
  """
17
  Parse and clean the raw text to extract structured data.
18
  """
19
- # Split the text into lines and clean up
20
  lines = raw_text.split("\n")
21
  lines = [line.strip() for line in lines if line.strip()]
22
 
23
- # Identify and extract rows with valid components
24
  data = []
25
  for line in lines:
26
- # Match rows containing numeric ranges and values
27
  match = re.match(
28
  r"^(.*?)(\d+(\.\d+)?)(\s*-?\s*\d+(\.\d+)?\s*-?\s*\d+(\.\d+)?)?\s*([a-zA-Z/%]+)?\s*(H|L|Normal)?$",
29
  line,
@@ -39,9 +44,16 @@ def clean_and_parse_extracted_text(raw_text):
39
  else:
40
  min_val = None
41
  max_val = None
 
42
  unit = match.group(7)
43
  flag = "Normal" # Default flag
44
 
 
 
 
 
 
 
45
  # Determine the flag based on value and range
46
  if min_val is not None and max_val is not None:
47
  if value < min_val:
@@ -55,49 +67,7 @@ def clean_and_parse_extracted_text(raw_text):
55
  # Create a DataFrame
56
  df = pd.DataFrame(data, columns=["Component", "Your Value", "Min", "Max", "Units", "Flag"])
57
 
58
- # Fix misspellings and inconsistencies (if any known issues exist)
59
- correction_map = {
60
- "emoglobin": "Hemoglobin",
61
- "ematocrit": "Hematocrit",
62
- "% Platelet Count": "Platelet Count",
63
- "ymphocyte %": "Lymphocyte %",
64
- "L Differential Type Automated": "Differential Type",
65
- }
66
- df["Component"] = df["Component"].replace(correction_map)
67
 
68
  return df
69
-
70
-
71
- def display_results(df):
72
- """
73
- Display the parsed data in a table format.
74
- """
75
- st.dataframe(df, use_container_width=True)
76
-
77
-
78
- # Streamlit app
79
- st.title("Blood Report Analyzer")
80
- st.write("Upload an image of a blood test report to analyze.")
81
-
82
- uploaded_file = st.file_uploader("Upload Image", type=["png", "jpg", "jpeg"])
83
-
84
- if uploaded_file is not None:
85
- try:
86
- # Load the image
87
- image = Image.open(uploaded_file)
88
-
89
- # Display the uploaded image
90
- st.image(image, caption="Uploaded Image", use_container_width=True)
91
-
92
- # Extract text from the image
93
- extracted_text = extract_text(image)
94
-
95
- # Parse the extracted text into a structured format
96
- parsed_data = clean_and_parse_extracted_text(extracted_text)
97
-
98
- # Display the structured data
99
- st.subheader("Parsed Blood Test Results")
100
- display_results(parsed_data)
101
-
102
- except Exception as e:
103
- st.error(f"An error occurred: {e}")
 
1
+ # Default reference ranges for common blood components
2
+ DEFAULT_RANGES = {
3
+ "White Blood Cell Count": (4, 11),
4
+ "Red Blood Cell Count": (4.4, 6),
5
+ "Hemoglobin": (13.5, 18),
6
+ "Hematocrit": (40, 52),
7
+ "MCV": (80, 100),
8
+ "MCH": (27, 33),
9
+ "MCHC": (31, 36),
10
+ "Neutrophil %": (49, 74),
11
+ "Lymphocyte %": (26, 46),
12
+ "Monocyte %": (2, 12),
13
+ "Eosinophil %": (0, 5),
14
+ "Basophil %": (0, 2),
15
+ "Abs. Neutrophil": (2.0, 8.0),
16
+ "Abs. Lymphocyte": (1.2, 4.8),
17
+ "Abs. Monocyte": (0.0, 0.8),
18
+ "Abs. Eosinophil": (0.0, 0.5),
19
+ "Abs. Basophil": (0.0, 0.2),
20
+ }
21
 
22
  def clean_and_parse_extracted_text(raw_text):
23
  """
24
  Parse and clean the raw text to extract structured data.
25
  """
 
26
  lines = raw_text.split("\n")
27
  lines = [line.strip() for line in lines if line.strip()]
28
 
 
29
  data = []
30
  for line in lines:
31
+ # Match rows with numeric values and optional ranges
32
  match = re.match(
33
  r"^(.*?)(\d+(\.\d+)?)(\s*-?\s*\d+(\.\d+)?\s*-?\s*\d+(\.\d+)?)?\s*([a-zA-Z/%]+)?\s*(H|L|Normal)?$",
34
  line,
 
44
  else:
45
  min_val = None
46
  max_val = None
47
+
48
  unit = match.group(7)
49
  flag = "Normal" # Default flag
50
 
51
+ # Use default ranges if OCR fails to extract them
52
+ if min_val is None or max_val is None:
53
+ default_range = DEFAULT_RANGES.get(component)
54
+ if default_range:
55
+ min_val, max_val = default_range
56
+
57
  # Determine the flag based on value and range
58
  if min_val is not None and max_val is not None:
59
  if value < min_val:
 
67
  # Create a DataFrame
68
  df = pd.DataFrame(data, columns=["Component", "Your Value", "Min", "Max", "Units", "Flag"])
69
 
70
+ # Clean up component names further if necessary
71
+ df["Component"] = df["Component"].str.replace("Sir 2.0", "", regex=False).str.strip()
 
 
 
 
 
 
 
72
 
73
  return df