erayman09 committed on
Commit
cd6b2f5
·
verified ·
1 Parent(s): 1138794

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +51 -58
app.py CHANGED
@@ -1,38 +1,29 @@
1
- import re
 
 
2
  import pandas as pd
3
- import gradio as gr
4
-
5
- # Default reference ranges for common blood components
6
- DEFAULT_RANGES = {
7
- "White Blood Cell Count": (4, 11),
8
- "Red Blood Cell Count": (4.4, 6),
9
- "Hemoglobin": (13.5, 18),
10
- "Hematocrit": (40, 52),
11
- "MCV": (80, 100),
12
- "MCH": (27, 33),
13
- "MCHC": (31, 36),
14
- "Neutrophil %": (49, 74),
15
- "Lymphocyte %": (26, 46),
16
- "Monocyte %": (2, 12),
17
- "Eosinophil %": (0, 5),
18
- "Basophil %": (0, 2),
19
- "Abs. Neutrophil": (2.0, 8.0),
20
- "Abs. Lymphocyte": (1.2, 4.8),
21
- "Abs. Monocyte": (0.0, 0.8),
22
- "Abs. Eosinophil": (0.0, 0.5),
23
- "Abs. Basophil": (0.0, 0.2),
24
- }
25
 
26
  def clean_and_parse_extracted_text(raw_text):
27
  """
28
  Parse and clean the raw text to extract structured data.
29
  """
 
30
  lines = raw_text.split("\n")
31
  lines = [line.strip() for line in lines if line.strip()]
32
 
 
33
  data = []
34
  for line in lines:
35
- # Match rows with numeric values and optional ranges
36
  match = re.match(
37
  r"^(.*?)(\d+(\.\d+)?)(\s*-?\s*\d+(\.\d+)?\s*-?\s*\d+(\.\d+)?)?\s*([a-zA-Z/%]+)?\s*(H|L|Normal)?$",
38
  line,
@@ -48,16 +39,9 @@ def clean_and_parse_extracted_text(raw_text):
48
  else:
49
  min_val = None
50
  max_val = None
51
-
52
  unit = match.group(7)
53
  flag = "Normal" # Default flag
54
 
55
- # Use default ranges if OCR fails to extract them
56
- if min_val is None or max_val is None:
57
- default_range = DEFAULT_RANGES.get(component)
58
- if default_range:
59
- min_val, max_val = default_range
60
-
61
  # Determine the flag based on value and range
62
  if min_val is not None and max_val is not None:
63
  if value < min_val:
@@ -71,40 +55,49 @@ def clean_and_parse_extracted_text(raw_text):
71
  # Create a DataFrame
72
  df = pd.DataFrame(data, columns=["Component", "Your Value", "Min", "Max", "Units", "Flag"])
73
 
74
- # Clean up component names further if necessary
75
- df["Component"] = df["Component"].str.replace("Sir 2.0", "", regex=False).str.strip()
 
 
 
 
 
 
 
76
 
77
  return df
78
 
79
- def process_blood_test(image):
 
80
  """
81
- Process the uploaded blood test report and return the analyzed data.
82
  """
83
- # Step 1: Extract text from the image
84
- import pytesseract
85
- raw_text = pytesseract.image_to_string(image)
86
 
87
- # Step 2: Parse and analyze the extracted text
88
- df = clean_and_parse_extracted_text(raw_text)
89
 
90
- # Step 3: Convert the DataFrame to a readable format
91
- return df
 
92
 
93
- # Gradio Interface
94
- def analyze_blood_report(image):
95
- """
96
- Analyze the blood test report and return a table with results.
97
- """
98
- df = process_blood_test(image)
99
- return df
 
 
 
 
 
 
 
 
100
 
101
- interface = gr.Interface(
102
- fn=analyze_blood_report,
103
- inputs=gr.Image(type="pil"),
104
- outputs=gr.DataFrame(label="Blood Test Analysis"),
105
- title="Blood Test Analyzer",
106
- description="Upload an image of your blood test report to analyze the values and flag abnormalities.",
107
- )
108
 
109
- if __name__ == "__main__":
110
- interface.launch()
 
1
+ import streamlit as st
2
+ from PIL import Image
3
+ import pytesseract
4
  import pandas as pd
5
+ import re
6
+
7
+
8
+ def extract_text(image):
9
+ """
10
+ Extract text from the image using Tesseract.
11
+ """
12
+ return pytesseract.image_to_string(image)
13
+
 
 
 
 
 
 
 
 
 
 
 
 
 
14
 
15
  def clean_and_parse_extracted_text(raw_text):
16
  """
17
  Parse and clean the raw text to extract structured data.
18
  """
19
+ # Split the text into lines and clean up
20
  lines = raw_text.split("\n")
21
  lines = [line.strip() for line in lines if line.strip()]
22
 
23
+ # Identify and extract rows with valid components
24
  data = []
25
  for line in lines:
26
+ # Match rows containing numeric ranges and values
27
  match = re.match(
28
  r"^(.*?)(\d+(\.\d+)?)(\s*-?\s*\d+(\.\d+)?\s*-?\s*\d+(\.\d+)?)?\s*([a-zA-Z/%]+)?\s*(H|L|Normal)?$",
29
  line,
 
39
  else:
40
  min_val = None
41
  max_val = None
 
42
  unit = match.group(7)
43
  flag = "Normal" # Default flag
44
 
 
 
 
 
 
 
45
  # Determine the flag based on value and range
46
  if min_val is not None and max_val is not None:
47
  if value < min_val:
 
55
  # Create a DataFrame
56
  df = pd.DataFrame(data, columns=["Component", "Your Value", "Min", "Max", "Units", "Flag"])
57
 
58
+ # Fix misspellings and inconsistencies (if any known issues exist)
59
+ correction_map = {
60
+ "emoglobin": "Hemoglobin",
61
+ "ematocrit": "Hematocrit",
62
+ "% Platelet Count": "Platelet Count",
63
+ "ymphocyte %": "Lymphocyte %",
64
+ "L Differential Type Automated": "Differential Type",
65
+ }
66
+ df["Component"] = df["Component"].replace(correction_map)
67
 
68
  return df
69
 
70
+
71
+ def display_results(df):
72
  """
73
+ Display the parsed data in a table format.
74
  """
75
+ st.dataframe(df, use_container_width=True)
 
 
76
 
 
 
77
 
78
+ # Streamlit app
79
+ st.title("Blood Report Analyzer")
80
+ st.write("Upload an image of a blood test report to analyze.")
81
 
82
+ uploaded_file = st.file_uploader("Upload Image", type=["png", "jpg", "jpeg"])
83
+
84
+ if uploaded_file is not None:
85
+ try:
86
+ # Load the image
87
+ image = Image.open(uploaded_file)
88
+
89
+ # Display the uploaded image
90
+ st.image(image, caption="Uploaded Image", use_container_width=True)
91
+
92
+ # Extract text from the image
93
+ extracted_text = extract_text(image)
94
+
95
+ # Parse the extracted text into a structured format
96
+ parsed_data = clean_and_parse_extracted_text(extracted_text)
97
 
98
+ # Display the structured data
99
+ st.subheader("Parsed Blood Test Results")
100
+ display_results(parsed_data)
 
 
 
 
101
 
102
+ except Exception as e:
103
+ st.error(f"An error occurred: {e}")