erayman09 committed on
Commit
22d46ec
·
verified ·
1 Parent(s): efbd192

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -96
app.py CHANGED
@@ -1,105 +1,26 @@
 
 
1
  import pytesseract
2
  import pandas as pd
3
  import re
4
 
5
-
6
-
7
-
8
-
9
-
10
-
11
-
12
- def extract_text(image):
13
- """
14
- Extract text from the image using Tesseract.
15
- return pytesseract.image_to_string(image)
16
-
17
-
18
-
19
- def clean_and_parse_extracted_text(raw_text):
20
- """
21
- Parse and clean the raw text to extract structured data.
22
- """
23
- # Split the text into lines and clean up
24
- lines = raw_text.split("\n")
25
- lines = [line.strip() for line in lines if line.strip()]
26
-
27
- # Identify and extract rows with valid components
28
- data = []
29
- for line in lines:
30
- # Match rows containing numeric ranges and values
31
- match = re.match(
32
- r"^(.*?)(\d+(\.\d+)?)(\s*-?\s*\d+(\.\d+)?\s*-?\s*\d+(\.\d+)?)?\s*([a-zA-Z/%]+)?\s*(H|L|Normal)?$",
33
- line,
34
- unit = match.group(7)
35
- flag = "Normal" # Default flag
36
-
37
- # Determine the flag based on value and range
38
- if min_val is not None and max_val is not None:
39
- if value < min_val:
40
- flag = "L"
41
- elif value > max_val:
42
- flag = "H"
43
-
44
- # Only append the data if the flag is abnormal (L or H)
45
- if flag != "Normal":
46
- data.append([component, value, min_val, max_val, unit, flag])
47
-
48
- # Create a DataFrame
49
- df = pd.DataFrame(data, columns=["Component", "Your Value", "Min", "Max", "Units", "Flag"])
50
-
51
- # Fix misspellings and inconsistencies (if any known issues exist)
52
- correction_map = {
53
- "emoglobin": "Hemoglobin",
54
- "ematocrit": "Hematocrit",
55
- return df
56
-
57
-
58
-
59
-
60
-
61
-
62
-
63
-
64
-
65
-
66
-
67
-
68
-
69
-
70
-
71
-
72
-
73
-
74
-
75
-
76
- def display_results(df):
77
- """
78
- Display the flagged abnormalities in a table format.
79
- """
80
-
81
-
82
-
83
-
84
-
85
-
86
- st.dataframe(df, use_container_width=True)
87
-
88
-
89
-
90
-
91
-
92
- # Streamlit app
93
- st.title("Blood Report Analyzer")
94
- st.write("Upload an image of a blood test report to analyze.")
95
 
96
  uploaded_file = st.file_uploader("Upload Image", type=["png", "jpg", "jpeg"])
97
 
98
- # Parse the extracted text into a structured format
99
- parsed_data = clean_and_parse_extracted_text(extracted_text)
 
 
 
100
 
101
- # Display the structured data (only abnormalities)
102
- st.subheader("Flagged Abnormalities")
103
- display_results(parsed_data)
104
 
105
- except Exception as e:
 
 
 
 
 
1
+ import streamlit as st
2
+ from PIL import Image
3
  import pytesseract
4
  import pandas as pd
5
  import re
6
 
7
+ st.title("Blood Test Analyzer with RAG")
8
+ st.write("Upload an image of your blood test report to analyze and get recommendations.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
 
10
  uploaded_file = st.file_uploader("Upload Image", type=["png", "jpg", "jpeg"])
11
 
12
+ if uploaded_file is not None:
13
+ try:
14
+ # Load the image
15
+ image = Image.open(uploaded_file)
16
+ st.image(image, caption="Uploaded Image", use_container_width=True)
17
 
18
+ # Step 1: Extract text using Tesseract
19
+ extracted_text = pytesseract.image_to_string(image)
20
+ st.text_area("Extracted Text", extracted_text, height=200)
21
 
22
+ # Placeholder for parsed data
23
+ st.subheader("Flagged Abnormalities")
24
+ st.write("Parsing logic and RAG recommendations will go here.")
25
+ except Exception as e:
26
+ st.error(f"An error occurred: {e}")