erayman09 committed on
Commit
9e4acbd
·
verified ·
1 Parent(s): 60ea1bc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +41 -17
app.py CHANGED
@@ -1,25 +1,49 @@
1
  import streamlit as st
2
  from PIL import Image
3
- import easyocr
 
 
4
 
5
- # Title of the App
6
- st.title("Image-to-Text Converter for Blood Test Reports")
7
 
8
- # Allow user to upload an image
9
- uploaded_image = st.file_uploader("Upload a Blood Test Report Image", type=["png", "jpg", "jpeg"])
10
 
11
- if uploaded_image is not None:
12
- # Display the uploaded image
13
- st.image(uploaded_image, caption="Uploaded Blood Test Report", use_container_width=True)
 
14
 
15
- # Initialize EasyOCR Reader
16
- st.write("Extracting text...")
17
- reader = easyocr.Reader(['en']) # Specify language
18
- image = Image.open(uploaded_image)
19
 
20
- # Perform OCR
21
- extracted_text = reader.readtext(image, detail=0) # Extract text without bounding box details
22
 
23
- # Display the extracted text
24
- st.subheader("Extracted Text:")
25
- st.text("\n".join(extracted_text))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import streamlit as st
2
  from PIL import Image
3
+ import pytesseract
4
+ import re
5
+ import pandas as pd
6
 
7
+ # Streamlit App
8
+ st.title("Blood Test Report Parser")
9
 
10
+ # File uploader
11
+ uploaded_file = st.file_uploader("Upload a blood test report image", type=["jpg", "jpeg", "png"])
12
 
13
+ if uploaded_file is not None:
14
+ # Display uploaded image
15
+ image = Image.open(uploaded_file)
16
+ st.image(image, caption="Uploaded Image", use_container_width=True)
17
 
18
+ # Extract text using Tesseract OCR
19
+ with st.spinner("Extracting text from image..."):
20
+ extracted_text = pytesseract.image_to_string(image)
21
+ st.text_area("Extracted Text", extracted_text, height=300)
22
 
23
+ # Regex Pattern for Parsing
24
+ pattern = r"(?P<component>[A-Za-z\s%]+?)\s+(?P<your_value>[\d.]+)\s+(?P<range>[\d.]+ - [\d.]+)\s+(?P<units>[\w/%]+)(?:\s+(?P<flag>[LH]))?"
25
 
26
+ # Parse Extracted Text
27
+ data = []
28
+ for match in re.finditer(pattern, extracted_text):
29
+ component = match.group("component").strip()
30
+ your_value = float(match.group("your_value"))
31
+ range_min, range_max = map(float, match.group("range").split(" - "))
32
+ units = match.group("units")
33
+ flag = match.group("flag") if match.group("flag") else "Normal"
34
+ data.append({
35
+ "Component": component,
36
+ "Your Value": your_value,
37
+ "Min": range_min,
38
+ "Max": range_max,
39
+ "Units": units,
40
+ "Flag": flag
41
+ })
42
+
43
+ # Convert to DataFrame
44
+ if data:
45
+ df = pd.DataFrame(data)
46
+ st.success("Parsed Data Successfully!")
47
+ st.dataframe(df)
48
+ else:
49
+ st.error("No valid data found in the extracted text.")