File size: 1,671 Bytes
3f3a781
 
9e4acbd
 
 
3f3a781
9e4acbd
 
3f3a781
9e4acbd
 
3f3a781
9e4acbd
 
 
 
3f3a781
9e4acbd
 
 
 
3f3a781
9e4acbd
 
3f3a781
9e4acbd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
import streamlit as st
from PIL import Image
import pytesseract
import re
import pandas as pd

# Streamlit app: OCR an uploaded blood test report image and tabulate the
# result rows found in the recognised text.

# A number as printed on a report: "5", "5.", "5.25" or ".5". Every string this
# accepts is valid input for float(), so parsing can never raise ValueError on
# OCR noise such as "1.2.3" or a stray "." (those rows are simply not matched).
_NUMBER = r"(?:\d+(?:\.\d*)?|\.\d+)"

# One word of a component name: starts with a letter or "%" and may contain
# digits after that, so names like "B12" or "HbA1c" stay in one piece.
_WORD = r"[A-Za-z%][A-Za-z0-9%]*"

# One result row: "<component> <value> <min> - <max> <units> [flag]".
# Only spaces and tabs separate the fields, so a row never extends across a
# line break: a header line cannot leak into the next component name, and the
# first letter of the following row cannot be mistaken for an H/L flag.
REPORT_ROW_PATTERN = re.compile(
    rf"(?P<component>{_WORD}(?:[ \t]+{_WORD})*?)"
    rf"[ \t]+(?P<your_value>{_NUMBER})"
    # The range separator may be a hyphen, en dash or em dash, with or without
    # surrounding blanks ("4.0 - 11.0", "4.0-11.0").
    rf"[ \t]+(?P<range_min>{_NUMBER})[ \t]*[-\u2013\u2014][ \t]*"
    rf"(?P<range_max>{_NUMBER})"
    rf"[ \t]+(?P<units>[\w/%]+)"
    # The flag must be a standalone token (hence the trailing \b); longer
    # spellings are listed first so "High" is not cut short at "H".
    rf"(?:[ \t]+(?P<flag>HIGH|LOW|High|Low|H|L)\b)?"
)


def parse_report_text(text: str) -> list:
    """Extract blood test result rows from OCR text.

    Args:
        text: Raw text as returned by the OCR step. Each result row is
            expected on a single line.

    Returns:
        A list with one dict per recognised row, in order of appearance, with
        the keys "Component", "Your Value", "Min", "Max", "Units" and "Flag".
        "Flag" is "H" or "L" when the row carries a high/low marker and
        "Normal" otherwise. The list is empty when nothing matches.
    """
    rows = []
    for match in REPORT_ROW_PATTERN.finditer(text):
        raw_flag = match.group("flag")
        rows.append({
            "Component": match.group("component").strip(),
            "Your Value": float(match.group("your_value")),
            "Min": float(match.group("range_min")),
            "Max": float(match.group("range_max")),
            "Units": match.group("units"),
            # Normalise "High"/"LOW"/... to the single-letter form.
            "Flag": raw_flag[0].upper() if raw_flag else "Normal",
        })
    return rows


def main() -> None:
    """Render the page: upload an image, run OCR on it, show the parsed rows."""
    st.title("Blood Test Report Parser")

    # File uploader
    uploaded_file = st.file_uploader(
        "Upload a blood test report image", type=["jpg", "jpeg", "png"]
    )
    if uploaded_file is None:
        # Nothing uploaded yet; only the title and uploader are shown.
        return

    # Display uploaded image
    image = Image.open(uploaded_file)
    st.image(image, caption="Uploaded Image", use_container_width=True)

    # Extract text using Tesseract OCR
    with st.spinner("Extracting text from image..."):
        extracted_text = pytesseract.image_to_string(image)
    st.text_area("Extracted Text", extracted_text, height=300)

    # Parse the extracted text and show the result as a table
    data = parse_report_text(extracted_text)
    if data:
        df = pd.DataFrame(data)
        st.success("Parsed Data Successfully!")
        st.dataframe(df)
    else:
        st.error("No valid data found in the extracted text.")


# Streamlit executes the script under the module name "__main__", so the page
# is rendered on every run while importing this file stays side-effect free.
if __name__ == "__main__":
    main()