kavithapadala committed on
Commit
cb2c851
·
verified ·
1 Parent(s): 7d64a6d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +120 -120
app.py CHANGED
@@ -1,120 +1,120 @@
1
- import streamlit as st
2
- import torch
3
- from transformers import AutoTokenizer, AutoModelForSequenceClassification
4
- import pandas as pd
5
- import numpy as np
6
- from sklearn.preprocessing import MultiLabelBinarizer
7
-
8
# Select the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


# Load the trained model and tokenizer
@st.cache_resource
def load_model():
    """Load the trained classifier and its tokenizer.

    Instantiates the BiomedBERT sequence-classification architecture with
    eight multi-label outputs, loads the weights stored in
    ``best_model_v2.pth`` into it, puts the model in eval mode and moves it
    to ``device``.

    Returns:
        tuple: ``(model, tokenizer)``.
    """
    checkpoint_name = "microsoft/BiomedNLP-BiomedBERT-base-uncased-abstract"
    model = AutoModelForSequenceClassification.from_pretrained(
        checkpoint_name,
        num_labels=8,  # Adjust based on your label count
        problem_type="multi_label_classification",
    )
    # map_location remaps tensors that were saved from a CUDA device, so the
    # checkpoint also loads on CPU-only hosts instead of raising.
    # NOTE(review): torch.load unpickles the file; only load trusted checkpoints.
    state_dict = torch.load('best_model_v2.pth', map_location=device)
    model.load_state_dict(state_dict)
    model.eval()
    tokenizer = AutoTokenizer.from_pretrained(checkpoint_name)
    model = model.to(device)  # Move the model to the selected device

    return model, tokenizer
25
-
26
@st.cache_resource
def load_mlb():
    """Return a MultiLabelBinarizer fitted on the eight supported codes.

    The binarizer is created with an explicit ``classes`` list and then
    fitted on that same list passed as a single sample.
    """
    # Earlier ordering, kept for reference:
    # ['E11.9', 'I10', 'J45.909', 'M54.5', 'N39.0', '81001.0', '99213.0', '99214.0']
    # NOTE(review): presumably this order has to match the label columns the
    # model was trained with -- confirm against the training script.
    label_order = [
        '81001.0', '99213.0', '99214.0',
        'E11.9', 'I10', 'J45.909', 'M54.5', 'N39.0',
    ]
    binarizer = MultiLabelBinarizer(classes=label_order)
    binarizer.fit([label_order])  # one sample containing every label
    return binarizer
36
-
37
-
38
- # # Load MultiLabelBinarizer
39
- # @st.cache_resource
40
- # def load_mlb():
41
- # mlb = MultiLabelBinarizer()
42
- # # mlb.classes_ = np.load('mlb_classes.npy') # Assuming you saved the classes array during training
43
- # mlb = MultiLabelBinarizer(classes=['E11.9', 'I10', 'J45.909', 'M54.5',
44
- # 'N39.0', '81001.0', '99213.0', '99214.0']) # Update with actual labels
45
-
46
- # return mlb
47
-
48
model, tokenizer = load_model()
mlb = load_mlb()

# Labels reported as CPT codes; every other predicted label is shown as ICD.
CPT_CODES = {'81001.0', '99213.0', '99214.0'}  # Adjust based on your CPT code list

# Streamlit UI
st.title("Automated Coding and Billing Prediction")

# Text input for Clinical Notes
clinical_note = st.text_area("Enter clinical notes to predict ICD and CPT codes")

# Prediction button
if st.button('Predict'):
    if clinical_note:
        # Tokenize the input clinical note
        inputs = tokenizer(
            clinical_note,
            truncation=True,
            padding="max_length",
            max_length=512,
            return_tensors='pt',
        )

        # Move inputs to the same device as the model
        inputs = {key: val.to(device) for key, val in inputs.items()}

        # Model inference
        with torch.no_grad():
            logits = model(**inputs).logits

        # Apply sigmoid and threshold the output (0.5 for multi-label classification)
        pred_labels = (torch.sigmoid(logits) > 0.5).cpu().numpy()

        # inverse_transform returns one tuple per input row, so the outer list
        # is never empty; flatten it and test the codes themselves, otherwise
        # the "No codes predicted." branch can never run.
        predicted_codes = mlb.inverse_transform(pred_labels)
        codes = [code for sample in predicted_codes for code in sample]

        # Format the results for better display
        if codes:
            st.write("**Predicted ICD and CPT Codes:**")
            for code in codes:
                kind = "CPT Code" if code in CPT_CODES else "ICD Code"
                st.write(f"- **{kind}:** {code}")
        else:
            st.write("No codes predicted.")
89
-
90
- # else:
91
- # st.write("Please enter clinical notes for prediction.")
92
-
93
-
94
- # # Prediction button
95
- # if st.button('Predict'):
96
- # if clinical_note:
97
- # # Tokenize the input clinical note
98
- # inputs = tokenizer(clinical_note, truncation=True, padding="max_length", max_length=512, return_tensors='pt')
99
-
100
- # # Move inputs to the GPU if available
101
- # device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
102
- # inputs = {key: val.to(device) for key, val in inputs.items()}
103
-
104
- # # Model inference
105
- # with torch.no_grad():
106
- # outputs = model(**inputs)
107
- # logits = outputs.logits
108
-
109
- # # Apply sigmoid and threshold the output (0.5 for multi-label classification)
110
- # pred_labels = (torch.sigmoid(logits) > 0.5).cpu().numpy()
111
-
112
- # # Get the predicted ICD and CPT codes
113
- # predicted_codes = mlb.inverse_transform(pred_labels)
114
-
115
- # # Show the results
116
- # st.write("Predicted ICD and CPT Codes:")
117
- # st.write(predicted_codes)
118
-
119
- # else:
120
- # st.write("Please enter clinical notes for prediction.")
 
1
+ import streamlit as st
2
+ import torch
3
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
4
+ import pandas as pd
5
+ import numpy as np
6
+ from sklearn.preprocessing import MultiLabelBinarizer
7
+
8
# Select the GPU when one is available, otherwise fall back to the CPU.
# This definition must stay active: load_model() below references `device`,
# and leaving it commented out raises NameError on startup.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


# Load the trained model and tokenizer
@st.cache_resource
def load_model():
    """Load the trained classifier and its tokenizer.

    Instantiates the BiomedBERT sequence-classification architecture with
    eight multi-label outputs, loads the weights stored in
    ``best_model_v2.pth`` into it, moves the model to ``device`` and puts it
    in eval mode.

    Returns:
        tuple: ``(model, tokenizer)``.
    """
    checkpoint_name = "microsoft/BiomedNLP-BiomedBERT-base-uncased-abstract"
    model = AutoModelForSequenceClassification.from_pretrained(
        checkpoint_name,
        num_labels=8,  # Adjust based on your label count
        problem_type="multi_label_classification",
    )
    # map_location remaps tensors that were saved from a CUDA device, so the
    # checkpoint also loads on CPU-only hosts instead of raising.
    # NOTE(review): torch.load unpickles the file; only load trusted checkpoints.
    state_dict = torch.load('best_model_v2.pth', map_location=device)
    model.load_state_dict(state_dict)
    model = model.to(device)  # Move the model to the selected device
    model.eval()
    tokenizer = AutoTokenizer.from_pretrained(checkpoint_name)

    return model, tokenizer
25
+
26
@st.cache_resource
def load_mlb():
    """Build the label binarizer used to turn predictions back into codes.

    A MultiLabelBinarizer is constructed with a fixed ``classes`` list and
    fitted on that list as one sample; the fitted instance is returned.
    """
    # Previous ordering, retained for reference:
    # ['E11.9', 'I10', 'J45.909', 'M54.5', 'N39.0', '81001.0', '99213.0', '99214.0']
    # NOTE(review): presumably the order must agree with the label columns
    # used when the model was trained -- confirm.
    code_labels = ['81001.0', '99213.0', '99214.0', 'E11.9', 'I10', 'J45.909', 'M54.5', 'N39.0']
    # fit() returns the estimator itself, so construction and fitting chain.
    return MultiLabelBinarizer(classes=code_labels).fit([code_labels])
36
+
37
+
38
+ # # Load MultiLabelBinarizer
39
+ # @st.cache_resource
40
+ # def load_mlb():
41
+ # mlb = MultiLabelBinarizer()
42
+ # # mlb.classes_ = np.load('mlb_classes.npy') # Assuming you saved the classes array during training
43
+ # mlb = MultiLabelBinarizer(classes=['E11.9', 'I10', 'J45.909', 'M54.5',
44
+ # 'N39.0', '81001.0', '99213.0', '99214.0']) # Update with actual labels
45
+
46
+ # return mlb
47
+
48
model, tokenizer = load_model()
mlb = load_mlb()

# Labels reported as CPT codes; every other predicted label is shown as ICD.
CPT_CODES = {'81001.0', '99213.0', '99214.0'}  # Adjust based on your CPT code list

# Streamlit UI
st.title("Automated Coding and Billing Prediction")

# Text input for Clinical Notes
clinical_note = st.text_area("Enter clinical notes to predict ICD and CPT codes")

# Prediction button
if st.button('Predict'):
    if clinical_note:
        # Tokenize the input clinical note
        inputs = tokenizer(
            clinical_note,
            truncation=True,
            padding="max_length",
            max_length=512,
            return_tensors='pt',
        )

        # Take the target device from the model itself: the module-level
        # `device` definition is commented out in this file, so referring to
        # it here would raise NameError.
        model_device = next(model.parameters()).device
        inputs = {key: val.to(model_device) for key, val in inputs.items()}

        # Model inference
        with torch.no_grad():
            logits = model(**inputs).logits

        # Apply sigmoid and threshold the output (0.5 for multi-label classification)
        pred_labels = (torch.sigmoid(logits) > 0.5).cpu().numpy()

        # inverse_transform returns one tuple per input row, so the outer list
        # is never empty; flatten it and test the codes themselves, otherwise
        # the "No codes predicted." branch can never run.
        predicted_codes = mlb.inverse_transform(pred_labels)
        codes = [code for sample in predicted_codes for code in sample]

        # Format the results for better display
        if codes:
            st.write("**Predicted ICD and CPT Codes:**")
            for code in codes:
                kind = "CPT Code" if code in CPT_CODES else "ICD Code"
                st.write(f"- **{kind}:** {code}")
        else:
            st.write("No codes predicted.")
89
+
90
+ # else:
91
+ # st.write("Please enter clinical notes for prediction.")
92
+
93
+
94
+ # # Prediction button
95
+ # if st.button('Predict'):
96
+ # if clinical_note:
97
+ # # Tokenize the input clinical note
98
+ # inputs = tokenizer(clinical_note, truncation=True, padding="max_length", max_length=512, return_tensors='pt')
99
+
100
+ # # Move inputs to the GPU if available
101
+ # device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
102
+ # inputs = {key: val.to(device) for key, val in inputs.items()}
103
+
104
+ # # Model inference
105
+ # with torch.no_grad():
106
+ # outputs = model(**inputs)
107
+ # logits = outputs.logits
108
+
109
+ # # Apply sigmoid and threshold the output (0.5 for multi-label classification)
110
+ # pred_labels = (torch.sigmoid(logits) > 0.5).cpu().numpy()
111
+
112
+ # # Get the predicted ICD and CPT codes
113
+ # predicted_codes = mlb.inverse_transform(pred_labels)
114
+
115
+ # # Show the results
116
+ # st.write("Predicted ICD and CPT Codes:")
117
+ # st.write(predicted_codes)
118
+
119
+ # else:
120
+ # st.write("Please enter clinical notes for prediction.")