kavithapadala committed on
Commit
7d64a6d
·
verified ·
1 Parent(s): 7e71a4d

Upload 3 files

Browse files
Files changed (3) hide show
  1. app.py +120 -0
  2. best_model_v2.pth +3 -0
  3. requirements.txt +6 -0
app.py ADDED
@@ -0,0 +1,120 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import torch
3
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
4
+ import pandas as pd
5
+ import numpy as np
6
+ from sklearn.preprocessing import MultiLabelBinarizer
7
+
8
# Run on the GPU when CUDA is usable; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
10
+
11
+ # Load the trained model and tokenizer
12
@st.cache_resource
def load_model():
    """Load the fine-tuned sequence classifier and its tokenizer.

    The base BiomedBERT architecture is instantiated with an 8-label
    multi-label classification head, and the fine-tuned weights are then
    restored from ``best_model_v2.pth`` in the working directory.

    Returns:
        tuple: ``(model, tokenizer)``; the model is in eval mode and has
        been moved to the module-level ``device``.
    """
    base_checkpoint = "microsoft/BiomedNLP-BiomedBERT-base-uncased-abstract"

    model = AutoModelForSequenceClassification.from_pretrained(
        base_checkpoint,
        num_labels=8,  # Adjust based on your label count
        problem_type="multi_label_classification",
    )

    # map_location remaps tensors that were saved from a CUDA session so the
    # checkpoint still loads on a CPU-only host instead of raising.
    state_dict = torch.load('best_model_v2.pth', map_location=device)
    model.load_state_dict(state_dict)

    model = model.to(device)  # Move the model to the correct device
    model.eval()

    tokenizer = AutoTokenizer.from_pretrained(base_checkpoint)
    return model, tokenizer
25
+
26
@st.cache_resource
def load_mlb():
    """Build the MultiLabelBinarizer used to turn predictions back into codes.

    The binarizer is created with an explicit, fixed class order and fitted on
    a single sample containing every label.

    Returns:
        MultiLabelBinarizer: fitted binarizer over the eight known codes.
    """
    # NOTE(review): presumably this order must match the label column order
    # used when the model was trained -- confirm against the training code.
    label_order = [
        '81001.0',
        '99213.0',
        '99214.0',
        'E11.9',
        'I10',
        'J45.909',
        'M54.5',
        'N39.0',
    ]

    binarizer = MultiLabelBinarizer(classes=label_order)
    # Fit on one sample that holds the full label list.
    binarizer.fit([label_order])
    return binarizer
36
+
37
+
38
+ # # Load MultiLabelBinarizer
39
+ # @st.cache_resource
40
+ # def load_mlb():
41
+ # mlb = MultiLabelBinarizer()
42
+ # # mlb.classes_ = np.load('mlb_classes.npy') # Assuming you saved the classes array during training
43
+ # mlb = MultiLabelBinarizer(classes=['E11.9', 'I10', 'J45.909', 'M54.5',
44
+ # 'N39.0', '81001.0', '99213.0', '99214.0']) # Update with actual labels
45
+
46
+ # return mlb
47
+
48
model, tokenizer = load_model()
mlb = load_mlb()

# Codes displayed as CPT; any other predicted code is displayed as ICD.
# Adjust based on your CPT code list.
CPT_CODES = frozenset({'81001.0', '99213.0', '99214.0'})

# Streamlit UI
st.title("Automated Coding and Billing Prediction")

# Text input for clinical notes
clinical_note = st.text_area("Enter clinical notes to predict ICD and CPT codes")

# Prediction button
if st.button('Predict'):
    # Treat whitespace-only input the same as empty input.
    if clinical_note.strip():
        # Tokenize the input clinical note
        inputs = tokenizer(
            clinical_note,
            truncation=True,
            padding="max_length",
            max_length=512,
            return_tensors='pt',
        )

        # Move inputs to the same device as the model
        inputs = {key: val.to(device) for key, val in inputs.items()}

        # Model inference
        with torch.no_grad():
            logits = model(**inputs).logits

        # Apply sigmoid and threshold the output (0.5 for multi-label classification)
        pred_labels = (torch.sigmoid(logits) > 0.5).cpu().numpy()

        # inverse_transform returns one tuple of labels per input row. There is
        # exactly one row here, so take it directly; testing the outer list for
        # truthiness would always succeed, even when no label was predicted.
        predicted_codes = mlb.inverse_transform(pred_labels)[0]

        # Format the results for better display
        if predicted_codes:
            st.write("**Predicted ICD and CPT Codes:**")
            for code in predicted_codes:
                code_kind = "CPT" if code in CPT_CODES else "ICD"
                st.write(f"- **{code_kind} Code:** {code}")
        else:
            st.write("No codes predicted.")
    else:
        st.warning("Please enter clinical notes for prediction.")
89
+
90
+ # else:
91
+ # st.write("Please enter clinical notes for prediction.")
92
+
93
+
94
+ # # Prediction button
95
+ # if st.button('Predict'):
96
+ # if clinical_note:
97
+ # # Tokenize the input clinical note
98
+ # inputs = tokenizer(clinical_note, truncation=True, padding="max_length", max_length=512, return_tensors='pt')
99
+
100
+ # # Move inputs to the GPU if available
101
+ # device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
102
+ # inputs = {key: val.to(device) for key, val in inputs.items()}
103
+
104
+ # # Model inference
105
+ # with torch.no_grad():
106
+ # outputs = model(**inputs)
107
+ # logits = outputs.logits
108
+
109
+ # # Apply sigmoid and threshold the output (0.5 for multi-label classification)
110
+ # pred_labels = (torch.sigmoid(logits) > 0.5).cpu().numpy()
111
+
112
+ # # Get the predicted ICD and CPT codes
113
+ # predicted_codes = mlb.inverse_transform(pred_labels)
114
+
115
+ # # Show the results
116
+ # st.write("Predicted ICD and CPT Codes:")
117
+ # st.write(predicted_codes)
118
+
119
+ # else:
120
+ # st.write("Please enter clinical notes for prediction.")
best_model_v2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5c25f6e2d51f7fd990e2ef766e3f05c45bcf546f1aecdde50d26d0ca71ffff98
3
+ size 438036398
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ streamlit
2
+ torch
3
+ transformers
4
+ pandas
5
+ numpy
6
+ scikit-learn