Upload 3 files
Browse files- app.py +120 -0
- best_model_v2.pth +3 -0
- requirements.txt +6 -0
app.py
ADDED
@@ -0,0 +1,120 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import torch
|
3 |
+
from transformers import AutoTokenizer, AutoModelForSequenceClassification
|
4 |
+
import pandas as pd
|
5 |
+
import numpy as np
|
6 |
+
from sklearn.preprocessing import MultiLabelBinarizer
|
7 |
+
|
8 |
+
# Check if a GPU is available
|
9 |
+
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
10 |
+
|
11 |
+
# Load the trained model and tokenizer
|
12 |
+
@st.cache_resource
def load_model():
    """Build the trained sequence classifier and its tokenizer.

    The BiomedBERT base architecture is instantiated with an 8-output
    multi-label classification head, and the weights stored in
    ``best_model_v2.pth`` are then loaded into it. The function is wrapped
    in ``st.cache_resource``.

    Returns:
        tuple: ``(model, tokenizer)`` where ``model`` is in eval mode and
        has been moved to the module-level ``device``.
    """
    checkpoint_name = "microsoft/BiomedNLP-BiomedBERT-base-uncased-abstract"

    model = AutoModelForSequenceClassification.from_pretrained(
        checkpoint_name,
        num_labels=8,  # Adjust based on your label count
        problem_type="multi_label_classification",
    )

    # map_location remaps the stored tensors onto the device we actually
    # have. Without it, a checkpoint that was saved from a CUDA device
    # cannot be deserialized on a CPU-only host.
    state_dict = torch.load('best_model_v2.pth', map_location=device)
    model.load_state_dict(state_dict)
    model.eval()  # inference only: disables dropout

    tokenizer = AutoTokenizer.from_pretrained(checkpoint_name)
    model = model.to(device)  # Move the model to the correct device

    return model, tokenizer
|
25 |
+
|
26 |
+
@st.cache_resource
def load_mlb():
    """Return a ``MultiLabelBinarizer`` fitted on the fixed set of codes.

    The binarizer is constructed with an explicit ``classes`` list, so the
    column order of its indicator matrix follows the order written here.

    Returns:
        MultiLabelBinarizer: the fitted binarizer.
    """
    # NOTE(review): column i of a prediction is decoded as code_labels[i];
    # presumably this order matches the label order used when the model
    # was trained -- confirm against the training script.
    code_labels = [
        '81001.0',
        '99213.0',
        '99214.0',
        'E11.9',
        'I10',
        'J45.909',
        'M54.5',
        'N39.0',
    ]

    binarizer = MultiLabelBinarizer(classes=code_labels)
    # Fitting on one sample that contains every label; fit() returns the
    # binarizer itself.
    return binarizer.fit([code_labels])
|
36 |
+
|
37 |
+
|
38 |
+
# # Load MultiLabelBinarizer
|
39 |
+
# @st.cache_resource
|
40 |
+
# def load_mlb():
|
41 |
+
# mlb = MultiLabelBinarizer()
|
42 |
+
# # mlb.classes_ = np.load('mlb_classes.npy') # Assuming you saved the classes array during training
|
43 |
+
# mlb = MultiLabelBinarizer(classes=['E11.9', 'I10', 'J45.909', 'M54.5',
|
44 |
+
# 'N39.0', '81001.0', '99213.0', '99214.0']) # Update with actual labels
|
45 |
+
|
46 |
+
# return mlb
|
47 |
+
|
48 |
+
# Load the model, tokenizer and label binarizer through their cached loaders.
model, tokenizer = load_model()
mlb = load_mlb()

# Labels reported as CPT codes; every other predicted label is reported as
# an ICD code. Adjust based on your CPT code list.
CPT_CODES = frozenset({'81001.0', '99213.0', '99214.0'})

# Streamlit UI
st.title("Automated Coding and Billing Prediction")

# Text input for Clinical Notes
clinical_note = st.text_area("Enter clinical notes to predict ICD and CPT codes")

# Prediction button
if st.button('Predict'):
    # Treat a whitespace-only note the same as an empty one.
    if clinical_note and clinical_note.strip():
        # Tokenize the input clinical note
        inputs = tokenizer(
            clinical_note,
            truncation=True,
            padding="max_length",
            max_length=512,
            return_tensors='pt',
        )

        # Move inputs to the same device as the model
        inputs = {key: val.to(device) for key, val in inputs.items()}

        # Model inference
        with torch.no_grad():
            logits = model(**inputs).logits

        # Apply sigmoid and threshold the output (0.5 for multi-label classification)
        pred_labels = (torch.sigmoid(logits) > 0.5).cpu().numpy()

        # inverse_transform returns one tuple of codes per input row, so a
        # note with no label above the threshold yields [()] -- a non-empty
        # list. Flatten the rows first so the emptiness check below reflects
        # whether any code was actually predicted.
        predicted_codes = mlb.inverse_transform(pred_labels)
        codes = [code for row in predicted_codes for code in row]

        # Format the results for better display
        if codes:
            st.write("**Predicted ICD and CPT Codes:**")
            for code in codes:
                if code in CPT_CODES:
                    st.write(f"- **CPT Code:** {code}")
                else:
                    st.write(f"- **ICD Code:** {code}")
        else:
            st.write("No codes predicted.")
    else:
        # Give feedback instead of silently ignoring the button press.
        st.write("Please enter clinical notes for prediction.")
|
92 |
+
|
93 |
+
|
94 |
+
# # Prediction button
|
95 |
+
# if st.button('Predict'):
|
96 |
+
# if clinical_note:
|
97 |
+
# # Tokenize the input clinical note
|
98 |
+
# inputs = tokenizer(clinical_note, truncation=True, padding="max_length", max_length=512, return_tensors='pt')
|
99 |
+
|
100 |
+
# # Move inputs to the GPU if available
|
101 |
+
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
102 |
+
# inputs = {key: val.to(device) for key, val in inputs.items()}
|
103 |
+
|
104 |
+
# # Model inference
|
105 |
+
# with torch.no_grad():
|
106 |
+
# outputs = model(**inputs)
|
107 |
+
# logits = outputs.logits
|
108 |
+
|
109 |
+
# # Apply sigmoid and threshold the output (0.5 for multi-label classification)
|
110 |
+
# pred_labels = (torch.sigmoid(logits) > 0.5).cpu().numpy()
|
111 |
+
|
112 |
+
# # Get the predicted ICD and CPT codes
|
113 |
+
# predicted_codes = mlb.inverse_transform(pred_labels)
|
114 |
+
|
115 |
+
# # Show the results
|
116 |
+
# st.write("Predicted ICD and CPT Codes:")
|
117 |
+
# st.write(predicted_codes)
|
118 |
+
|
119 |
+
# else:
|
120 |
+
# st.write("Please enter clinical notes for prediction.")
|
best_model_v2.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5c25f6e2d51f7fd990e2ef766e3f05c45bcf546f1aecdde50d26d0ca71ffff98
|
3 |
+
size 438036398
|
requirements.txt
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
streamlit
|
2 |
+
torch
|
3 |
+
transformers
|
4 |
+
pandas
|
5 |
+
numpy
|
6 |
+
scikit-learn
|