Upload 3 files
Browse files- app.py +120 -0
- best_model_v2.pth +3 -0
- requirements.txt +6 -0
app.py
ADDED
|
@@ -0,0 +1,120 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
+
import torch
|
| 3 |
+
from transformers import AutoTokenizer, AutoModelForSequenceClassification
|
| 4 |
+
import pandas as pd
|
| 5 |
+
import numpy as np
|
| 6 |
+
from sklearn.preprocessing import MultiLabelBinarizer
|
| 7 |
+
|
| 8 |
+
# Check if a GPU is available
|
| 9 |
+
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 10 |
+
|
| 11 |
+
# Load the trained model and tokenizer
|
| 12 |
+
@st.cache_resource
def load_model():
    """Build the multi-label classifier, load its saved weights, and return it with its tokenizer.

    The architecture and tokenizer come from the
    ``microsoft/BiomedNLP-BiomedBERT-base-uncased-abstract`` checkpoint,
    configured with 8 output labels for multi-label classification. The
    weights are then overwritten with the state dict stored in
    ``best_model_v2.pth``.

    Returns:
        A ``(model, tokenizer)`` tuple; the model is in eval mode and has
        been moved to the module-level ``device``.
    """
    checkpoint_name = "microsoft/BiomedNLP-BiomedBERT-base-uncased-abstract"

    model = AutoModelForSequenceClassification.from_pretrained(
        checkpoint_name,
        num_labels=8,  # Adjust based on your label count
        problem_type="multi_label_classification",
    )

    # map_location remaps the stored tensors onto the selected device, so a
    # checkpoint saved from a GPU still loads on a CPU-only host.
    state_dict = torch.load('best_model_v2.pth', map_location=device)
    model.load_state_dict(state_dict)
    model.eval()

    tokenizer = AutoTokenizer.from_pretrained(checkpoint_name)
    model = model.to(device)  # Move the model to the correct device

    return model, tokenizer
|
| 25 |
+
|
| 26 |
+
@st.cache_resource
def load_mlb():
    """Return a MultiLabelBinarizer fitted on the fixed list of code labels.

    The binarizer is constructed with an explicit ``classes`` list and then
    fitted on a single sample that contains every label.
    """
    # classes = ['E11.9', 'I10', 'J45.909', 'M54.5', 'N39.0', '81001.0', '99213.0', '99214.0']
    # NOTE(review): this order presumably has to match the label order used
    # when the model was trained -- confirm against the training code.
    label_order = [
        '81001.0', '99213.0', '99214.0',
        'E11.9', 'I10', 'J45.909', 'M54.5', 'N39.0',
    ]

    binarizer = MultiLabelBinarizer(classes=label_order)
    # Fit with the full list of labels as a single sample
    binarizer.fit([label_order])

    return binarizer
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
# # Load MultiLabelBinarizer
|
| 39 |
+
# @st.cache_resource
|
| 40 |
+
# def load_mlb():
|
| 41 |
+
# mlb = MultiLabelBinarizer()
|
| 42 |
+
# # mlb.classes_ = np.load('mlb_classes.npy') # Assuming you saved the classes array during training
|
| 43 |
+
# mlb = MultiLabelBinarizer(classes=['E11.9', 'I10', 'J45.909', 'M54.5',
|
| 44 |
+
# 'N39.0', '81001.0', '99213.0', '99214.0']) # Update with actual labels
|
| 45 |
+
|
| 46 |
+
# return mlb
|
| 47 |
+
|
| 48 |
+
model, tokenizer = load_model()
mlb = load_mlb()

# Codes reported as CPT; every other predicted code is reported as ICD.
CPT_CODES = frozenset({'81001.0', '99213.0', '99214.0'})  # Adjust based on your CPT code list

# Streamlit UI
st.title("Automated Coding and Billing Prediction")
# st.write("Enter clinical notes to predict ICD and CPT codes.")

# Text input for Clinical Notes
clinical_note = st.text_area("Enter clinical notes to predict ICD and CPT codes")

# Prediction button
if st.button('Predict'):
    # Treat whitespace-only input the same as empty input.
    if clinical_note.strip():
        # Tokenize the input clinical note
        inputs = tokenizer(
            clinical_note,
            truncation=True,
            padding="max_length",
            max_length=512,
            return_tensors='pt',
        )

        # Move inputs to the same device as the model
        inputs = {key: val.to(device) for key, val in inputs.items()}

        # Model inference
        with torch.no_grad():
            outputs = model(**inputs)
            logits = outputs.logits

        # Apply sigmoid and threshold the output (0.5 for multi-label classification)
        pred_labels = (torch.sigmoid(logits) > 0.5).cpu().numpy()

        # inverse_transform yields one tuple of labels per input row, so an
        # empty prediction comes back as [()], which is truthy. Flatten the
        # tuples so the emptiness check below reflects the actual codes.
        predicted_codes = [
            code
            for codes in mlb.inverse_transform(pred_labels)
            for code in codes
        ]

        # Format the results for better display
        if predicted_codes:
            st.write("**Predicted ICD and CPT Codes:**")
            for code in predicted_codes:
                code_kind = "CPT" if code in CPT_CODES else "ICD"
                st.write(f"- **{code_kind} Code:** {code}")
        else:
            st.write("No codes predicted.")
    else:
        st.write("Please enter clinical notes for prediction.")
|
| 92 |
+
|
| 93 |
+
|
| 94 |
+
# # Prediction button
|
| 95 |
+
# if st.button('Predict'):
|
| 96 |
+
# if clinical_note:
|
| 97 |
+
# # Tokenize the input clinical note
|
| 98 |
+
# inputs = tokenizer(clinical_note, truncation=True, padding="max_length", max_length=512, return_tensors='pt')
|
| 99 |
+
|
| 100 |
+
# # Move inputs to the GPU if available
|
| 101 |
+
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 102 |
+
# inputs = {key: val.to(device) for key, val in inputs.items()}
|
| 103 |
+
|
| 104 |
+
# # Model inference
|
| 105 |
+
# with torch.no_grad():
|
| 106 |
+
# outputs = model(**inputs)
|
| 107 |
+
# logits = outputs.logits
|
| 108 |
+
|
| 109 |
+
# # Apply sigmoid and threshold the output (0.5 for multi-label classification)
|
| 110 |
+
# pred_labels = (torch.sigmoid(logits) > 0.5).cpu().numpy()
|
| 111 |
+
|
| 112 |
+
# # Get the predicted ICD and CPT codes
|
| 113 |
+
# predicted_codes = mlb.inverse_transform(pred_labels)
|
| 114 |
+
|
| 115 |
+
# # Show the results
|
| 116 |
+
# st.write("Predicted ICD and CPT Codes:")
|
| 117 |
+
# st.write(predicted_codes)
|
| 118 |
+
|
| 119 |
+
# else:
|
| 120 |
+
# st.write("Please enter clinical notes for prediction.")
|
best_model_v2.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5c25f6e2d51f7fd990e2ef766e3f05c45bcf546f1aecdde50d26d0ca71ffff98
|
| 3 |
+
size 438036398
|
requirements.txt
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
streamlit
|
| 2 |
+
torch
|
| 3 |
+
transformers
|
| 4 |
+
pandas
|
| 5 |
+
numpy
|
| 6 |
+
scikit-learn
|