Spaces:

mavinsao
/

mental-illness-classification

Running

File size: 2,596 Bytes

a681e0a
 
 
 
ff694a9
a681e0a
 
436b48d
a681e0a
436b48d
 
 
 
 
a681e0a
 
 
 
 
 
436b48d
a681e0a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
436b48d
a681e0a
 
 
436b48d
a681e0a
436b48d
 
 
 
a681e0a
436b48d
 
ff694a9
436b48d
 
ff694a9
436b48d
 
a681e0a
436b48d
a681e0a
 
 
 
 
 
ff694a9
a681e0a
 
 
 
436b48d
1346a67

import torch
import torch.nn as nn
from transformers import RobertaTokenizer, RobertaModel
import json
import streamlit as st

# Set device (GPU if available, otherwise CPU)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load model directly
from transformers import AutoTokenizer, AutoModelForSequenceClassification

tokenizer = AutoTokenizer.from_pretrained("mavinsao/mi-roberta-mental-illness")
model = AutoModelForSequenceClassification.from_pretrained("mavinsao/mi-roberta-mental-illness")

# The prediction code moves its input tensors to `device`, so the model's
# weights must live on that same device; otherwise the forward pass fails
# with a device-mismatch error whenever CUDA is available.
model.to(device)
# This app only runs inference: switch to evaluation mode once at load time.
model.eval()

# Class name -> index of that class in the model's output logits.
# NOTE(review): presumably mirrors the label encoding used at training time;
# confirm against the checkpoint's config.
common_label_map = {'ADHD': 0, 'Anxiety': 1, 'bipolar': 2, 'BPD': 3, 'depression': 4, 'OCD': 5, 'ptsd': 6, 'none': 7}
num_classes = len(common_label_map)


def predict_labels(sentence, tokenizer, model, device, threshold=0.5, top_n=5):
    """Classify *sentence* and return the predicted labels as a JSON string.

    Each logit is passed through a sigmoid independently, so several labels
    (or none at all) can be reported for the same input.

    Args:
        sentence: Text to classify; handed to ``tokenizer`` unchanged.
        tokenizer: Callable returning a mapping with ``'input_ids'`` and
            ``'attention_mask'`` tensors when called with
            ``return_tensors="pt"``.
        model: Sequence-classification model whose output exposes ``.logits``.
        device: Torch device on which inference is run.
        threshold: A label is reported only if its sigmoid score is strictly
            greater than this value.
        top_n: Maximum number of labels to report.

    Returns:
        A JSON-formatted string (4-space indent) holding a list of
        ``{"label": ..., "score": ...}`` objects ordered by descending score.
        The list is empty when no score exceeds ``threshold``.
    """
    # Tokenize the sentence; input is truncated/padded to 512 tokens.
    encoded = tokenizer(
        sentence,
        add_special_tokens=True,
        max_length=512,
        padding="max_length",
        truncation=True,
        return_tensors="pt",
    )
    input_ids = encoded['input_ids'].to(device)
    attention_mask = encoded['attention_mask'].to(device)

    # Inputs were just moved to `device`; make sure the weights are there as
    # well so the forward pass cannot fail with a device mismatch.
    model.to(device)
    model.eval()

    with torch.no_grad():
        output = model(input_ids=input_ids, attention_mask=attention_mask)

    # Drop the batch dimension (a single sentence is scored per call).
    scores = torch.sigmoid(output.logits.squeeze(dim=0))

    # Keep only the classes whose score clears the threshold, best first.
    candidate_ids = torch.arange(scores.shape[-1], device=scores.device)[scores > threshold]
    ranking = torch.argsort(scores[candidate_ids], descending=True)
    selected_ids = candidate_ids[ranking][:top_n].tolist()

    # Resolve names through the ids declared in the label map instead of
    # relying on the dictionary's insertion order.
    id_to_label = {class_id: name for name, class_id in common_label_map.items()}
    predictions = [
        {"label": id_to_label[class_id], "score": scores[class_id].item()}
        for class_id in selected_ids
    ]

    return json.dumps(predictions, indent=4)


# Streamlit app
st.title('Mental Illness Prediction')

# Input text area for user input
sentence = st.text_area("Enter the long sentence to predict your mental illness state:")

# Prediction button
if st.button('Predict'):
    if not sentence or not sentence.strip():
        # Nothing to classify: scoring an empty string would only display a
        # meaningless prediction, so ask for input instead.
        st.warning("Please enter some text before requesting a prediction.")
    else:
        # Predict labels and render the returned JSON string
        predicted_response = predict_labels(sentence, tokenizer, model, device)
        st.json(predicted_response)