Spaces:

Tachygraphy-Microtext-Normalization-IEMK25
/

Tachygraphy-Microtext-Analysis-and-Normalization-ArchismanCoder

Sleeping

Tachygraphy-Microtext-Analysis-and-Normalization-ArchismanCoder

File size: 10,712 Bytes

4a97943

from imports import *
import importlib.util
import os
import sys
import joblib

sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), )))

# from hmv_cfg_base_stage1.model1 import load_model as load_model1
# from hmv_cfg_base_stage1.model1 import predict as predict1

BASE_DIR = os.path.dirname(os.path.abspath(__file__))
CONFIG_STAGE1 = os.path.join(BASE_DIR, "config", "stage1_models.json")
LOADERS_STAGE1 = os.path.join(BASE_DIR, "hmv-cfg-base-stage1")

# Load the model and tokenizer
# model_name = "tachygraphy-microtrext-norm-org/DeBERTa-v3-seqClassfication-LV1-SentimentPolarities-Batch8"
# tokenizer = AutoTokenizer.from_pretrained(model_name)
# model = AutoModel.from_pretrained(model_name)

SENTIMENT_POLARITY_LABELS = [
    "negative", "neutral", "positive"
]

current_model = None
current_tokenizer = None

# Enabling Resource caching
@st.cache_resource

def load_model_config():
    with open(CONFIG_STAGE1, "r") as f:
        model_data = json.load(f)

    model_options = {v["name"]: v for v in model_data.values()}  # Extract names for dropdown
    return model_data, model_options

MODEL_DATA, MODEL_OPTIONS = load_model_config()



# def load_model():
#     model = DebertaV2ForSequenceClassification.from_pretrained(model_name)
#     tokenizer = DebertaV2Tokenizer.from_pretrained(model_name)
#     return model, tokenizer


# ✅ Dynamically Import Model Functions
def import_from_module(module_name, function_name):
    try:
        module = importlib.import_module(module_name)
        return getattr(module, function_name)
    except (ModuleNotFoundError, AttributeError) as e:
        st.error(f"❌ Import Error: {e}")
        return None


def free_memory():
    #  """Free up CPU & GPU memory before loading a new model."""
    global current_model, current_tokenizer

    if current_model is not None:
        del current_model  # Delete the existing model
        current_model = None  # Reset reference

    if current_tokenizer is not None:
        del current_tokenizer  # Delete the tokenizer
        current_tokenizer = None

    gc.collect()  # Force garbage collection for CPU memory

    if torch.cuda.is_available():  
        torch.cuda.empty_cache()  # Free GPU memory
        torch.cuda.ipc_collect()  # Clean up PyTorch GPU cache

    # If running on CPU, reclaim memory using OS-level commands
    try:
        if torch.cuda.is_available() is False:
            psutil.virtual_memory()  # Refresh memory stats
    except Exception as e:
        print(f"Memory cleanup error: {e}")


def load_selected_model(model_name):
    global current_model, current_tokenizer

    free_memory()

    # st.write("DEBUG: Available Models:", MODEL_OPTIONS.keys())  # ✅ See available models
    # st.write("DEBUG: Selected Model:", MODEL_OPTIONS[model_name])  # ✅ Check selected model
    # st.write("DEBUG: Model Name:", model_name)  # ✅ Check selected model

    if model_name not in MODEL_OPTIONS:
        st.error(f"⚠️ Model '{model_name}' not found in config!")
        return None, None, None

    model_info = MODEL_OPTIONS[model_name]
    hf_location = model_info["hf_location"]

    model_module = model_info["module_path"]
    load_function = model_info["load_function"]
    predict_function = model_info["predict_function"]

    load_model_func = import_from_module(model_module, load_function)
    predict_func = import_from_module(model_module, predict_function)

    if load_model_func is None or predict_func is None:
        st.error("❌ Model functions could not be loaded!")
        return None, None, None

    model, tokenizer = load_model_func()
    
    current_model, current_tokenizer = model, tokenizer
    return model, tokenizer, predict_func

# def load_selected_model(model_name):
#     # """Load model and tokenizer based on user selection."""
#     global current_model, current_tokenizer

#     # Free memory before loading a new model
#     free_memory()

#     if model_name not in MODEL_OPTIONS:
#         st.error(f"⚠️ Model '{model_name}' not found in config!")
#         return None, None

#     model_info = MODEL_OPTIONS[model_name]
#     hf_location = model_info["hf_location"]

#     model_module = model_info["module_path"]
#     # load_function = "load_model"
#     # predict_function = "predict"

#     load_function = model_info["load_function"]
#     predict_function = model_info["predict_function"]

#     # tokenizer_class = globals()[model_info["tokenizer_class"]]
#     # model_class = globals()[model_info["model_class"]]

#     # tokenizer = tokenizer_class.from_pretrained(hf_location)


#     load_model_func = import_from_module(model_module, load_function)
#     predict_func = import_from_module(model_module, predict_function)

#     # # Load model
#     # if model_info["type"] == "custom_checkpoint" or model_info["type"] == "custom_model":
#     #     model = torch.load(hf_location, map_location="cpu")  # Load PyTorch model
#     # elif model_info["type"] == "hf_automodel_finetuned_dbt3":
#     #     tokenizer_class = globals()[model_info["tokenizer_class"]]
#     #     model_class = globals()[model_info["model_class"]]
#     #     tokenizer = tokenizer_class.from_pretrained(hf_location)
#     #     model = model_class.from_pretrained(hf_location,
#     #                                         problem_type=model_info["problem_type"],
#     #                                         num_labels=model_info["num_labels"]
#     #     )
#     # else:
#     #     st.error("Invalid model selection")
#     #     return None, None
    

#     if load_model_func is None or predict_func is None:
#         st.error("❌ Model functions could not be loaded!")
#         return None, None

#     # current_model, current_tokenizer = model, tokenizer  # Store references
#     # return model, tokenizer

#     model, tokenizer = load_model_func(hf_location)
    
#     current_model, current_tokenizer = model, tokenizer
#     return model, tokenizer, predict_func



def predict(text, model, tokenizer, device, max_len=128):
    # Tokenize and pad the input text
    inputs = tokenizer(
        text,
        add_special_tokens=True,
        padding=True,
        truncation=False,
        return_tensors="pt",
        return_token_type_ids=False,
    ).to(device)  # Move input tensors to the correct device

    with torch.no_grad():
        outputs = model(**inputs)

    # Apply sigmoid activation (for BCEWithLogitsLoss)
    probabilities = outputs.logits.cpu().numpy()

    return probabilities 

# def show_sentiment_analysis():

    # Add your sentiment analysis code here

    # user_input = st.text_input("Enter text for sentiment analysis:")
    # user_input = st.text_area("Enter text for sentiment analysis:", height=200)
    # user_input = st.text_area("Enter text for sentiment analysis:", max_chars=500)

def show_sentiment_analysis():
    st.title("Stage 1: Sentiment Polarity Analysis")
    st.write("This section will handle sentiment analysis.")

    if "selected_model" not in st.session_state:
        st.session_state.selected_model = list(MODEL_OPTIONS.keys())[0]  # Default selection

    if "clear_output" not in st.session_state:
        st.session_state.clear_output = False

    st.selectbox("Choose a model:", list(MODEL_OPTIONS.keys()), key="selected_model")

    selected_model = st.session_state.selected_model

    if selected_model not in MODEL_OPTIONS:
        st.error(f"❌ Selected model '{selected_model}' not found!")
        st.stop()

    st.session_state.clear_output = True  # Reset output when model changes


    # st.write("DEBUG: Available Models:", MODEL_OPTIONS.keys())  # ✅ See available models
    # st.write("DEBUG: Selected Model:", MODEL_OPTIONS[selected_model])  # ✅ Check selected model


    user_input = st.text_input("Enter text for sentiment analysis:")

    if user_input:
        # Make prediction

        # model, tokenizer = load_model()
        # model, tokenizer = load_selected_model(selected_model)

        model, tokenizer, predict_func = load_selected_model(selected_model)
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        if model is None:
            st.error("⚠️ Error: Model failed to load! Check model selection or configuration.")
            st.stop()

        model.to(device)

        # predictions = predict(user_input, model, tokenizer, device)

        predictions = predict_func(user_input, model, tokenizer, device)

        # Squeeze predictions to remove extra dimensions
        predictions_array = predictions.squeeze()

        # Convert to binary predictions (argmax)
        binary_predictions = np.zeros_like(predictions_array)
        max_indices = np.argmax(predictions_array)
        binary_predictions[max_indices] = 1

        # Display raw predictions
        st.write(f"**Predicted Sentiment Scores:** {predictions_array}")

        # Display binary classification result
        st.write(f"**Predicted Sentiment:**")
        st.write(f"**NEGATIVE:** {binary_predictions[0]}, **NEUTRAL:** {binary_predictions[1]}, **POSITIVE:** {binary_predictions[2]}")
        # st.write(f"**NEUTRAL:** {binary_predictions[1]}")
        # st.write(f"**POSITIVE:** {binary_predictions[2]}")

        # 1️⃣ **Polar Plot (Plotly)**
        sentiment_polarities = predictions_array.tolist()
        fig_polar = px.line_polar(
            pd.DataFrame(dict(r=sentiment_polarities, theta=SENTIMENT_POLARITY_LABELS)), 
            r='r', theta='theta', line_close=True
        )
        st.plotly_chart(fig_polar)

        # 2️⃣ **Normalized Horizontal Bar Chart (Matplotlib)**
        normalized_predictions = predictions_array / predictions_array.sum()

        fig, ax = plt.subplots(figsize=(8, 2))
        left = 0
        for i in range(len(normalized_predictions)):
            ax.barh(0, normalized_predictions[i], color=plt.cm.tab10(i), left=left, label=SENTIMENT_POLARITY_LABELS[i])
            left += normalized_predictions[i]

        # Configure the chart
        ax.set_xlim(0, 1)
        ax.set_yticks([])
        ax.set_xticks(np.arange(0, 1.1, 0.1))
        ax.legend(loc='upper center', bbox_to_anchor=(0.5, -0.15), ncol=len(SENTIMENT_POLARITY_LABELS))
        plt.title("Sentiment Polarity Prediction Distribution")

        # Display in Streamlit
        st.pyplot(fig)
        


if __name__ == "__main__":
    show_sentiment_analysis()