import streamlit as st
import torch
import torch.nn as nn
import transformers
from transformers import AutoTokenizer,AutoModel
import numpy as np
import torch.nn as nn
import matplotlib.pyplot as plt
import torch.nn.functional as F

class BCNN(nn.Module):
    """PhoBERT encoder followed by a BiLSTM and two parallel 1-D convolutions.

    The token representations produced by the encoder are passed through a
    bidirectional LSTM, then through two convolutions (kernel sizes 4 and 5).
    Each convolution output is max-pooled over the sequence axis, the pooled
    vectors are concatenated, and a linear layer maps them to class logits.

    Args:
        embedding_dim (int): Feature size of the encoder output fed to the LSTM.
        output_dim (int): Number of output logits.
        dropout (float): Dropout probability applied to the pooled features.
        bidirectional_units (int): Hidden size of each LSTM direction.
        conv_filters (sequence of int): Number of filters for the kernel-size-4
            and kernel-size-5 convolutions, in that order.
    """

    def __init__(self, embedding_dim, output_dim,
                 dropout, bidirectional_units, conv_filters):
        super().__init__()
        # Attribute names and registration order are kept stable so that
        # previously saved checkpoints of this class still load.
        self.bert = AutoModel.from_pretrained('vinai/phobert-base-v2')
        self.bidirectional_lstm = nn.LSTM(
            embedding_dim, bidirectional_units, bidirectional=True, batch_first=True
        )

        # A bidirectional LSTM emits forward and backward states side by side.
        lstm_channels = 2 * bidirectional_units
        self.conv1 = nn.Conv1d(in_channels=lstm_channels, out_channels=conv_filters[0], kernel_size=4)
        self.conv2 = nn.Conv1d(in_channels=lstm_channels, out_channels=conv_filters[1], kernel_size=5)

        # The classifier sees both pooled feature vectors concatenated, so its
        # input width must follow conv_filters instead of a fixed 64.
        self.fc = nn.Linear(conv_filters[0] + conv_filters[1], output_dim)

        self.dropout = nn.Dropout(dropout)

    def forward(self, b_input_ids, b_input_mask):
        """Compute class logits for a batch of tokenized texts.

        Args:
            b_input_ids (torch.Tensor): Token ids, [batch size, sent len].
            b_input_mask (torch.Tensor): Attention mask with the same shape.

        Returns:
            torch.Tensor: Unnormalized logits, [batch size, output_dim].
        """
        # Element 0 of the encoder output holds the per-token hidden states.
        encoded = self.bert(b_input_ids, attention_mask=b_input_mask)[0]
        lstm_out, _ = self.bidirectional_lstm(encoded)

        # Conv1d expects channels before the sequence axis.
        features = lstm_out.permute(0, 2, 1)
        conved_1 = F.relu(self.conv1(features))
        conved_2 = F.relu(self.conv2(features))
        # conved_n = [batch size, n_filters, sent len - kernel_size[n] + 1]

        # Max over the whole sequence axis, then drop the singleton axis.
        pooled_1 = F.max_pool1d(conved_1, conved_1.shape[2]).squeeze(2)
        pooled_2 = F.max_pool1d(conved_2, conved_2.shape[2]).squeeze(2)
        # pooled_n = [batch size, n_filters]

        cat = self.dropout(torch.cat((pooled_1, pooled_2), dim=1))
        # cat = [batch size, conv_filters[0] + conv_filters[1]]

        return self.fc(cat)

class TextClassificationApp:
    """Streamlit front end that classifies free text with a saved BCNN model.

    The app tokenizes the entered text, runs it through the loaded model and
    shows the most probable class together with its probability.
    """

    def __init__(self, model_path, class_names, model_name='vinai/phobert-base-v2'):
        """
        Initialize Streamlit Text Classification App

        Args:
            model_path (str): Path to the pre-trained .pt model file
            class_names (list): List of classification labels, indexed by the
                class id the model predicts
            model_name (str): Hugging Face model name for tokenization
        """
        # Page configuration has to be issued before any other Streamlit call.
        st.set_page_config(
            page_title="Text Classification", 
            page_icon="📝", 
            layout="wide"
        )

        # Run on the GPU when one is available, otherwise on the CPU.
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

        self.tokenizer = AutoTokenizer.from_pretrained(model_name)

        # NOTE(review): Streamlit re-executes the script on each interaction,
        # so the model is reloaded every time; consider caching the load.
        self.model = self._load_model(model_path)

        self.class_names = class_names

        # Every input is padded or truncated to this many tokens.
        self.max_length = 128

    def _load_model(self, model_path):
        """Load the classifier from ``model_path`` onto ``self.device``.

        Args:
            model_path (str): Path to the serialized model file.

        Returns:
            torch.nn.Module: The model, moved to ``self.device`` and switched
            to evaluation mode.
        """
        # SECURITY: a fully pickled model executes code while unpickling, so
        # only load checkpoints from a trusted source. ``weights_only=False``
        # is stated explicitly because newer PyTorch releases default to True,
        # which rejects pickled model objects.
        loaded = torch.load(model_path, map_location=self.device, weights_only=False)

        if isinstance(loaded, torch.nn.Module):
            model = loaded
        else:
            # Otherwise treat the file as a state_dict for the architecture
            # this app was written for.
            model = BCNN(
                embedding_dim=768,
                output_dim=2,
                dropout=0.1,
                bidirectional_units=128,
                conv_filters=[32, 32],  # filters for kernel sizes 4 and 5
            )
            model.load_state_dict(loaded)

        # Inputs are moved to self.device, so the model must live there too.
        model.to(self.device)
        model.eval()
        return model

    def preprocess_text(self, text):
        """
        Preprocess input text for model prediction

        Args:
            text (str): Input text to classify

        Returns:
            tuple: (input_ids, attention_mask) tensors on ``self.device``,
            each with a leading batch axis of size 1
        """
        encoded = self.tokenizer(
            text,
            add_special_tokens=True,
            max_length=self.max_length,
            padding='max_length',
            truncation=True,
            return_tensors='pt'
        )
        input_ids = encoded['input_ids'].to(self.device)
        attention_mask = encoded['attention_mask'].to(self.device)
        return input_ids, attention_mask

    def predict(self, text):
        """
        Make prediction on the input text

        Args:
            text (str): Input text to classify

        Returns:
            tuple: (class ids, probabilities) as NumPy arrays holding the
            single most probable class and its softmax probability
        """
        input_ids, attention_mask = self.preprocess_text(text)

        # Inference only: no gradients are needed.
        with torch.no_grad():
            logits = self.model(input_ids, attention_mask)
            probabilities = torch.softmax(logits, dim=1)
            top_probs, top_classes = torch.topk(probabilities, k=1)

        # Drop the batch axis and hand plain NumPy arrays to the UI code.
        return top_classes[0].cpu().numpy(), top_probs[0].cpu().numpy()

    def run(self):
        """
        Main Streamlit app runner
        """
        st.title("📄 Text Classification")
        st.write("Enter text to classify")

        text_input = st.text_area(
            "Paste your text here", 
            height=250,
            placeholder="Enter the text you want to classify..."
        )

        # Nothing to do until the user presses the button.
        if not st.button("Classify Text"):
            return

        if not text_input.strip():
            st.warning("Please enter some text to classify")
            return

        top_classes, top_probs = self.predict(text_input)

        st.subheader("Classification Results")

        # Keep the three-column layout, but never create fewer columns than
        # there are predictions to show.
        cols = st.columns(max(3, len(top_classes)))

        for i, (cls, prob) in enumerate(zip(top_classes, top_probs)):
            with cols[i]:
                st.metric(
                    label=f"Top {i+1} Prediction", 
                    value=f"{self.class_names[int(cls)]}", 
                    delta=f"{float(prob):.2%}"
                )

        with st.expander("Input Text Details"):
            st.write("**Original Text:**")
            st.write(text_input)
            st.write(f"**Text Length:** {len(text_input)} characters")

def main():
    """Create the classification app with its labels and start it."""
    # Position i in this list is the label displayed for predicted class i.
    labels = ['Non-toxic', 'Toxic']

    # Location of the serialized model; adjust to your environment.
    checkpoint = '/workspaces/final-project-dl/toxic.pt'

    TextClassificationApp(checkpoint, labels).run()

# Start the app only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()