import logging

import numpy as np
import pandas as pd
import spacy
from transformers import AutoModelForCausalLM, AutoTokenizer
from sentence_transformers import CrossEncoder

import src.backend.util as util

# Set up basic configuration for logging
logging.basicConfig(level=logging.INFO,
                    format='%(asctime)s - %(levelname)s - %(message)s')

# Load spaCy model for word tokenization
nlp = spacy.load("en_core_web_sm")


def load_evaluation_model(model_path):
    """Load the evaluation model from the given path.

    Args:
        model_path (str): Path to the evaluation model.

    Returns:
        CrossEncoder: The evaluation model.
    """
    model = CrossEncoder(model_path)
    return model


class ModelLoadingException(Exception):
    """Exception raised for errors in loading a model.

    Attributes:
        model_id (str): The model identifier.
        revision (str): The model revision.
    """

    def __init__(self, model_id, revision, message="Error initializing model"):
        self.model_id = model_id
        self.revision = revision
        super().__init__(f"{message} id={model_id} revision={revision}")


class SummaryGenerator:
    """A class to generate summaries using a causal language model.

    Attributes:
        tokenizer (AutoTokenizer): Tokenizer for the model.
        model (AutoModelForCausalLM): The causal language model.
        summaries_df (DataFrame): DataFrame to store generated summaries.
        revision (str): Model revision.
        avg_length (float): Average length of summaries.
        answer_rate (float): Rate of non-empty summaries.
    """

    def __init__(self, model_id, revision):
        """
        Initializes the SummaryGenerator with a model.

        Args:
            model_id (str): Identifier for the model.
            revision (str): Revision of the model.
        """
        try:
            # `revision` must be passed as a keyword argument to from_pretrained.
            self.tokenizer = AutoTokenizer.from_pretrained(model_id, revision=revision)
            self.model = AutoModelForCausalLM.from_pretrained(model_id, revision=revision)
        except Exception as e:
            logging.error(f"Error initializing model with id {model_id} and revision {revision}: {e}")
            raise ModelLoadingException(model_id, revision) from e
        self.summaries_df = pd.DataFrame()
        self.revision = revision
        self.avg_length = None
        self.answer_rate = None
        self.error_rate = None

    def generate_summaries(self, df):
        """Generate summaries for a given DataFrame of source docs.

        Args:
            df (DataFrame): DataFrame containing source docs.

        Returns:
            summaries_df (DataFrame): Generated summaries by the model.
        """
        source, summary, dataset = [], [], []
        error_count = 0
        for index, row in df.iterrows():
            _source = row['text']
            _dataset = row['dataset']
            prompt = util.generate_prompt(_source)
            # The revision is fixed at load time; tokenizer calls, generate(),
            # and decode() do not accept a `revision` argument.
            inputs = self.tokenizer(prompt, return_tensors='pt',
                                    max_length=1024, truncation=True)
            try:
                # Greedy decoding (do_sample=False), so no temperature is needed.
                outputs = self.model.generate(**inputs, max_new_tokens=1024, do_sample=False)
                response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
            except Exception as e:
                logging.error(f"Error at index {index}: {e}")
                response = ""
                error_count += 1

            summary.append(response)
            source.append(_source)
            dataset.append(_dataset)

        self.summaries_df = pd.DataFrame(list(zip(source, summary, dataset)),
                                         columns=["source", "summary", "dataset"])
        self._compute_avg_length()
        self._compute_answer_rate()
        self._compute_error_rate(error_count)
        return self.summaries_df

    def _compute_avg_length(self):
        """
        Compute the average length of non-empty summaries using spaCy.
""" total_words = 0 count = 0 for summary in self.summaries_df['summary']: if summary != "": doc = nlp(summary) words = [token.text for token in doc if token.is_alpha] total_words += len(words) count += 1 self.avg_length = 0 if count == 0 else total_words / count def _compute_answer_rate(self): """ Compute the rate of non-empty summaries. """ non_empty_count = sum(1 for summary in self.summaries_df['summary'] if summary) total_rows = len(self.summaries_df) self.answer_rate = 0 if total_rows == 0 else non_empty_count / total_rows def _compute_error_rate(self, count): """ Compute the error rate of summaries. """ total_rows = len(self.summaries_df) self.error_rate = 0 if total_rows == 0 else count / total_rows class EvaluationModel: """A class to evaluate generated summaries. Attributes: model (CrossEncoder): The evaluation model. scores (list): List of evaluation scores. accuracy (float): Accuracy of the summaries. hallucination_rate (float): Rate of hallucination in summaries. """ def __init__(self, model_path): """ Initializes the EvaluationModel with a CrossEncoder model. Args: model_path (str): Path to the CrossEncoder model. """ self.model = load_evaluation_model(model_path) self.scores = [] self.accuracy = None self.hallucination_rate = None def evaluate_hallucination(self, summaries_df): """ Evaluate the hallucination rate in summaries. This method updates the 'scores' attribute of the instance with the computed scores. Args: summaries_df (DataFrame): DataFrame containing source docs and summaries. Returns: list: List of hallucination scores. Also updates the 'scores' attribute of the instance. """ source_docs = np.array(summaries_df['source']) generated_summaries = np.array(summaries_df['summary']) try: scores = self.model.predict(source_docs, generated_summaries) self.scores = scores return self.scores except Exception as e: logging.error(f"Error evaluating hallucination: {e}") raise def compute_accuracy(self, threshold=0.5): """ Compute the accuracy of the evaluated summaries based on the previously calculated scores. This method relies on the 'scores' attribute being populated, typically via the 'evaluate_hallucination' method. Returns: float: Accuracy percentage. Also updates the 'accuracy' and 'hallucination_rate' attributes of the instance. Raises: ValueError: If scores have not been calculated prior to calling this method. """ if not self.scores: error_msg = "Scores not calculated. Call evaluate_hallucination() first." logging.error(error_msg) raise ValueError(error_msg) # Use threshold of 0.5 to compute accuracy num_above_threshold = sum(score >= threshold for score in self.scores) num_total = len(self.scores) if not num_total: raise ValueError("No scores available to compute accuracy.") self.accuracy = (num_above_threshold / num_total) * 100 self.hallucination_rate = 100 - self.accuracy return self.accuracy