Minseok Bae
Integrated backend pipelines - error occurs during model submission. (Debugging needed).
58b9de9
import logging

import numpy as np
import pandas as pd
import spacy
from transformers import AutoModelForCausalLM, AutoTokenizer
from sentence_transformers import CrossEncoder

import src.backend.util as util

# Set up basic configuration for logging
logging.basicConfig(level=logging.INFO,
                    format='%(asctime)s - %(levelname)s - %(message)s')

# Load spaCy model for word tokenization
nlp = spacy.load("en_core_web_sm")
def load_evaluation_model(model_path):
    """Load the evaluation model from the given path.

    Args:
        model_path (str): Path to the evaluation model

    Returns:
        CrossEncoder: The evaluation model
    """
    model = CrossEncoder(model_path)
    return model
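
# Illustrative only (not part of the original module): a hedged sketch of how the
# loader might be exercised. The checkpoint name below is an assumption, and
# CrossEncoder.predict expects a list of (source, summary) pairs.
#
# eval_model = load_evaluation_model("vectara/hallucination_evaluation_model")
# pair_scores = eval_model.predict([("The cat sat on the mat.", "A cat sat on a mat.")])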
class ModelLoadingException(Exception):
    """Exception raised for errors in loading a model.

    Attributes:
        model_id (str): The model identifier.
        revision (str): The model revision.
    """
    def __init__(self, model_id, revision, messages="Error initializing model"):
        self.model_id = model_id
        self.revision = revision
        super().__init__(f"{messages} id={model_id} revision={revision}")
class SummaryGenerator:
    """A class to generate summaries using a causal language model.

    Attributes:
        tokenizer (AutoTokenizer): Tokenizer for the model.
        model (AutoModelForCausalLM): The causal language model.
        summaries_df (DataFrame): DataFrame to store generated summaries.
        revision (str): Model revision.
        avg_length (float): Average length of summaries.
        answer_rate (float): Rate of non-empty summaries.
    """

    def __init__(self, model_id, revision):
        """
        Initializes the SummaryGenerator with a model.

        Args:
            model_id (str): Identifier for the model.
            revision (str): Revision of the model.
        """
        try:
            # `revision` must be passed as a keyword argument to from_pretrained.
            self.tokenizer = AutoTokenizer.from_pretrained(model_id, revision=revision)
            self.model = AutoModelForCausalLM.from_pretrained(model_id, revision=revision)
        except Exception as e:
            logging.error(f"Error initializing model with id {model_id} and revision {revision}: {e}")
            raise ModelLoadingException(model_id, revision) from e
        self.summaries_df = pd.DataFrame()
        self.revision = revision
        self.avg_length = None
        self.answer_rate = None
        self.error_rate = None
    def generate_summaries(self, df):
        """Generate summaries for a given DataFrame of source docs.

        Args:
            df (DataFrame): DataFrame containing source docs.

        Returns:
            summaries_df (DataFrame): Generated summaries by the model.
        """
        source, summary, dataset = [], [], []
        error_count = 0
        for index, row in df.iterrows():
            _source = row['text']
            _dataset = row['dataset']
            prompt = util.generate_prompt(_source)
            # Truncate long prompts; `revision` is not a tokenizer, generate, or decode argument.
            inputs = self.tokenizer(prompt, return_tensors='pt',
                                    max_length=1024, truncation=True)
            try:
                # Greedy decoding (do_sample=False), so no temperature is needed.
                outputs = self.model.generate(**inputs, max_new_tokens=1024,
                                              do_sample=False)
                response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
            except Exception as e:
                logging.error(f"Error at index {index}: {e}")
                response = ""
                error_count += 1
            summary.append(response)
            source.append(_source)
            dataset.append(_dataset)

        self.summaries_df = pd.DataFrame(list(zip(source, summary, dataset)),
                                         columns=["source", "summary", "dataset"])
        self._compute_avg_length()
        self._compute_answer_rate()
        self._compute_error_rate(error_count)
        return self.summaries_df
    def _compute_avg_length(self):
        """
        Compute the average length of non-empty summaries using spaCy.
        """
        total_words = 0
        count = 0
        for summary in self.summaries_df['summary']:
            if summary != "":
                doc = nlp(summary)
                words = [token.text for token in doc if token.is_alpha]
                total_words += len(words)
                count += 1

        self.avg_length = 0 if count == 0 else total_words / count
    def _compute_answer_rate(self):
        """
        Compute the rate of non-empty summaries.
        """
        non_empty_count = sum(1 for summary in self.summaries_df['summary'] if summary)
        total_rows = len(self.summaries_df)

        self.answer_rate = 0 if total_rows == 0 else non_empty_count / total_rows
    def _compute_error_rate(self, count):
        """
        Compute the error rate of summaries.
        """
        total_rows = len(self.summaries_df)

        self.error_rate = 0 if total_rows == 0 else count / total_rows
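
# Illustrative only (not part of the original module): a hedged sketch of driving
# SummaryGenerator. The model id, revision, and sample rows are assumptions chosen
# purely for demonstration, not values used by the actual pipeline.
#
# generator = SummaryGenerator("facebook/opt-125m", revision="main")
# docs = pd.DataFrame({"text": ["A short source document to summarize."],
#                      "dataset": ["demo"]})
# summaries = generator.generate_summaries(docs)
# print(generator.avg_length, generator.answer_rate, generator.error_rate)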
class EvaluationModel:
    """A class to evaluate generated summaries.

    Attributes:
        model (CrossEncoder): The evaluation model.
        scores (list): List of evaluation scores.
        accuracy (float): Accuracy of the summaries.
        hallucination_rate (float): Rate of hallucination in summaries.
    """

    def __init__(self, model_path):
        """
        Initializes the EvaluationModel with a CrossEncoder model.

        Args:
            model_path (str): Path to the CrossEncoder model.
        """
        self.model = load_evaluation_model(model_path)
        self.scores = []
        self.accuracy = None
        self.hallucination_rate = None
    def evaluate_hallucination(self, summaries_df):
        """
        Evaluate the hallucination rate in summaries. This method updates the 'scores'
        attribute of the instance with the computed scores.

        Args:
            summaries_df (DataFrame): DataFrame containing source docs and summaries.

        Returns:
            list: List of hallucination scores. Also updates the 'scores' attribute of the instance.
        """
        source_docs = np.array(summaries_df['source'])
        generated_summaries = np.array(summaries_df['summary'])
        try:
            # CrossEncoder.predict expects a single list of (source, summary) pairs.
            scores = self.model.predict(list(zip(source_docs, generated_summaries)))
            self.scores = scores
            return self.scores
        except Exception as e:
            logging.error(f"Error evaluating hallucination: {e}")
            raise
    def compute_accuracy(self, threshold=0.5):
        """
        Compute the accuracy of the evaluated summaries based on the previously calculated
        scores. This method relies on the 'scores' attribute being populated, typically via
        the 'evaluate_hallucination' method.

        Args:
            threshold (float): Scores at or above this value count as consistent (default 0.5).

        Returns:
            float: Accuracy percentage. Also updates the 'accuracy' and 'hallucination_rate'
            attributes of the instance.

        Raises:
            ValueError: If scores have not been calculated prior to calling this method.
        """
        # `self.scores` may be a NumPy array, so check its length rather than its truth value.
        if len(self.scores) == 0:
            error_msg = "Scores not calculated. Call evaluate_hallucination() first."
            logging.error(error_msg)
            raise ValueError(error_msg)

        # Accuracy is the fraction of scores at or above the threshold.
        num_above_threshold = sum(score >= threshold for score in self.scores)
        num_total = len(self.scores)
        self.accuracy = (num_above_threshold / num_total) * 100
        self.hallucination_rate = 100 - self.accuracy
        return self.accuracy
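
if __name__ == "__main__":
    # Hedged smoke test, not part of the original module: the model ids and the
    # single-row sample below are assumptions used only to exercise the two
    # classes together; swap in the ids your pipeline actually evaluates.
    sample = pd.DataFrame({"text": ["The Eiffel Tower is located in Paris, France."],
                           "dataset": ["demo"]})
    generator = SummaryGenerator("facebook/opt-125m", revision="main")
    summaries = generator.generate_summaries(sample)

    evaluator = EvaluationModel("vectara/hallucination_evaluation_model")
    evaluator.evaluate_hallucination(summaries)
    accuracy = evaluator.compute_accuracy(threshold=0.5)
    logging.info("Accuracy: %.2f%% | Hallucination rate: %.2f%%",
                 accuracy, evaluator.hallucination_rate)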