speaker-diarization-app-v2

Running

File size: 1,533 Bytes

b1426fb
 
f6e66c5
b1426fb
 
f6e66c5
b1426fb
 
 
 
08d05f4
 
 
b1426fb
08d05f4
 
8998fb8
 
 
08d05f4
 
8998fb8
08d05f4
b1426fb
8998fb8
08d05f4
8998fb8
f6e66c5
08d05f4
 
 
 
8998fb8
 
08d05f4
 
8998fb8
 
08d05f4
 
f6e66c5

"""
Summarization Model Handler
Manages the fine-tuned BART model for text summarization.
"""

from transformers import BartTokenizer, BartForConditionalGeneration
import torch
import streamlit as st

class Summarizer:
    """Wrapper around a fine-tuned BART model used for text summarization.

    Neither the model nor the tokenizer is loaded at construction time;
    call :meth:`load_model` once before calling :meth:`process`.
    Errors are reported to the user through ``st.error`` and signalled to
    the caller by a ``None`` return value rather than by raising.
    """

    # Checkpoint name handed to ``BartTokenizer.from_pretrained``.
    TOKENIZER_NAME = 'facebook/bart-base'
    # Default location of the pickled fine-tuned model.
    DEFAULT_MODEL_PATH = 'bart_ami_finetuned.pkl'
    # Inputs longer than this many tokens are truncated by the tokenizer.
    MAX_INPUT_TOKENS = 1024

    def __init__(self):
        self.model = None
        self.tokenizer = None

    def load_model(self, model_path: str = DEFAULT_MODEL_PATH):
        """Load the tokenizer and the pickled model, then move it to a device.

        Args:
            model_path: Path of the pickled model file. Defaults to the
                file name the application has always used.

        Returns:
            The loaded model, or ``None`` if loading failed (the error is
            shown with ``st.error``). On failure ``self.model`` and
            ``self.tokenizer`` keep their previous values.
        """
        try:
            device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
            tokenizer = BartTokenizer.from_pretrained(self.TOKENIZER_NAME)

            # SECURITY NOTE: the checkpoint is a full pickled object, so
            # loading it executes arbitrary pickle code. Only load files
            # you trust. ``weights_only=False`` is required for this kind
            # of checkpoint on PyTorch >= 2.6, where the default flipped
            # to ``True``. ``map_location`` lets a checkpoint that was
            # saved on GPU be loaded on a CPU-only host.
            try:
                model = torch.load(
                    model_path, map_location=device, weights_only=False
                )
            except TypeError:
                # Older PyTorch releases do not know ``weights_only``.
                model = torch.load(model_path, map_location=device)

            model.to(device)
            # A pickled module keeps the training flag it was saved with;
            # force inference mode so dropout is disabled while generating.
            model.eval()
        except Exception as e:
            st.error(f"Error loading summarization model: {str(e)}")
            return None

        # Commit only after every step succeeded, so a failed load never
        # leaves the instance half-initialised.
        self.tokenizer = tokenizer
        self.model = model
        return self.model

    def process(self, text: str, max_length: int = 150, min_length: int = 40):
        """Summarize ``text`` with beam search.

        Args:
            text: The text to summarize.
            max_length: Upper bound passed to ``generate`` for the summary.
            min_length: Lower bound passed to ``generate`` for the summary.

        Returns:
            ``[{"summary_text": <summary>}]`` on success. Empty or
            whitespace-only input yields an empty summary without running
            the model. ``None`` is returned when summarization fails or
            the model has not been loaded (the error is shown with
            ``st.error``).
        """
        # Nothing to summarize: skip the model instead of letting beam
        # search produce ``min_length`` tokens from an empty input.
        if not text or not text.strip():
            return [{"summary_text": ""}]

        try:
            if self.model is None or self.tokenizer is None:
                raise RuntimeError(
                    "summarization model is not loaded; call load_model() first"
                )

            encoded = self.tokenizer(
                text,
                return_tensors="pt",
                max_length=self.MAX_INPUT_TOKENS,
                truncation=True,
            )
            # Tensors must live on the same device as the model.
            inputs = {
                key: value.to(self.model.device)
                for key, value in encoded.items()
            }
            summary_ids = self.model.generate(
                inputs["input_ids"],
                attention_mask=inputs.get("attention_mask"),
                max_length=max_length,
                min_length=min_length,
                num_beams=4,
                length_penalty=2.0
            )
            summary = self.tokenizer.decode(summary_ids[0], skip_special_tokens=True)
            # Return in the expected format
            return [{"summary_text": summary}]
        except Exception as e:
            st.error(f"Error in summarization: {str(e)}")
            return None