speaker-diarization-app-v2

Running

File size: 1,374 Bytes

9b2efc6
b1426fb
 
 
 
08d05f4
 
1ae5349
b1426fb
08d05f4
 
1ae5349
9b2efc6
 
08d05f4
 
8998fb8
08d05f4
b1426fb
9b2efc6
08d05f4
1ae5349
9b2efc6
 
 
 
 
 
 
 
 
 
08d05f4
 
9b2efc6

from transformers import BartTokenizer, BartForConditionalGeneration
import torch
import streamlit as st

class Summarizer:
    """Abstractive text summarizer backed by a fine-tuned BART checkpoint.

    Call ``load_model()`` once before ``process()``. Failures are reported
    to the user through ``st.error`` and signalled to the caller by a
    ``None`` return value instead of a raised exception.
    """

    def __init__(self):
        # Both attributes stay None until load_model() succeeds.
        self.model = None
        self.tokenizer = None

    def load_model(self):
        """Load the tokenizer and the fine-tuned model.

        The model is placed on CUDA when available, otherwise on the CPU,
        and switched to evaluation mode.

        Returns:
            The loaded model, or ``None`` if anything failed (the error is
            shown with ``st.error`` and the instance is left unchanged).
        """
        try:
            device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
            tokenizer = BartTokenizer.from_pretrained('facebook/bart-base')
            # NOTE(security): torch.load unpickles arbitrary Python objects;
            # only ever point this at a checkpoint file you trust.
            # map_location lets a checkpoint that was saved from a GPU be
            # deserialized on a CPU-only host.
            model = torch.load('bart_ami_finetuned.pkl', map_location=device)
            model.to(device)
            # A pickled module keeps its training flag; make sure dropout is
            # disabled for inference.
            model.eval()
        except Exception as e:
            st.error(f"Error loading summarization model: {str(e)}")
            return None

        # Publish only after every step succeeded so the instance is never
        # left with a tokenizer but no usable model.
        self.tokenizer = tokenizer
        self.model = model
        return self.model

    def process(self, text: str, max_length: int = 150, min_length: int = 40):
        """Summarize ``text`` with beam search.

        Args:
            text: Input to summarize; it is truncated to 1024 tokens.
            max_length: ``max_length`` forwarded to ``generate``.
            min_length: ``min_length`` forwarded to ``generate``.

        Returns:
            The decoded summary string, ``""`` when ``text`` is empty or
            whitespace-only, or ``None`` if summarization failed (the error
            is shown with ``st.error``).
        """
        # Nothing to summarize: skip the model entirely rather than have it
        # generate text from an input that carries no content.
        if isinstance(text, str) and not text.strip():
            return ""

        try:
            if self.model is None or self.tokenizer is None:
                raise RuntimeError(
                    "summarization model is not loaded; call load_model() first"
                )

            encoded = self.tokenizer(
                text, return_tensors="pt", max_length=1024, truncation=True
            )
            device = self.model.device
            encoded = {key: value.to(device) for key, value in encoded.items()}

            summary_ids = self.model.generate(
                encoded["input_ids"],
                # Pass the tokenizer's mask explicitly instead of relying on
                # generate() to infer one from the input ids.
                attention_mask=encoded.get("attention_mask"),
                max_length=max_length,
                min_length=min_length,
                num_beams=4,
                length_penalty=2.0,
            )
            return self.tokenizer.decode(summary_ids[0], skip_special_tokens=True)
        except Exception as e:
            st.error(f"Error in summarization: {str(e)}")
            return None