File size: 849 Bytes
809119f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
from transformers import T5Tokenizer, T5ForConditionalGeneration
import torch

tokenizer = T5Tokenizer.from_pretrained("t5-base")
model = T5ForConditionalGeneration.from_pretrained("t5-base")

def summarize_text(text, max_chunk_length=512):
    text = text.replace("\n", " ")
    chunks = [text[i:i+max_chunk_length] for i in range(0, len(text), max_chunk_length)]

    summarized_chunks = []

    for chunk in chunks:
        input_text = "summarize: " + chunk
        inputs = tokenizer.encode(input_text, return_tensors="pt", max_length=512, truncation=True)
        summary_ids = model.generate(inputs, max_length=150, min_length=40, num_beams=4, length_penalty=2.0, early_stopping=True)
        output = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
        summarized_chunks.append(output)

    return " ".join(summarized_chunks)