# Page metadata captured with this file — Spaces status: Build error; file size: 1,066 bytes; d9ce745
import streamlit as st
from keybert import KeyBERT
from transformers import AutoTokenizer
import re
def create_nest_sentences(document: str, token_max_length: int = 1024, tokenizer=None):
    """Group the sentences of ``document`` into chunks bounded by a token budget.

    Newlines in the document are replaced by spaces, the text is split into
    sentences with a regular expression, and the sentences are packed, in
    order, into chunks whose summed per-sentence token count stays below
    ``token_max_length``.

    Args:
        document: Text to split into sentence chunks.
        token_max_length: Exclusive upper bound on the summed token count of
            the sentences in one chunk.
        tokenizer: Optional callable used to count tokens. It is invoked as
            ``tokenizer(sentence, truncation=False, padding=False)`` and the
            length of element ``[0]`` of the result is used as the sentence's
            token count. When ``None`` (the default), the
            ``'facebook/bart-large-mnli'`` tokenizer is loaded with
            ``AutoTokenizer.from_pretrained``. Pass an already-loaded
            tokenizer to avoid reloading it on every call.

    Returns:
        A list of chunks; each chunk is a non-empty list of sentence strings.
        A sentence whose own token count reaches the budget cannot be split
        here and is placed alone in its own chunk.
    """
    if tokenizer is None:
        tokenizer = AutoTokenizer.from_pretrained('facebook/bart-large-mnli')

    # Split on spaces that follow '.' or '?' and precede an uppercase letter.
    # The `[^A-Z].` part of the lookbehind presumably avoids splitting after
    # short upper-case abbreviations/initials -- TODO confirm intent.
    sentences = re.split(
        r'(?<=[^A-Z].[.?]) +(?=[A-Z])',
        document.replace("\n", ' '),
    )

    nested = []
    current = []
    current_length = 0

    for sentence in sentences:
        encoded = tokenizer(str(sentence), truncation=False, padding=False)[0]
        sentence_length = len(encoded)

        # Close the running chunk *before* adding a sentence that would push
        # it to the budget. The `current` check prevents emitting an empty
        # chunk when the very first sentence is already over budget.
        if current and current_length + sentence_length >= token_max_length:
            nested.append(current)
            current = []
            current_length = 0

        current.append(sentence)
        # Count the sentence that now lives in the chunk, so a chunk started
        # after a flush does not begin with an under-reported length.
        current_length += sentence_length

    if current:
        nested.append(current)
    return nested
@st.cache_resource
def load_keyword_model():
    """Create the KeyBERT keyword model once and let Streamlit cache it.

    ``st.cache_resource`` is used instead of ``st.cache_data`` because the
    return value is a model object rather than serializable data: the
    resource cache hands back the same object on later calls, whereas the
    data cache serializes the return value and returns a copy each time.

    Returns:
        A ``KeyBERT`` instance constructed with its default arguments.
    """
    return KeyBERT()
def keyword_gen(kw_model, sequence: str):
    """Extract keywords from ``sequence`` using ``kw_model``.

    Args:
        kw_model: Object exposing ``extract_keywords`` (presumably the KeyBERT
            instance from ``load_keyword_model`` -- confirm against callers).
        sequence: Text to extract keywords from.

    Returns:
        Whatever ``kw_model.extract_keywords`` returns for the fixed options
        below.
    """
    # Fixed extraction settings: 1- to 2-word phrases, English stop words,
    # MMR enabled with diversity 0.5, and at most the top 10 results.
    extraction_options = {
        "keyphrase_ngram_range": (1, 2),
        "stop_words": 'english',
        "use_mmr": True,
        "diversity": 0.5,
        "top_n": 10,
    }
    return kw_model.extract_keywords(sequence, **extraction_options)