Keane Moraes committed on
Commit
d9ce745
·
1 Parent(s): 3c32d9a

initial commit

Browse files
Files changed (3) hide show
  1. app.py +11 -0
  2. generation.py +8 -0
  3. utils.py +43 -0
app.py ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+
3
+ import time
4
+
5
# Page heading, followed by a progress bar that starts out empty.
st.title("Hello World")
progbar = st.progress(0)

# Advance the bar one percent at a time (1..100), pausing 0.1 s per step.
for percent in range(1, 101):
    progbar.progress(percent)
    time.sleep(0.1)
11
+
generation.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ import openai
2
+
3
class Insights:
    """Placeholder type; it currently defines no attributes or behaviour."""

    def __init__(self) -> None:
        """Create an instance without setting any state."""
7
+
8
+
utils.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from keybert import KeyBERT
3
+ from transformers import AutoTokenizer
4
+ import re
5
+
6
+
7
# Sentence boundary: one or more spaces that come after a '.' or '?' (itself
# preceded by any character and, before that, a non-uppercase character) and
# that are followed by an uppercase letter. The non-uppercase requirement
# presumably keeps initials such as "J. K. Rowling" together -- TODO confirm.
_SENTENCE_BOUNDARY = re.compile(r'(?<=[^A-Z].[.?]) +(?=[A-Z])')


def create_nest_sentences(document: str, token_max_length=1024, *, tokenizer=None):
    """Group the sentences of *document* into chunks that fit a token budget.

    Newlines are replaced by spaces, the text is split into sentences, and
    consecutive sentences are packed into a chunk for as long as the chunk's
    running token count stays strictly below ``token_max_length``.

    Args:
        document: Text to split.
        token_max_length: Token budget per chunk. A chunk is closed before a
            sentence that would bring its total to this value or above.
        tokenizer: Optional callable ``tokenizer(text, truncation=False,
            padding=False)`` returning a mapping with an ``"input_ids"``
            sequence. When omitted, the ``facebook/bart-large-mnli``
            tokenizer is loaded on each call; pass a preloaded tokenizer to
            avoid that cost when calling repeatedly.

    Returns:
        A list of chunks, each a non-empty list of sentence strings in
        document order. A blank document yields ``[]``. A single sentence
        that alone meets or exceeds the budget is still returned, in a chunk
        of its own, because sentences are never split.
    """
    if not document.strip():
        return []

    if tokenizer is None:
        tokenizer = AutoTokenizer.from_pretrained('facebook/bart-large-mnli')

    nested = []
    chunk = []
    chunk_tokens = 0

    for sentence in _SENTENCE_BOUNDARY.split(document.replace("\n", " ")):
        # Look the ids up by key rather than by integer index: the key is
        # available on every tokenizer output, not only on fast tokenizers.
        encoded = tokenizer(sentence, truncation=False, padding=False)
        sentence_tokens = len(encoded["input_ids"])

        # Close the current chunk first when this sentence would not fit.
        # The `chunk` guard prevents emitting an empty chunk when the very
        # first sentence is already over budget.
        if chunk and chunk_tokens + sentence_tokens >= token_max_length:
            nested.append(chunk)
            chunk = []
            chunk_tokens = 0

        # The sentence's tokens always count towards the chunk it lands in,
        # including when it is the first sentence of a fresh chunk.
        chunk.append(sentence)
        chunk_tokens += sentence_tokens

    # The split always yields at least one sentence, so `chunk` is non-empty.
    nested.append(chunk)
    return nested
27
+
28
# A model is a shared resource, not serializable data: `st.cache_resource`
# keeps one instance and returns that same object on every call, whereas
# `st.cache_data` stores a serialized copy and rebuilds it for each caller.
@st.cache_resource
def load_keyword_model():
    """Return a KeyBERT model built with its default settings.

    The instance is cached by Streamlit, so it is constructed only on the
    first call and the same object is returned afterwards.
    """
    return KeyBERT()
32
+
33
+
34
def keyword_gen(kw_model, sequence:str):
    """Extract keywords from *sequence* using *kw_model*.

    Calls ``kw_model.extract_keywords`` with a fixed configuration (1- to
    2-word phrases, English stop words, MMR with diversity 0.5, top 10) and
    returns whatever that call returns.
    """
    extraction_options = {
        "keyphrase_ngram_range": (1, 2),
        "stop_words": 'english',
        "use_mmr": True,
        "diversity": 0.5,
        "top_n": 10,
    }
    return kw_model.extract_keywords(sequence, **extraction_options)