Daniel Tse
committed on
Commit
·
4fa56af
1
Parent(s):
4bce6ba
Remove max_len and min_len
Browse files
app.py
CHANGED
|
@@ -9,6 +9,9 @@ from nltk import sent_tokenize
|
|
| 9 |
nltk.download('punkt')
|
| 10 |
|
| 11 |
|
|
|
|
|
|
|
|
|
|
| 12 |
def transcribe_audio(audiofile):
|
| 13 |
|
| 14 |
st.session_state['audio'] = audiofile
|
|
@@ -73,10 +76,17 @@ def summarize_podcast(audiotranscription):
|
|
| 73 |
st.info("Chunking text")
|
| 74 |
text_chunks = chunk_and_preprocess_text(audiotranscription)
|
| 75 |
|
| 76 |
-
summarized_text = summarizer(text_chunks, max_len=200,min_len=50)
|
|
|
|
| 77 |
st.session_state['summary'] = summarized_text
|
| 78 |
return summarized_text
|
| 79 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 80 |
|
| 81 |
st.markdown("# Podcast Q&A")
|
| 82 |
|
|
|
|
| 9 |
nltk.download('punkt')
|
| 10 |
|
| 11 |
|
| 12 |
+
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
| 13 |
+
|
| 14 |
+
|
| 15 |
def transcribe_audio(audiofile):
|
| 16 |
|
| 17 |
st.session_state['audio'] = audiofile
|
|
|
|
| 76 |
st.info("Chunking text")
|
| 77 |
text_chunks = chunk_and_preprocess_text(audiotranscription)
|
| 78 |
|
| 79 |
+
#summarized_text = summarizer(text_chunks, max_len=200,min_len=50)
|
| 80 |
+
summarized_text = summarizer(text_chunks)
|
| 81 |
st.session_state['summary'] = summarized_text
|
| 82 |
return summarized_text
|
| 83 |
+
|
| 84 |
+
def prepare_text_for_qa(audiotranscription):
|
| 85 |
+
|
| 86 |
+
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000,chunk_overlap=20)
|
| 87 |
+
documents = text_splitter.split_documents(audiotranscription)
|
| 88 |
+
revalue = ""
|
| 89 |
+
return revalue
|
| 90 |
|
| 91 |
st.markdown("# Podcast Q&A")
|
| 92 |
|