"""Streamlit front end for extracting keywords from a document with KeyBERT."""

import streamlit as st
from keybert import KeyBERT

# Create a KeyBERT instance
kw_model = KeyBERT()

# Number of keywords to return.
NUM_KEYWORDS = 5
# Weight for the trade-off between relevance and diversity in MMR
# (higher favours relevance).
MMR_LAMBDA = 0.7


# Define the Streamlit app
def main():
    """Render the keyword-extraction page and show results on request.

    The page collects a document plus two options (stop-word removal and
    Maximal Marginal Relevance) and, when the button is pressed, lists the
    extracted keywords with their scores.
    """
    st.title("Keyword Extraction")
    st.write("Enter your document below:")

    # Get user input
    doc = st.text_area("Document")

    # All option widgets are created before the button: a widget that only
    # exists inside the button branch disappears on the rerun triggered by
    # interacting with it, so its value could never be used.
    remove_stopwords = st.checkbox("Remove Stopwords")
    apply_mmr = st.checkbox("Apply Maximal Marginal Relevance (MMR)")

    if not st.button("Extract Keywords"):
        return

    if not doc.strip():
        st.warning("Please enter a document first.")
        return

    # KeyBERT implements MMR itself via use_mmr/diversity. Its `diversity`
    # is the complement of the relevance weight, hence 1 - MMR_LAMBDA.
    keywords = kw_model.extract_keywords(
        doc,
        stop_words="english" if remove_stopwords else None,
        top_n=NUM_KEYWORDS,
        use_mmr=apply_mmr,
        diversity=1 - MMR_LAMBDA,
    )

    if not keywords:
        st.info("No keywords could be extracted from this document.")
        return

    st.write("Keywords:")
    for keyword, score in keywords:
        st.write(f"- {keyword} (Score: {score})")


# Run the app
if __name__ == "__main__":
    main()