"""Streamlit app that paraphrases the text content of an HTML snippet.

Each alphabetic word in the visible text is replaced by WordNet synonyms,
while the markup itself (tags, comments, scripts, styles) is left untouched.
"""

import random

import nltk
import streamlit as st
from bs4 import BeautifulSoup
from bs4.element import PreformattedString
from nltk.corpus import wordnet
from nltk.tokenize import word_tokenize

# Streamlit re-executes this script on every interaction; ``quiet=True`` keeps
# the repeated "already up-to-date" messages out of the server log.
# NOTE(review): recent NLTK releases may additionally require the
# 'punkt_tab' resource for ``word_tokenize`` -- confirm against the pinned
# NLTK version.
nltk.download('punkt', quiet=True)
nltk.download('wordnet', quiet=True)

# Elements whose text is code, not prose, and must never be paraphrased.
_NON_PROSE_TAGS = frozenset({"script", "style"})


def _collect_synonyms(word: str) -> list[str]:
    """Return the distinct WordNet synonyms of *word*, in lookup order.

    Multi-word lemma names have their underscores replaced by spaces, and the
    word itself (compared case-insensitively) is excluded.
    """
    lowered = word.lower()
    candidates = (
        lemma.name().replace("_", " ")
        for synset in wordnet.synsets(word)
        for lemma in synset.lemmas()
    )
    # dict.fromkeys de-duplicates while preserving first-seen order, so the
    # "first N synonyms" output never repeats a word.
    return list(dict.fromkeys(c for c in candidates if c.lower() != lowered))


def paraphrase_text(text: str, synonyms_num: int = 5,
                    random_synonym: bool = True) -> str:
    """Replace every alphabetic token of *text* with WordNet synonyms.

    Args:
        text: Plain text to paraphrase.
        synonyms_num: Maximum number of synonyms listed per word when
            ``random_synonym`` is false.
        random_synonym: If true, each word is replaced by one randomly chosen
            synonym; otherwise by a comma-separated list of up to
            ``synonyms_num`` synonyms.

    Returns:
        The tokens joined by single spaces. Tokens that are not purely
        alphabetic, and words without any synonym, are kept unchanged.
    """
    paraphrased_tokens = []
    for token in word_tokenize(text):
        # Punctuation, numbers and mixed tokens are passed through as-is.
        synonyms = _collect_synonyms(token) if token.isalpha() else []
        if not synonyms:
            paraphrased_tokens.append(token)
        elif random_synonym:
            paraphrased_tokens.append(random.choice(synonyms))
        else:
            paraphrased_tokens.append(", ".join(synonyms[:synonyms_num]))
    return ' '.join(paraphrased_tokens)


def paraphrase_html(html_text: str, synonyms_num: int,
                    random_synonym: bool) -> str:
    """Paraphrase the prose inside *html_text* while preserving its markup.

    Args:
        html_text: HTML source to process.
        synonyms_num: Forwarded to :func:`paraphrase_text`.
        random_synonym: Forwarded to :func:`paraphrase_text`.

    Returns:
        The serialized HTML with its text nodes paraphrased. Comments,
        doctype/CDATA/processing-instruction nodes, ``<script>``/``<style>``
        content and whitespace-only nodes are left exactly as they were.
    """
    soup = BeautifulSoup(html_text, 'html.parser')

    # ``string=True`` is the current spelling of the deprecated ``text=True``.
    for node in soup.find_all(string=True):
        # Comments, doctypes, CDATA etc. are all PreformattedString
        # subclasses; replacing them with a plain string would turn them into
        # visible text.
        if isinstance(node, PreformattedString):
            continue
        # Rewriting JavaScript or CSS would break the page.
        if node.parent is not None and node.parent.name in _NON_PROSE_TAGS:
            continue
        # Whitespace-only nodes carry the document's formatting; tokenizing
        # them would yield an empty string and discard it.
        if not node.strip():
            continue
        node.replace_with(
            paraphrase_text(str(node), synonyms_num, random_synonym)
        )

    return str(soup)


st.set_page_config(page_title="HTML Paraphraser", page_icon=":pencil2:")

st.title("HTML Paraphraser")

synonyms_num = st.sidebar.slider(
    "Number of Synonyms", min_value=1, max_value=10, value=5, step=1
)
random_synonym = st.sidebar.checkbox("Use Random Synonym", value=True)

html_text = st.text_area("Enter HTML text to paraphrase", height=300)

if st.button("Paraphrase"):
    paraphrased_html = paraphrase_html(html_text, synonyms_num, random_synonym)
    # NOTE(review): unsafe_allow_html renders the user-supplied markup
    # verbatim. Acceptable only while the input comes from the same user who
    # views the result; sanitize it before showing it to anyone else.
    st.write(paraphrased_html, unsafe_allow_html=True)