Spaces:
Paused
Paused
import random

import nltk
import streamlit as st
from bs4 import BeautifulSoup, NavigableString
from nltk.corpus import wordnet
from nltk.tokenize import word_tokenize
# Fetch the NLTK data this script relies on:
#   - "punkt" / "punkt_tab": tokenizer models used by word_tokenize (newer NLTK
#     releases load "punkt_tab", older ones load "punkt").
#   - "wordnet": the corpus behind wordnet.synsets().
# nltk.download() skips packages that are already up to date; quiet=True keeps
# the progress output out of the app logs.
for _resource in ("punkt", "punkt_tab", "wordnet"):
    nltk.download(_resource, quiet=True)
def paraphrase_text(text, synonyms_num=5, random_synonym=True):
    """Paraphrase ``text`` by replacing each word with WordNet synonyms.

    The text is split with ``word_tokenize``. Every purely alphabetic token
    is looked up in WordNet; tokens that have no synonyms and non-alphabetic
    tokens (punctuation, numbers, ...) are kept unchanged.

    Args:
        text: The text to paraphrase.
        synonyms_num: Maximum number of synonyms listed per word when
            ``random_synonym`` is false. Not used when it is true.
        random_synonym: If true, each word is replaced by a single synonym
            chosen at random; otherwise it is replaced by its first
            ``synonyms_num`` synonyms joined with ", ".

    Returns:
        The resulting tokens joined with single spaces (the original spacing
        is not preserved). An empty string for empty or whitespace-only
        input.
    """
    # Empty / whitespace-only input has no tokens; skip the tokenizer entirely.
    if not text or not text.strip():
        return ""

    paraphrased_tokens = []
    for token in word_tokenize(text):
        # Punctuation, numbers and mixed tokens are passed through untouched.
        if not token.isalpha():
            paraphrased_tokens.append(token)
            continue

        lowered = token.lower()
        # A dict is used as an insertion-ordered set: the same lemma usually
        # shows up in several synsets, and duplicates would otherwise crowd
        # out distinct synonyms in the ``synonyms_num`` slice below.
        candidates = {}
        for syn in wordnet.synsets(token):
            for lemma in syn.lemmas():
                # WordNet writes multi-word lemmas with underscores.
                name = lemma.name().replace("_", " ")
                # Case-insensitive so "Dog" is not "paraphrased" to "dog".
                if name.lower() != lowered:
                    candidates[name] = None
        synonyms = list(candidates)

        if not synonyms:
            # No alternative known: keep the original word.
            paraphrased_tokens.append(token)
        elif random_synonym:
            paraphrased_tokens.append(random.choice(synonyms))
        else:
            paraphrased_tokens.append(", ".join(synonyms[:synonyms_num]))

    return " ".join(paraphrased_tokens)
def paraphrase_html(html_text, synonyms_num, random_synonym):
    """Paraphrase the visible text of an HTML fragment, leaving markup intact.

    The HTML is parsed with BeautifulSoup's ``html.parser``; each ordinary
    text node is run through ``paraphrase_text`` and replaced in the tree.

    Args:
        html_text: The HTML source to process.
        synonyms_num: Forwarded to ``paraphrase_text``.
        random_synonym: Forwarded to ``paraphrase_text``.

    Returns:
        The modified document serialized back to a string.
    """
    soup = BeautifulSoup(html_text, 'html.parser')

    # ``string=True`` is the current name of the old ``text=True`` filter.
    # find_all returns a list, so replacing nodes while looping is safe.
    for node in soup.find_all(string=True):
        # Exact type check on purpose: comments, doctypes, CDATA, etc. are
        # NavigableString *subclasses*. Replacing them with a plain string
        # would turn them into visible page text, so they are left alone.
        if type(node) is not NavigableString:
            continue
        # Never rewrite JavaScript or CSS source.
        if node.parent is not None and node.parent.name in ("script", "style"):
            continue

        original = str(node)
        core = original.strip()
        # Whitespace-only nodes separate inline elements; keep them as-is.
        if not core:
            continue

        # Tokenizing drops surrounding whitespace, which would glue the text
        # to neighbouring elements; put the original padding back.
        leading = original[:len(original) - len(original.lstrip())]
        trailing = original[len(original.rstrip()):]
        paraphrased = paraphrase_text(core, synonyms_num, random_synonym)
        node.replace_with(leading + paraphrased + trailing)

    return str(soup)
# ---------------------------------------------------------------------------
# Streamlit UI
# ---------------------------------------------------------------------------
# Page configuration is set before any other Streamlit element is created.
st.set_page_config(page_title="HTML Paraphraser", page_icon=":pencil2:")
st.title("HTML Paraphraser")

# Sidebar options, forwarded unchanged to paraphrase_html().
synonyms_num = st.sidebar.slider("Number of Synonyms", min_value=1, max_value=10, value=5, step=1)
random_synonym = st.sidebar.checkbox("Use Random Synonym", value=True)

html_text = st.text_area("Enter HTML text to paraphrase", height=300)

if st.button("Paraphrase"):
    if not html_text.strip():
        # Without this the click would silently render nothing.
        st.warning("Please enter some HTML text to paraphrase.")
    else:
        paraphrased_html = paraphrase_html(html_text, synonyms_num, random_synonym)
        # unsafe_allow_html=True makes Streamlit render the markup instead of
        # escaping it. The HTML comes from the user's own input.
        st.write(paraphrased_html, unsafe_allow_html=True)