File size: 2,482 Bytes
4d483b8
86fe5f4
 
 
4d483b8
68a3780
c6edc92
d6a3f78
86fe5f4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
import streamlit as st
from bs4 import BeautifulSoup
from nltk.tokenize import word_tokenize
from nltk.corpus import wordnet
import random
import nltk
# Make sure the NLTK data used below (tokenizer models and the WordNet corpus)
# is installed. Streamlit re-executes this script on every user interaction,
# so the download is only attempted when the resource cannot be found locally
# instead of contacting the download server on each rerun.
# NOTE(review): 'punkt_tab' is what newer NLTK releases load for
# word_tokenize; 'punkt' is kept for older releases - confirm against the
# NLTK version pinned for this app.
for _resource_path, _package in (
    ('tokenizers/punkt', 'punkt'),
    ('tokenizers/punkt_tab', 'punkt_tab'),
    ('corpora/wordnet', 'wordnet'),
):
    try:
        nltk.data.find(_resource_path)
    except LookupError:
        nltk.download(_package)
def paraphrase_text(text, synonyms_num=5, random_synonym=True):
    """Paraphrase *text* by swapping alphabetic tokens for WordNet synonyms.

    The text is split with ``word_tokenize``. Every token for which
    ``str.isalpha()`` is true is looked up in WordNet; all other tokens
    (punctuation, numbers, mixed tokens) are kept as-is. The resulting tokens
    are re-joined with single spaces, so the spacing inside the original text
    is not reproduced.

    Args:
        text: The string to paraphrase.
        synonyms_num: Maximum number of synonyms listed for a word when
            ``random_synonym`` is false.
        random_synonym: When true, each word is replaced by one randomly
            chosen synonym. When false, it is replaced by the first
            ``synonyms_num`` synonyms joined with ``", "``.

    Returns:
        The paraphrased string. Words without any synonym are left unchanged,
        and empty or whitespace-only input is returned unchanged.
    """
    # Nothing to paraphrase; returning early also keeps whitespace-only
    # strings intact instead of collapsing them to "".
    if not text.strip():
        return text

    paraphrased_tokens = []
    for token in word_tokenize(text):
        # Non-words are passed through untouched.
        if not token.isalpha():
            paraphrased_tokens.append(token)
            continue

        lowered = token.lower()
        # A dict keeps insertion order while dropping the duplicates that
        # appear when the same lemma belongs to several synsets.
        synonyms = {}
        for syn in wordnet.synsets(token):
            for lemma in syn.lemmas():
                # Multi-word lemma names use underscores; show them as spaces.
                name = lemma.name().replace('_', ' ')
                # Compare case-insensitively so "Dog" is not "replaced" by "dog".
                if name.lower() != lowered:
                    synonyms[name] = None
        candidates = list(synonyms)

        if not candidates:
            # No synonym available: keep the original word.
            paraphrased_tokens.append(token)
        elif random_synonym:
            paraphrased_tokens.append(random.choice(candidates))
        else:
            paraphrased_tokens.append(", ".join(candidates[:synonyms_num]))

    return ' '.join(paraphrased_tokens)

def paraphrase_html(html_text, synonyms_num=5, random_synonym=True):
    """Paraphrase the visible text of an HTML document, keeping its markup.

    The HTML is parsed with BeautifulSoup's ``html.parser``. Each text node
    is passed through ``paraphrase_text`` and replaced in the tree; tags and
    attributes are not modified.

    Text that is not page content is left untouched: comments, doctype and
    other declarations, and the contents of ``<script>`` and ``<style>``
    elements. Whitespace-only nodes, and the leading/trailing whitespace of
    every other node, are preserved so that words separated by inline tags do
    not get glued together.

    Args:
        html_text: The HTML source to paraphrase.
        synonyms_num: Forwarded to ``paraphrase_text``.
        random_synonym: Forwarded to ``paraphrase_text``.

    Returns:
        The serialized HTML (``str(soup)``) with its text nodes paraphrased.
    """
    # Local import keeps this function self-contained; bs4 is already a
    # dependency of this file.
    from bs4.element import PreformattedString

    # Elements whose text is code rather than prose.
    non_content_parents = ('script', 'style')

    soup = BeautifulSoup(html_text, 'html.parser')
    # `string=True` is the current spelling of the older `text=True` filter.
    # find_all returns a list, so replacing nodes while looping is safe.
    for node in soup.find_all(string=True):
        # Comments, doctype, CDATA, ...: replacing one with a plain string
        # would turn it into visible page text.
        if isinstance(node, PreformattedString):
            continue
        parent = node.parent
        if parent is not None and parent.name in non_content_parents:
            continue

        original = str(node)
        core = original.strip()
        if not core:
            # Whitespace between tags is significant for inline layout.
            continue

        # Re-attach the surrounding whitespace that tokenizing would discard.
        leading = original[:len(original) - len(original.lstrip())]
        trailing = original[len(original.rstrip()):]
        node.replace_with(
            leading + paraphrase_text(core, synonyms_num, random_synonym) + trailing
        )

    return str(soup)

# ---- Streamlit page ---------------------------------------------------------
# Page metadata must be configured before any other element is drawn.
st.set_page_config(page_icon=":pencil2:", page_title="HTML Paraphraser")
st.title("HTML Paraphraser")

# Sidebar: options that are forwarded to paraphrase_html.
sidebar = st.sidebar
synonyms_num = sidebar.slider(
    "Number of Synonyms",
    min_value=1,
    max_value=10,
    value=5,
    step=1,
)
random_synonym = sidebar.checkbox("Use Random Synonym", value=True)

# Main area: the HTML to paraphrase.
html_text = st.text_area("Enter HTML text to paraphrase", height=300)

# Paraphrase only when the button is clicked, and render the result as HTML
# rather than as escaped text.
if st.button("Paraphrase"):
    st.write(
        paraphrase_html(html_text, synonyms_num, random_synonym),
        unsafe_allow_html=True,
    )