Spaces:
Sleeping
Sleeping
Update RemoveHTMLtags.py
Browse files- RemoveHTMLtags.py +9 -23
RemoveHTMLtags.py
CHANGED
@@ -1,8 +1,5 @@
|
|
1 |
-
|
2 |
-
|
3 |
-
|
4 |
-
# HTML Document
|
5 |
-
HTML_DOC = """
|
6 |
<html>
|
7 |
<head>
|
8 |
<title> Geeksforgeeks </title>
|
@@ -14,21 +11,10 @@ HTML_DOC = """
|
|
14 |
<div>Computer Science portal.</div>
|
15 |
</body>
|
16 |
</html>
|
17 |
-
"""
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
for data in soup(['style', 'script']):
|
26 |
-
# Remove tags
|
27 |
-
data.decompose()
|
28 |
-
|
29 |
-
# return data by retrieving the tag content
|
30 |
-
return ' '.join(soup.stripped_strings)
|
31 |
-
|
32 |
-
|
33 |
-
# Print the extracted data
|
34 |
-
print(remove_tags(HTML_DOC))
|
|
|
1 |
+
from bs4 import BeautifulSoup #line:2
|
2 |
+
HTML_DOC ="""
|
|
|
|
|
|
|
3 |
<html>
|
4 |
<head>
|
5 |
<title> Geeksforgeeks </title>
|
|
|
11 |
<div>Computer Science portal.</div>
|
12 |
</body>
|
13 |
</html>
|
14 |
+
"""#line:17
|
15 |
+
def remove_tags(O0OOO0OO000O0O00O):
    """Return the visible text of an HTML string with the markup removed.

    The argument is parsed with BeautifulSoup using the built-in
    ``html.parser`` backend. Every ``<style>`` and ``<script>`` element is
    removed from the parsed tree, and the remaining whitespace-stripped text
    fragments are joined with single spaces.

    The parameter keeps its original (machine-generated) name so that any
    caller passing it by keyword continues to work.
    """
    parsed = BeautifulSoup(O0OOO0OO000O0O00O, "html.parser")

    # Collect the non-content elements first, then drop each from the tree.
    unwanted = parsed.find_all(['style', 'script'])
    for node in unwanted:
        node.decompose()

    text_fragments = parsed.stripped_strings
    return ' '.join(text_fragments)
|
20 |
+
# Print the text extracted from the sample HTML document.
print (remove_tags (HTML_DOC ))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|