Spaces:
Sleeping
Sleeping
:zap: [Enhance] WebpageContentExtractor: Escape dash, and ignore yahoo.com
Browse files
networks/network_configs.py
CHANGED
|
@@ -10,13 +10,14 @@ IGNORE_CLASSES = [
|
|
| 10 |
"navbar",
|
| 11 |
# 163.com
|
| 12 |
"post_(top)|(side)|(recommends)|(crumb)|(statement)|(next)|(jubao)",
|
| 13 |
-
"ntes-.*nav",
|
| 14 |
-
"nav-bottom",
|
| 15 |
]
|
| 16 |
|
| 17 |
IGNORE_HOSTS = [
|
| 18 |
"weibo.com",
|
| 19 |
"hymson.com",
|
|
|
|
| 20 |
]
|
| 21 |
|
| 22 |
REQUESTS_HEADERS = {
|
|
|
|
| 10 |
"navbar",
|
| 11 |
# 163.com
|
| 12 |
"post_(top)|(side)|(recommends)|(crumb)|(statement)|(next)|(jubao)",
|
| 13 |
+
"ntes\-.*nav",
|
| 14 |
+
"nav\-bottom",
|
| 15 |
]
|
| 16 |
|
| 17 |
IGNORE_HOSTS = [
|
| 18 |
"weibo.com",
|
| 19 |
"hymson.com",
|
| 20 |
+
"yahoo.com",
|
| 21 |
]
|
| 22 |
|
| 23 |
REQUESTS_HEADERS = {
|