liuhua
liuhua
committed on
Commit
·
311da71
1
Parent(s):
1635b00
Fix potential SSRF attack vulnerability (#4334)
Browse files
### What problem does this PR solve?
Fix potential SSRF attack vulnerability
### Type of change
- [x] Bug Fix (non-breaking change which fixes an issue)
Co-authored-by: liuhua <[email protected]>
- agent/component/crawler.py +1 -1
- api/utils/web_utils.py +24 -1
agent/component/crawler.py
CHANGED
|
@@ -41,7 +41,7 @@ class Crawler(ComponentBase, ABC):
|
|
| 41 |
ans = self.get_input()
|
| 42 |
ans = " - ".join(ans["content"]) if "content" in ans else ""
|
| 43 |
if not is_valid_url(ans):
|
| 44 |
-
return Crawler.be_output("")
|
| 45 |
try:
|
| 46 |
result = asyncio.run(self.get_web(ans))
|
| 47 |
|
|
|
|
| 41 |
ans = self.get_input()
|
| 42 |
ans = " - ".join(ans["content"]) if "content" in ans else ""
|
| 43 |
if not is_valid_url(ans):
|
| 44 |
+
return Crawler.be_output("URL not valid")
|
| 45 |
try:
|
| 46 |
result = asyncio.run(self.get_web(ans))
|
| 47 |
|
api/utils/web_utils.py
CHANGED
|
@@ -1,4 +1,7 @@
|
|
| 1 |
import re
|
|
|
|
|
|
|
|
|
|
| 2 |
import json
|
| 3 |
import base64
|
| 4 |
|
|
@@ -76,5 +79,25 @@ def __get_pdf_from_html(
|
|
| 76 |
return base64.b64decode(result["data"])
|
| 77 |
|
| 78 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 79 |
def is_valid_url(url: str) -> bool:
|
| 80 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import re
|
| 2 |
+
import socket
|
| 3 |
+
from urllib.parse import urlparse
|
| 4 |
+
import ipaddress
|
| 5 |
import json
|
| 6 |
import base64
|
| 7 |
|
|
|
|
| 79 |
return base64.b64decode(result["data"])
|
| 80 |
|
| 81 |
|
| 82 |
+
def is_private_ip(ip: str) -> bool:
    """Return True if *ip* is an address that is not publicly routable.

    Besides the ranges ``ipaddress`` flags as private (RFC 1918, loopback,
    link-local, unspecified, ...), this also treats every address that is
    not globally reachable as private. That covers the RFC 6598 shared
    address space (100.64.0.0/10), which ``is_private`` alone reports as
    False even though it is not a public destination.

    Args:
        ip: Textual IPv4 or IPv6 address.

    Returns:
        True for non-public addresses; False for public addresses and for
        strings that are not valid IP addresses.
    """
    try:
        ip_obj = ipaddress.ip_address(ip)
    except ValueError:
        # Not an IP literal at all, so it cannot be a private IP.
        return False
    return ip_obj.is_private or not ip_obj.is_global


def is_valid_url(url: str) -> bool:
    """Return True if *url* is an http(s) URL whose host resolves only to public IPs.

    Used as an SSRF guard: the URL is rejected when it is malformed, when its
    host cannot be resolved, or when any address it resolves to is private
    (see ``is_private_ip``).

    NOTE(review): the result reflects DNS at call time only; a caller that
    resolves the host again when fetching may see different addresses.

    Args:
        url: Candidate URL, typically untrusted user input.

    Returns:
        True if the URL looks valid and every resolved address is public,
        otherwise False.
    """
    if not re.match(r"(https?)://[-A-Za-z0-9+&@#/%?=~_|!:,.;]+[-A-Za-z0-9+&@#/%=~_|]", url):
        return False

    try:
        hostname = urlparse(url).hostname
    except ValueError:
        # urlparse rejects some malformed netlocs (e.g. an unbalanced "[").
        return False
    if not hostname:
        return False

    try:
        # getaddrinfo returns every A/AAAA record; gethostbyname would only
        # return a single IPv4 address and fail on IPv6-only hosts.
        addr_infos = socket.getaddrinfo(hostname, None)
    except (OSError, UnicodeError):
        # socket.gaierror is an OSError; UnicodeError comes from IDNA
        # encoding of malformed host labels.
        return False
    if not addr_infos:
        return False

    for info in addr_infos:
        # sockaddr[0] is the address; drop any IPv6 zone id ("fe80::1%eth0").
        address = str(info[4][0]).split("%", 1)[0]
        if is_private_ip(address):
            return False
    return True
|