File size: 8,024 Bytes
f957846 5345e1f f957846 5345e1f f957846 5345e1f f957846 5345e1f f957846 5345e1f f957846 5345e1f f957846 5345e1f f957846 5345e1f f957846 5345e1f f957846 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 |
"""
Secure path utilities to prevent path injection attacks.
This module provides secure alternatives to os.path operations that validate
and sanitize file paths to prevent directory traversal and other path-based attacks.
"""
import logging
import os
import re
from pathlib import Path
from typing import Optional, Union
logger = logging.getLogger(__name__)
def sanitize_filename(filename: str, max_length: int = 255) -> str:
    """
    Sanitize a filename to prevent path injection attacks.

    Directory components are discarded (forward slashes and backslashes are
    both treated as separators), reserved/control characters are replaced
    with underscores, runs of dots are collapsed to a single dot, and
    leading/trailing dots and spaces are stripped.

    Args:
        filename: The filename to sanitize
        max_length: Maximum length of the sanitized filename (must be >= 1)

    Returns:
        A sanitized filename safe for use in file operations. If nothing is
        left after sanitization, the placeholder "sanitized_file" is used.

    Raises:
        ValueError: If the filename is empty or not a string, or if
            max_length is smaller than 1
    """
    if not filename or not isinstance(filename, str):
        raise ValueError("Filename must be a non-empty string")
    if max_length < 1:
        raise ValueError("max_length must be at least 1")

    # os.path.basename() only splits on "/" on POSIX, so a Windows-style
    # payload would keep its directory part. Normalize backslashes first so
    # the directory portion is dropped on every platform.
    filename = os.path.basename(filename.replace("\\", "/"))

    # Replace characters that are reserved on common filesystems, plus
    # ASCII control characters.
    sanitized = re.sub(r'[<>:"|?*\x00-\x1f]', "_", filename)

    # Collapse runs of dots so no ".." sequence survives.
    sanitized = re.sub(r"\.{2,}", ".", sanitized)

    # Remove leading/trailing dots and spaces.
    sanitized = sanitized.strip(". ")

    # Ensure it's not empty after sanitization.
    if not sanitized:
        sanitized = "sanitized_file"

    # Truncate if too long, preserving the extension when it fits.
    if len(sanitized) > max_length:
        name, ext = os.path.splitext(sanitized)
        if len(ext) < max_length:
            sanitized = name[: max_length - len(ext)] + ext
        else:
            # The extension alone would exceed the budget; a negative slice
            # index here would silently produce an over-long result, so fall
            # back to a plain cut.
            sanitized = sanitized[:max_length]
        # The cut may expose a trailing dot or space; the first character is
        # never one of those, so the result cannot become empty.
        sanitized = sanitized.rstrip(". ")

    return sanitized
def secure_path_join(base_path: Union[str, Path], *path_parts: str) -> Path:
    """
    Join path components onto a base directory and refuse results outside it.

    Empty components are skipped. A component that contains a reserved or
    control character, or a run of two or more dots, is passed through
    sanitize_filename() (which reduces it to a cleaned basename); every other
    component is used unchanged. The joined path is resolved and must lie
    under the resolved base directory.

    Args:
        base_path: The base directory path
        *path_parts: Additional path components to join

    Returns:
        The resolved Path of the joined location

    Raises:
        PermissionError: If the resolved result is not located under the
            resolved base directory
    """
    base_path = Path(base_path).resolve()

    # Clean only the components that look suspicious; skip empty ones.
    cleaned_parts = [
        sanitize_filename(piece)
        if re.search(r'[<>:"|?*\x00-\x1f]|\.{2,}', piece)
        else piece
        for piece in path_parts
        if piece
    ]

    # Join everything in one go and normalize the result.
    result_path = base_path.joinpath(*cleaned_parts).resolve()

    # Containment check: relative_to() raises ValueError when result_path is
    # not located under base_path.
    try:
        result_path.relative_to(base_path)
    except ValueError:
        raise PermissionError(f"Path would escape base directory: {result_path}")

    return result_path
def secure_file_write(
    base_path: Union[str, Path],
    filename: str,
    content: str,
    mode: str = "w",
    encoding: Optional[str] = None,
    **kwargs,
) -> None:
    """
    Write content to a file located under a base directory.

    The destination is computed with secure_path_join(), so a filename that
    would resolve outside base_path is rejected before anything is written.
    Missing parent directories of the destination are created.

    Args:
        base_path: The base directory under which to write the file
        filename: The target file name or relative path (untrusted)
        content: The content to write
        mode: File open mode (default: 'w')
        encoding: Text encoding; only forwarded to open() when truthy
        **kwargs: Additional arguments for open(); these are applied last and
            therefore override 'mode' and 'encoding' if given
    """
    # Resolve and validate the destination first.
    destination = secure_path_join(base_path, filename)

    # Create the directory tree only after the destination passed validation.
    destination.parent.mkdir(parents=True, exist_ok=True)

    # Assemble the open() arguments; later entries win on key collisions.
    encoding_kwargs = {"encoding": encoding} if encoding else {}
    with open(destination, **{"mode": mode, **encoding_kwargs, **kwargs}) as handle:
        handle.write(content)
def secure_file_read(
    base_path: Union[str, Path],
    filename: str,
    mode: str = "r",
    encoding: Optional[str] = None,
    **kwargs,
) -> str:
    """
    Read and return the content of a file located under a base directory.

    The location is computed with secure_path_join(), so a filename that
    would resolve outside base_path is rejected before the file is opened.

    Args:
        base_path: The base directory under which to read the file
        filename: The target file name or relative path (untrusted)
        mode: File open mode (default: 'r')
        encoding: Text encoding; only forwarded to open() when truthy
        **kwargs: Additional arguments for open(); these are applied last and
            therefore override 'mode' and 'encoding' if given

    Returns:
        Whatever read() yields for the opened file (text in the default mode)

    Raises:
        FileNotFoundError: If the resolved path does not exist
        ValueError: If the resolved path exists but is not a regular file
    """
    # Resolve and validate the location first.
    file_path = secure_path_join(base_path, filename)

    # Guard clauses: the target has to exist and be a regular file.
    if not file_path.exists():
        raise FileNotFoundError(f"File not found: {file_path}")
    if not file_path.is_file():
        raise ValueError(f"Path is not a file: {file_path}")

    # Assemble the open() arguments; later entries win on key collisions.
    encoding_kwargs = {"encoding": encoding} if encoding else {}
    with open(file_path, **{"mode": mode, **encoding_kwargs, **kwargs}) as handle:
        return handle.read()
def validate_path_safety(
    path: Union[str, Path], base_path: Optional[Union[str, Path]] = None
) -> bool:
    """
    Validate that a path is safe and doesn't contain dangerous patterns.

    The textual checks run against the path exactly as supplied by the
    caller. A path is rejected when it contains "..", a doubled forward
    slash, or a backslash on platforms where the backslash is not the native
    separator (a doubled backslash where it is). When base_path is given and
    truthy, the resolved path must additionally lie under the resolved base.

    Args:
        path: The path to validate
        base_path: Optional base path to check against

    Returns:
        True if the path is safe, False otherwise. Any error raised while
        inspecting the path is treated as "not safe".
    """
    try:
        # Look at the caller's raw text: Path() collapses repeated slashes,
        # so inspecting str(Path(path)) would never see "a//b".
        raw = os.fspath(path)
        if not isinstance(raw, str):
            return False

        dangerous_patterns = [
            "..",  # Parent directory (or any run of dots)
            "//",  # Double slashes
        ]
        if os.sep == "\\":
            # Backslash is the native separator here, so only a doubled one
            # is suspicious; rejecting single backslashes would refuse every
            # multi-component path on this platform.
            dangerous_patterns.append("\\\\")
        else:
            # Backslashes are not separators here and are never expected.
            dangerous_patterns.append("\\")

        if any(pattern in raw for pattern in dangerous_patterns):
            return False

        # If base path is provided, ensure the path is within it.
        if base_path:
            resolved_base = Path(base_path).resolve()
            resolved_path = Path(raw).resolve()
            try:
                resolved_path.relative_to(resolved_base)
            except ValueError:
                return False

        return True
    except Exception:
        # Fail closed: a path that cannot even be inspected is not safe.
        return False
# Backward compatibility functions that maintain the same interface as os.path
def secure_join(*paths: str) -> str:
    """
    Secure alternative to os.path.join that prevents path injection.

    The first argument is treated as the base directory. If any later
    component contains a reserved/control character, a run of two or more
    dots, or is anchored (has a root or drive, e.g. an absolute path), the
    join is delegated to secure_path_join(), which resolves the result and
    refuses anything outside the base. Otherwise the components are joined
    as-is, without resolving.

    Args:
        *paths: Path components to join

    Returns:
        A safe joined path string ("" when called without arguments)

    Raises:
        PermissionError: Propagated from secure_path_join() when the joined
            path would leave the base directory
    """
    if not paths:
        return ""

    # Use the first path as base, others as components.
    base_path = Path(paths[0])
    path_parts = paths[1:]

    # An anchored component must be validated too: pathlib drops every
    # earlier segment when it meets one, so Path(base, "/etc/passwd") would
    # otherwise hand back the attacker-chosen location unchecked.
    needs_validation = any(
        re.search(r'[<>:"|?*\x00-\x1f]|\.{2,}', part) or Path(part).anchor
        for part in path_parts
    )
    if needs_validation:
        return str(secure_path_join(base_path, *path_parts))

    # Nothing suspicious: plain joining keeps relative paths relative.
    return str(Path(*paths))
def secure_basename(path: str) -> str:
    """
    Secure alternative to os.path.basename that sanitizes the result.

    The final component is taken with os.path.basename(). It is passed
    through sanitize_filename() only when it contains a reserved or control
    character, or a run of two or more dots; otherwise it is returned as-is.

    Args:
        path: The path to get the basename from

    Returns:
        The basename, sanitized if it contained a dangerous pattern
    """
    tail = os.path.basename(path)
    looks_dangerous = re.search(r'[<>:"|?*\x00-\x1f]|\.{2,}', tail) is not None
    return sanitize_filename(tail) if looks_dangerous else tail
|