File size: 8,024 Bytes
f957846
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5345e1f
 
f957846
 
 
 
 
 
5345e1f
f957846
 
5345e1f
 
f957846
 
 
 
 
5345e1f
 
f957846
5345e1f
f957846
 
 
 
 
 
 
 
 
 
 
 
 
5345e1f
 
f957846
 
 
 
 
5345e1f
f957846
 
5345e1f
 
f957846
 
 
 
 
 
 
5345e1f
 
f957846
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
"""
Secure path utilities to prevent path injection attacks.

This module provides secure alternatives to os.path operations that validate
and sanitize file paths to prevent directory traversal and other path-based attacks.
"""

import logging
import os
import re
from pathlib import Path
from typing import Optional, Union

logger = logging.getLogger(__name__)


def sanitize_filename(filename: str, max_length: int = 255) -> str:
    """
    Sanitize a filename to prevent path injection attacks.

    Directory components are dropped (both ``/`` and ``\\`` are treated as
    separators on every platform), reserved/control characters are replaced
    with underscores, runs of dots are collapsed, and the result is trimmed
    to at most ``max_length`` characters.

    Args:
        filename: The filename to sanitize
        max_length: Maximum length of the sanitized filename (must be >= 1)

    Returns:
        A sanitized filename safe for use in file operations

    Raises:
        ValueError: If ``filename`` is empty or not a string, or if
            ``max_length`` is smaller than 1
    """
    if not filename or not isinstance(filename, str):
        raise ValueError("Filename must be a non-empty string")
    if max_length < 1:
        raise ValueError("max_length must be at least 1")

    # os.path.basename only splits on "/" under POSIX, so a Windows-style
    # payload such as "..\\..\\evil.txt" would keep its directory parts.
    # Normalising backslashes first makes the result platform-independent.
    filename = os.path.basename(filename.replace("\\", "/"))

    # Replace characters that are reserved on common filesystems, plus
    # ASCII control characters (including NUL), with underscores.
    sanitized = re.sub(r'[<>:"|?*\x00-\x1f]', "_", filename)

    # Collapse runs of dots so that ".." can never survive inside the name.
    sanitized = re.sub(r"\.{2,}", ".", sanitized)

    # Remove leading/trailing dots and spaces
    sanitized = sanitized.strip(". ")

    # Ensure it's not empty after sanitization
    if not sanitized:
        sanitized = "sanitized_file"

    # Truncate if too long, preserving the extension when it fits.
    if len(sanitized) > max_length:
        name, ext = os.path.splitext(sanitized)
        if len(ext) < max_length:
            sanitized = name[: max_length - len(ext)] + ext
        else:
            # The extension alone would exceed the limit; a negative slice
            # bound here would silently return an over-long name, so fall
            # back to a plain hard cut.
            sanitized = sanitized[:max_length]
        # The cut may have exposed a trailing dot or space; the first
        # character is never one of those, so the result stays non-empty.
        sanitized = sanitized.rstrip(". ")

    return sanitized


def secure_path_join(base_path: Union[str, Path], *path_parts: str) -> Path:
    """
    Join path components onto a base directory and verify containment.

    Falsy components are skipped. A component that contains a reserved
    character, a control character, or two or more consecutive dots is
    passed through ``sanitize_filename``; every other component is used
    as given. The joined path is resolved and must lie inside the resolved
    base directory.

    Args:
        base_path: The base directory path
        *path_parts: Additional path components to join

    Returns:
        The resolved Path located inside the base directory

    Raises:
        ValueError: Propagated from ``sanitize_filename`` when a component
            cannot be sanitized
        PermissionError: If the resulting path would escape the base directory
    """
    risky = re.compile(r'[<>:"|?*\x00-\x1f]|\.{2,}')
    root = Path(base_path).resolve()

    # Clean only the components that actually match a risky pattern.
    cleaned = [
        sanitize_filename(piece) if risky.search(piece) else piece
        for piece in path_parts
        if piece
    ]

    # Resolving follows symlinks and collapses any remaining ".." segments,
    # so the containment check below sees the real destination.
    candidate = root.joinpath(*cleaned).resolve()

    try:
        candidate.relative_to(root)
    except ValueError:
        raise PermissionError(f"Path would escape base directory: {candidate}")

    return candidate


def secure_file_write(
    base_path: Union[str, Path],
    filename: str,
    content: str,
    mode: str = "w",
    encoding: Optional[str] = None,
    **kwargs,
) -> None:
    """
    Write content to a file located inside a base directory.

    The destination is computed with ``secure_path_join`` so that it cannot
    fall outside ``base_path``; missing parent directories are created
    before the file is opened.

    Args:
        base_path: The base directory under which to write the file
        filename: The target file name or relative path (untrusted)
        content: The content to write
        mode: File open mode (default: 'w')
        encoding: Text encoding; forwarded to open() only when truthy
        **kwargs: Additional arguments for open(); these override ``mode``
            and ``encoding`` entries of the same name
    """
    # Validate and resolve the destination before touching the filesystem.
    target = secure_path_join(base_path, filename)

    # Directories are created only after the path has passed validation.
    target.parent.mkdir(parents=True, exist_ok=True)

    options = {"mode": mode}
    if encoding:
        options["encoding"] = encoding
    options = {**options, **kwargs}

    with open(target, **options) as handle:
        handle.write(content)


def secure_file_read(
    base_path: Union[str, Path],
    filename: str,
    mode: str = "r",
    encoding: Optional[str] = None,
    **kwargs,
) -> str:
    """
    Read a file located inside a base directory.

    The source path is computed with ``secure_path_join`` so that it cannot
    fall outside ``base_path``.

    Args:
        base_path: The base directory under which to read the file
        filename: The target file name or relative path (untrusted)
        mode: File open mode (default: 'r')
        encoding: Text encoding; forwarded to open() only when truthy
        **kwargs: Additional arguments for open(); these override ``mode``
            and ``encoding`` entries of the same name

    Returns:
        The file content

    Raises:
        FileNotFoundError: If the resolved path does not exist
        ValueError: If the resolved path exists but is not a regular file
    """
    # Validate and resolve the source path first.
    source = secure_path_join(base_path, filename)

    if not source.exists():
        raise FileNotFoundError(f"File not found: {source}")
    if not source.is_file():
        raise ValueError(f"Path is not a file: {source}")

    options = {"mode": mode}
    if encoding:
        options["encoding"] = encoding
    options = {**options, **kwargs}

    with open(source, **options) as handle:
        return handle.read()


def validate_path_safety(
    path: Union[str, Path], base_path: Optional[Union[str, Path]] = None
) -> bool:
    """
    Validate that a path is safe and doesn't contain dangerous patterns.

    The check is purely textual unless ``base_path`` is given, in which case
    the resolved path must also lie inside the resolved base directory.
    Note that a relative ``path`` is resolved against the current working
    directory, not against ``base_path``.

    Args:
        path: The path to validate
        base_path: Optional base path to check against

    Returns:
        True if the path is safe, False otherwise (including when the input
        cannot be interpreted as a path at all)
    """
    try:
        candidate = Path(path)

        # Inspect the caller's original text when we have it: Path()
        # normalises its input (e.g. "a//b" becomes "a/b"), which would
        # hide exactly the patterns this function is meant to reject.
        raw = path if isinstance(path, str) else str(candidate)

        dangerous_patterns = [
            "..",  # Parent directory
            "//",  # Double slashes
            "\x00",  # Embedded NUL is rejected by the OS and truncates C strings
        ]
        # Backslashes are suspicious only where they are not the native
        # separator; on Windows every nested path legitimately contains them.
        if os.sep != "\\":
            dangerous_patterns.append("\\")

        if any(pattern in raw for pattern in dangerous_patterns):
            return False

        # If base path is provided, ensure the path is within it
        if base_path:
            root = Path(base_path).resolve()
            try:
                candidate.resolve().relative_to(root)
            except ValueError:
                return False

        return True

    except Exception:
        # Deliberately broad: a validator must fail closed, so anything that
        # prevents the path from being analysed is reported as unsafe.
        return False


# Backward compatibility functions that maintain the same interface as os.path
def secure_join(*paths: str) -> str:
    """
    Secure alternative to os.path.join that prevents path injection.

    The first argument is treated as the base; the remaining arguments are
    joined onto it. If any later component looks dangerous (reserved or
    control characters, two or more consecutive dots) or is anchored (has a
    root or drive and would therefore replace the base when joined), the
    join is delegated to ``secure_path_join``, which sanitizes the
    components and raises ``PermissionError`` if the result leaves the base
    directory. Otherwise the components are joined as-is.

    Args:
        *paths: Path components to join

    Returns:
        A safe joined path string ("" when called without arguments)

    Raises:
        PermissionError: If the joined path would escape the base directory
    """
    if not paths:
        return ""

    # Use the first path as base, others as components
    base_path = Path(paths[0])
    path_parts = paths[1:]

    def _needs_checking(part: str) -> bool:
        # Return True when a component must go through secure_path_join.
        if re.search(r'[<>:"|?*\x00-\x1f]|\.{2,}', part):
            return True
        # An anchored component ("/etc/passwd", "C:\\x") discards everything
        # joined before it, so it must never take the unchecked fast path.
        return bool(Path(part).anchor)

    if any(_needs_checking(part) for part in path_parts):
        return str(secure_path_join(base_path, *path_parts))

    # Use normal path joining for safe paths
    return str(Path(*paths))


def secure_basename(path: str) -> str:
    """
    Return the final component of a path, sanitized when necessary.

    The component is passed through ``sanitize_filename`` only if it
    contains a reserved character, a control character, or two or more
    consecutive dots; otherwise it is returned unchanged.

    Args:
        path: The path to get the basename from

    Returns:
        The (possibly sanitized) basename
    """
    tail = os.path.basename(path)

    # Fast path: nothing suspicious in the component, hand it back untouched.
    if re.search(r'[<>:"|?*\x00-\x1f]|\.{2,}', tail) is None:
        return tail

    return sanitize_filename(tail)