File size: 4,094 Bytes
5d66f59
f8390d3
 
1dbbd42
f8390d3
 
 
1dbbd42
5d66f59
 
 
1dbbd42
5d66f59
 
 
 
 
 
 
 
f8390d3
5d66f59
1dbbd42
5d66f59
 
 
 
 
1dbbd42
5d66f59
1dbbd42
 
 
f8390d3
 
1dbbd42
f8390d3
 
1dbbd42
 
5d66f59
 
1dbbd42
5d66f59
 
 
1dbbd42
 
 
 
 
 
 
5d66f59
 
 
 
 
1dbbd42
5d66f59
 
 
 
 
 
 
 
1dbbd42
5d66f59
 
1dbbd42
 
 
 
5d66f59
 
 
 
f8390d3
 
 
5d66f59
f8390d3
 
 
5d66f59
1dbbd42
 
f8390d3
 
 
 
 
 
 
 
5d66f59
 
f8390d3
 
1dbbd42
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
import html
import re
from urllib.parse import urlparse

import requests
from flask import Flask, request, Response

# WSGI application object; the @app.route decorators in this module register
# the proxy handler on it, and the __main__ guard runs it.
app = Flask(__name__)

# --- Whitelist filtering rules ---
# Every pattern is anchored with '^https://<host>/', so a match pins both the
# scheme and the upstream host. Matching is case-insensitive.
ALLOWED_PATTERNS = [
    re.compile(r'^https://github\.com/[^/]+/[^/]+/(?:releases|archive)/.*$', re.IGNORECASE),
    re.compile(r'^https://github\.com/[^/]+/[^/]+/(?:blob|raw)/.*$', re.IGNORECASE),
    re.compile(r'^https://github\.com/[^/]+/[^/]+/(?:info|git-).*/.*$', re.IGNORECASE), # This is key for git clone
    re.compile(r'^https://raw\.(?:githubusercontent|github)\.com/[^/]+/[^/]+/.*/.*$', re.IGNORECASE),
    re.compile(r'^https://gist\.(?:githubusercontent|github)\.com/[^/]+/[^/]+/.*/.*$', re.IGNORECASE),
    re.compile(r'^https://github\.com/[^/]+/[^/]+/tags.*$', re.IGNORECASE),
    re.compile(r'^https://avatars\.githubusercontent\.com/.*$', re.IGNORECASE),
    re.compile(r'^https://github\.githubassets\.com/.*$', re.IGNORECASE),
    re.compile(r'^https://github\.com/[^/]+/?$', re.IGNORECASE),
    re.compile(r'^https://github\.com/[^/]+/[^/]+/?$', re.IGNORECASE),
]


def _has_dot_segment(url):
    """Return True if the path part of *url* contains a '.' or '..' segment."""
    # Only the path matters; '..' inside a query string or fragment is inert.
    path_part = re.split(r'[?#]', url, maxsplit=1)[0]
    for segment in path_part.split('/'):
        # Treat percent-encoded dots ('%2e') like literal ones so an encoded
        # traversal cannot slip past the check.
        if segment.lower().replace('%2e', '.') in ('.', '..'):
            return True
    return False


def is_url_allowed(url):
    """Check whether *url* may be proxied.

    A URL is allowed when it matches at least one entry of ALLOWED_PATTERNS
    and its path contains no '.' or '..' segment.

    The dot-segment rule exists because the patterns are matched against the
    raw string, while HTTP clients commonly collapse dot segments before
    sending (RFC 3986 section 5.2.4). Without it,
    'https://github.com/a/b/blob/../../../settings' would pass the whitelist
    yet be fetched from a path that was never approved.

    Args:
        url: Absolute target URL, including scheme.

    Returns:
        True if the URL is permitted, False otherwise.
    """
    if _has_dot_segment(url):
        return False
    return any(pattern.match(url) for pattern in ALLOWED_PATTERNS)

# --- Core Proxy Logic ---

# A single, consolidated route to capture all requests and methods
@app.route('/', defaults={'path': ''}, methods=['GET', 'POST', 'PUT', 'DELETE'])
@app.route('/<path:path>', methods=['GET', 'POST', 'PUT', 'DELETE'])
def proxy(path):
    """
    Reverse-proxy the request to the target URL embedded in the request path.

    The target is everything after the leading '/' of the incoming request,
    query string included; 'https://' is prepended when no scheme is given.
    The target must pass is_url_allowed() before anything is fetched.

    Args:
        path: Path captured by the route. Not used directly: the target is
            rebuilt from ``request.full_path`` so the query string (needed by
            git's smart HTTP protocol) is preserved.

    Returns:
        - 200 with a short HTML usage page for a request to the bare root.
        - 403 with an HTML message when the target is not whitelisted.
        - 400 when no hostname can be parsed from the target.
        - 502 when the upstream request fails.
        - Otherwise a streaming Response mirroring the upstream status,
          headers (minus hop-by-hop/encoding headers) and body. Redirects
          are passed through to the client, not followed.
    """

    # --- 1. Construct target URL from the request path ---
    # request.full_path includes the query string, which is essential for git
    target_path = request.full_path

    # full_path is always "<path>?<query>", so a request without a query
    # string ends in a dangling '?'. Drop it; otherwise the root request is
    # never seen as empty and every target URL gains a stray '?'.
    if not request.query_string and target_path.endswith('?'):
        target_path = target_path[:-1]

    # Remove the leading slash
    if target_path.startswith('/'):
        target_path = target_path[1:]

    # If the path is empty (root request), return a simple landing page
    if not target_path:
        return ("<p>This is a GitHub reverse proxy. Usage:</p>"
                "<p><code>&lt;proxy_url&gt;/&lt;target_github_url&gt;</code></p>"
                "<p>Example: <code>/github.com/python/cpython.git</code></p>"), 200

    # Prepend https:// if no scheme is present
    if target_path.startswith(('http://', 'https://')):
        target_url = target_path
    else:
        target_url = 'https://' + target_path

    # --- 2. Perform security filter check ---
    if not is_url_allowed(target_url):
        # target_url is attacker-controlled and this body is served as HTML,
        # so it must be escaped before being echoed back.
        error_message = (
            "<h1>403 Forbidden</h1>"
            "<p>This request is blocked by the proxy's security policy.</p>"
            f"<p>Blocked URL: {html.escape(target_url)}</p>"
        )
        return error_message, 403

    # --- 3. Forward the request ---
    try:
        target_host = urlparse(target_url).hostname
    except ValueError as e:
        return f"Invalid target URL in path: {html.escape(str(e))}", 400
    if not target_host:
        return "Invalid target URL in path: Could not parse hostname from target URL", 400

    # Host is replaced with the upstream host. Accept-Encoding is dropped so
    # that requests advertises only encodings it can decode itself: the body
    # is re-streamed decoded and Content-Encoding is stripped below, so an
    # encoding requests could not decode would reach the client corrupted.
    dropped_request_headers = ('host', 'accept-encoding')
    headers = {
        key: value
        for (key, value) in request.headers
        if key.lower() not in dropped_request_headers
    }
    headers['Host'] = target_host

    try:
        resp = requests.request(
            method=request.method,
            url=target_url,
            headers=headers,
            data=request.get_data(),
            cookies=request.cookies,
            allow_redirects=False,
            stream=True,
            timeout=30,  # Avoid hanging forever on an unresponsive upstream
        )
    except requests.exceptions.RequestException as e:
        # The exception text can contain the requested URL; escape it.
        return f"An error occurred while proxying: {html.escape(str(e))}", 502

    # These headers describe the upstream transfer framing/encoding, which no
    # longer applies once the body is decoded and re-streamed by this server.
    excluded_headers = {'content-encoding', 'content-length', 'transfer-encoding', 'connection'}
    response_headers = [
        (name, value)
        for (name, value) in resp.raw.headers.items()
        if name.lower() not in excluded_headers
    ]

    response = Response(
        resp.iter_content(chunk_size=8192),
        status=resp.status_code,
        headers=response_headers,
    )
    # Release the upstream connection once the response has been sent (or the
    # client has gone away), instead of relying on garbage collection.
    response.call_on_close(resp.close)
    return response


if __name__ == "__main__":
    # Direct execution starts Flask's built-in server on all interfaces,
    # port 7860. In production, serve `app` from a proper WSGI server such
    # as Gunicorn instead.
    app.run(
        port=7860,
        host='0.0.0.0',
    )