File size: 4,094 Bytes
5d66f59 f8390d3 1dbbd42 f8390d3 1dbbd42 5d66f59 1dbbd42 5d66f59 f8390d3 5d66f59 1dbbd42 5d66f59 1dbbd42 5d66f59 1dbbd42 f8390d3 1dbbd42 f8390d3 1dbbd42 5d66f59 1dbbd42 5d66f59 1dbbd42 5d66f59 1dbbd42 5d66f59 1dbbd42 5d66f59 1dbbd42 5d66f59 f8390d3 5d66f59 f8390d3 5d66f59 1dbbd42 f8390d3 5d66f59 f8390d3 1dbbd42 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 |
import html
import re
from urllib.parse import urlparse

import requests
from flask import Flask, request, Response
# WSGI application object; the route decorators further down register on it.
app = Flask(__name__)
# --- Whitelist filtering rules ---
# Every pattern is anchored at the start with ``^https://`` followed by a
# literal GitHub host name, and is compiled case-insensitively.
ALLOWED_PATTERNS = [
    # Release assets and source archives.
    re.compile(r'^https://github\.com/[^/]+/[^/]+/(?:releases|archive)/.*$', re.IGNORECASE),
    # File views and raw file downloads inside a repository.
    re.compile(r'^https://github\.com/[^/]+/[^/]+/(?:blob|raw)/.*$', re.IGNORECASE),
    # Git smart-HTTP discovery, e.g. ``<repo>/info/refs?service=git-upload-pack``.
    re.compile(r'^https://github\.com/[^/]+/[^/]+/(?:info|git-).*/.*$', re.IGNORECASE),  # This is key for git clone
    # Git smart-HTTP data exchange. The rule above demands another "/" after
    # "git-", so it never matches ``<repo>/git-upload-pack``; without this
    # entry the POST step of ``git clone`` is rejected. An optional query
    # string (including a bare trailing "?") is tolerated.
    re.compile(r'^https://github\.com/[^/]+/[^/]+/git-(?:upload|receive)-pack(?:\?.*)?$', re.IGNORECASE),
    # Raw content and gist hosts.
    re.compile(r'^https://raw\.(?:githubusercontent|github)\.com/[^/]+/[^/]+/.*/.*$', re.IGNORECASE),
    re.compile(r'^https://gist\.(?:githubusercontent|github)\.com/[^/]+/[^/]+/.*/.*$', re.IGNORECASE),
    # Repository tag listings.
    re.compile(r'^https://github\.com/[^/]+/[^/]+/tags.*$', re.IGNORECASE),
    # Static asset hosts.
    re.compile(r'^https://avatars\.githubusercontent\.com/.*$', re.IGNORECASE),
    re.compile(r'^https://github\.githubassets\.com/.*$', re.IGNORECASE),
    # User/organisation pages and repository landing pages.
    re.compile(r'^https://github\.com/[^/]+/?$', re.IGNORECASE),
    re.compile(r'^https://github\.com/[^/]+/[^/]+/?$', re.IGNORECASE),
]
def is_url_allowed(url):
    """Return True when *url* matches at least one whitelist pattern.

    Each entry of ``ALLOWED_PATTERNS`` is tried with ``match`` (anchored at
    the start of the string); False is returned when none of them match.
    """
    return any(allowed.match(url) is not None for allowed in ALLOWED_PATTERNS)
# --- Core Proxy Logic ---
# A single, consolidated route to capture all requests and methods
@app.route('/', defaults={'path': ''}, methods=['GET', 'POST', 'PUT', 'DELETE'])
@app.route('/<path:path>', methods=['GET', 'POST', 'PUT', 'DELETE'])
def proxy(path):
    """Forward the request to the target URL embedded in the request path.

    The part of the request URL after the leading slash is treated as the
    target URL (``https://`` is prepended when no scheme is given). The
    target must pass ``is_url_allowed`` before anything is sent upstream.

    Args:
        path: Path captured by the route. The target is rebuilt from
            ``request.path`` and ``request.query_string`` so the query
            string is kept.

    Returns:
        A landing page (200) for the root path, a 403 page when the target
        is not whitelisted, a 400 message when no hostname can be parsed,
        a 502 message when the upstream request fails, or otherwise a
        streamed ``Response`` mirroring the upstream status and headers.
    """
    # --- 1. Construct target URL from the request path ---
    # request.full_path always appends "?" even without a query string, which
    # made the root path look non-empty and left a dangling "?" on every
    # target URL. Path and query string are therefore joined by hand.
    target_path = request.path
    # Remove the leading slash
    if target_path.startswith('/'):
        target_path = target_path[1:]

    # If the path is empty (root request), return a simple landing page.
    # The placeholders are entity-escaped so the browser displays them
    # instead of parsing them as unknown tags.
    if not target_path:
        return ("<p>This is a GitHub reverse proxy. Usage:</p>"
                "<p><code>&lt;proxy_url&gt;/&lt;target_github_url&gt;</code></p>"
                "<p>Example: <code>/github.com/python/cpython.git</code></p>"), 200

    # The query string is essential for git (e.g. ?service=git-upload-pack).
    query_string = request.query_string.decode('utf-8', errors='replace')
    if query_string:
        target_path = f"{target_path}?{query_string}"

    # Prepend https:// if no scheme is present
    if not target_path.startswith(('http://', 'https://')):
        target_url = 'https://' + target_path
    else:
        target_url = target_path

    # --- 2. Perform security filter check ---
    if not is_url_allowed(target_url):
        # target_url is attacker-controlled; escape it before echoing it in HTML.
        error_message = (
            "<h1>403 Forbidden</h1>"
            "<p>This request is blocked by the proxy's security policy.</p>"
            f"<p>Blocked URL: {html.escape(target_url)}</p>"
        )
        return error_message, 403

    # --- 3. Forward the request ---
    try:
        target_host = urlparse(target_url).hostname
    except ValueError as e:
        return f"Invalid target URL in path: {html.escape(str(e))}", 400
    if not target_host:
        return "Invalid target URL in path: Could not parse hostname from target URL", 400

    # Pass the client's headers through, but address the upstream host.
    headers = {key: value for (key, value) in request.headers if key.lower() != 'host'}
    headers['Host'] = target_host

    try:
        resp = requests.request(
            method=request.method,
            url=target_url,
            headers=headers,
            data=request.get_data(),
            cookies=request.cookies,
            allow_redirects=False,
            stream=True,
            timeout=30,  # Added a timeout for robustness
        )
    except requests.exceptions.RequestException as e:
        # The exception text can contain the requested URL, so escape it too.
        return f"An error occurred while proxying: {html.escape(str(e))}", 502

    # These headers describe the upstream transfer framing; the body is
    # re-streamed through iter_content, so they are not forwarded.
    excluded_headers = {'content-encoding', 'content-length', 'transfer-encoding', 'connection'}
    response_headers = [
        (name, value)
        for (name, value) in resp.raw.headers.items()
        if name.lower() not in excluded_headers
    ]
    response = Response(
        resp.iter_content(chunk_size=8192),
        status=resp.status_code,
        headers=response_headers,
    )
    # Release the upstream connection even if the client disconnects early.
    response.call_on_close(resp.close)
    return response
if __name__ == '__main__':
    # Development entry point: listens on all interfaces, port 7860.
    # For production, use a proper WSGI server like Gunicorn
    app.run(host='0.0.0.0', port=7860)