"""Private, key-protected reverse proxy for a whitelisted set of GitHub URLs.

Requests look like ``/<secret key>/<GitHub URL>``. The key is checked first,
the target URL is validated against ``ALLOWED_PATTERNS``, and only then is the
request forwarded upstream and the response streamed back to the client.
"""

import hmac
import html
import logging
import os
import re
from urllib.parse import unquote, urlparse

import requests
from flask import Flask, Response, request

# --- Basic Configuration ---
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
app = Flask(__name__)

# --- Authentication Configuration ---
# Read the secret key from an environment variable.
# The service will not start if this key is not set for security reasons.
SECRET_KEY = os.environ.get('PROXY_SECRET_KEY')
if not SECRET_KEY:
    logging.critical("FATAL: Environment variable PROXY_SECRET_KEY is not set. Service cannot start.")
    # SystemExit is raised directly: the ``exit`` helper is injected by the
    # ``site`` module and is not guaranteed to exist in every runtime.
    raise SystemExit("Error: The PROXY_SECRET_KEY environment variable must be set.")

# --- Whitelisted URL Patterns for GitHub ---
ALLOWED_PATTERNS = [
    # Repositories: releases, archives, blobs, raw content
    re.compile(r'^https://github\.com/[^/]+/[^/]+/(?:releases|archive)/.*$', re.IGNORECASE),
    re.compile(r'^https://github\.com/[^/]+/[^/]+/(?:blob|raw)/.*$', re.IGNORECASE),
    # Git operations (clone, pull, push)
    re.compile(r'^https://github\.com/[^/]+/[^/]+/(?:info|git-).*$', re.IGNORECASE),
    # Raw content from various GitHub domains
    re.compile(r'^https://raw\.(?:githubusercontent|github)\.com/[^/]+/[^/]+/.*/.*$', re.IGNORECASE),
    re.compile(r'^https://gist\.(?:githubusercontent|github)\.com/[^/]+/[^/]+/.*/.*$', re.IGNORECASE),
    # Repository tags and assets
    re.compile(r'^https://github\.com/[^/]+/[^/]+/tags.*$', re.IGNORECASE),
    re.compile(r'^https://avatars\.githubusercontent\.com/.*$', re.IGNORECASE),
    re.compile(r'^https://github\.githubassets\.com/.*$', re.IGNORECASE),
    # Main repository/user pages
    re.compile(r'^https://github\.com/[^/]+/?$', re.IGNORECASE),
    re.compile(r'^https://github\.com/[^/]+/[^/]+/?$', re.IGNORECASE),
]

# Upstream response headers that must not be copied to the client: the body is
# re-streamed by this process, so the upstream framing/encoding no longer applies.
_EXCLUDED_RESPONSE_HEADERS = frozenset(
    {'content-encoding', 'content-length', 'transfer-encoding', 'connection'}
)

# --- Custom Index Page (Updated with Authentication Info) ---
# The key is HTML-escaped because it is interpolated into markup. The page is
# only ever served to a caller who already presented the correct key.
_SECRET_KEY_HTML = html.escape(SECRET_KEY)
INDEX_PAGE_HTML = f"""<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>Private GitHub Proxy</title>
</head>
<body>
<h1>Private GitHub Reverse Proxy</h1>
<p><strong>Authentication Required:</strong> This is a private proxy. You must include your secret key in the URL to access content.</p>
<h2>How to Use</h2>
<p>To access GitHub content, prepend your secret key to the GitHub URL.</p>
<p>For example, to clone a repository:</p>
<pre><code>git clone {{YOUR_PROXY_URL}}/{_SECRET_KEY_HTML}/https://github.com/owner/repo.git</code></pre>
<p>Or to view a repository page:</p>
<pre><code>{{YOUR_PROXY_URL}}/{_SECRET_KEY_HTML}/https://github.com/owner/repo</code></pre>
</body>
</html>
"""

UNAUTHORIZED_HTML = "<h1>401 Unauthorized</h1><p>A valid secret key is required in the URL path.</p>"
FORBIDDEN_HTML = "<h1>403 Forbidden</h1><p>Request blocked by proxy security policy.</p>"


def _key_matches(candidate):
    """Return True if *candidate* equals SECRET_KEY, using a constant-time compare."""
    # Compare as bytes: hmac.compare_digest rejects non-ASCII str arguments.
    return hmac.compare_digest(candidate.encode('utf-8'), SECRET_KEY.encode('utf-8'))


def is_url_allowed(url):
    """Check if the given URL matches any pattern in the whitelist.

    A URL is also rejected when its path contains a ``..`` segment: the
    patterns are matched against the literal string, and an HTTP client may
    collapse dot segments before sending, which would let a whitelisted prefix
    resolve to a different path.

    Args:
        url: Absolute target URL, already percent-decoded.

    Returns:
        True if the URL may be proxied, False otherwise.
    """
    if not any(pattern.match(url) for pattern in ALLOWED_PATTERNS):
        return False
    try:
        segments = urlparse(url).path.split('/')
    except ValueError:
        # An unparseable URL is treated as not allowed.
        return False
    return '..' not in segments


# --- Core Proxy Logic (Updated with Authentication) ---
@app.route('/', defaults={'path': ''}, methods=['GET', 'POST', 'PUT', 'DELETE'])
@app.route('/<path:path>', methods=['GET', 'POST', 'PUT', 'DELETE'])
def proxy(path):
    """Authenticate the request, validate the target, and proxy it to GitHub.

    The first path segment must equal SECRET_KEY; the remainder is the target
    URL (``https://`` is prepended when no scheme is given).

    Args:
        path: Everything after the leading ``/`` of the request path, e.g.
            ``"mysecretkey/https://github.com/user/repo"``.

    Returns:
        A Flask response: 401 on a bad or missing key, the index page when only
        the key is given, 403 when the target is not whitelisted, 400 when no
        hostname can be parsed, 502 when the upstream request fails, otherwise
        the streamed upstream response with its status code and headers.
    """
    # e.g. "mysecretkey/https://github.com/user/repo" -> ("mysecretkey", "https://...")
    supplied_key, _, remainder = path.partition('/')

    # --- Authentication Check ---
    if not _key_matches(supplied_key):
        logging.warning(
            "Authentication failed for request from %s. Path: '%s'",
            request.remote_addr,
            path,
        )
        return UNAUTHORIZED_HTML, 401

    # Correct key but no target URL (e.g. "/<key>" or "/<key>/"): show the index page.
    if not remainder:
        return INDEX_PAGE_HTML, 200

    target_path = unquote(remainder)

    # Prepend 'https://' if the scheme is missing.
    if target_path.startswith(('http://', 'https://')):
        target_url = target_path
    else:
        target_url = 'https://' + target_path

    # Security check: ensure the URL is in the whitelist.
    if not is_url_allowed(target_url):
        logging.warning("URL Denied! Key was correct, but pattern not matched: %s", target_url)
        return FORBIDDEN_HTML, 403

    try:
        target_host = urlparse(target_url).hostname
        if not target_host:
            raise ValueError("Hostname could not be parsed from the target URL.")
    except ValueError as e:
        logging.error("Invalid target URL provided: %s | Error: %s", target_url, e)
        return f"Invalid target URL in path: {e}", 400

    # Forward headers, but set the 'Host' header to the target's hostname.
    headers = {key: value for key, value in request.headers if key.lower() != 'host'}
    headers['Host'] = target_host

    # The route's path variable never includes the query string, so it is
    # forwarded explicitly (git's smart HTTP protocol relies on "?service=...").
    query = request.query_string.decode('utf-8', errors='replace')

    try:
        resp = requests.request(
            method=request.method,
            url=target_url,
            params=query or None,
            headers=headers,
            data=request.get_data(),
            cookies=request.cookies,
            allow_redirects=False,
            stream=True,
            timeout=30,
        )
    except requests.exceptions.RequestException as e:
        logging.error("Error while proxying to %s: %s", target_url, e)
        return "An error occurred while proxying the request.", 502

    response_headers = [
        (name, value)
        for name, value in resp.raw.headers.items()
        if name.lower() not in _EXCLUDED_RESPONSE_HEADERS
    ]
    response = Response(resp.iter_content(chunk_size=8192), resp.status_code, response_headers)
    # Release the upstream connection once the client response is finished or aborted.
    response.call_on_close(resp.close)
    return response


if __name__ == '__main__':
    app.run(host='0.0.0.0', port=7860)