File size: 6,895 Bytes
5d66f59
915b179
8c984b6
f8390d3
 
200c491
f8390d3
200c491
915b179
f8390d3
 
8c984b6
 
 
 
 
 
 
 
200c491
5d66f59
200c491
5d66f59
 
bc3acf5
200c491
bc3acf5
 
200c491
5d66f59
 
200c491
 
5d66f59
 
 
200c491
 
5d66f59
 
 
f8390d3
8c984b6
 
200c491
 
 
 
 
8c984b6
200c491
8c984b6
 
 
 
 
200c491
 
 
 
8c984b6
200c491
8c984b6
200c491
 
8c984b6
200c491
8c984b6
200c491
8c984b6
200c491
 
 
 
 
5d66f59
200c491
bc3acf5
5d66f59
 
 
 
8c984b6
1dbbd42
 
f8390d3
200c491
8c984b6
 
200c491
8c984b6
 
 
 
 
 
 
 
 
5d66f59
8c984b6
 
 
200c491
1dbbd42
8c984b6
 
200c491
5d66f59
 
 
 
915b179
200c491
5d66f59
8c984b6
200c491
5d66f59
 
 
1dbbd42
200c491
1dbbd42
200c491
1dbbd42
5d66f59
200c491
5d66f59
 
 
f8390d3
 
 
5d66f59
f8390d3
 
 
8c984b6
1dbbd42
8c984b6
f8390d3
 
200c491
 
 
 
f8390d3
200c491
 
5d66f59
f8390d3
ab21604
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
import hmac
import html
import logging
import os
import re
from urllib.parse import urlparse, unquote

import requests
from flask import Flask, request, Response

# --- Basic Configuration ---
# Root logger: INFO and above, formatted as "<time> - <LEVEL> - <message>".
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
# Flask application object that the route decorators in this module attach to.
app = Flask(__name__)

# --- Authentication Configuration ---
# The shared secret is read once, at import time, from the environment.
# An unset or empty value aborts start-up so the proxy never runs unprotected.
SECRET_KEY = os.environ.get('PROXY_SECRET_KEY')
if not SECRET_KEY:
    logging.critical("FATAL: Environment variable PROXY_SECRET_KEY is not set. Service cannot start.")
    # Raise SystemExit directly instead of calling the bare `exit()` helper:
    # that helper is injected by the `site` module for interactive sessions
    # and is missing under `python -S` and in frozen/embedded interpreters.
    raise SystemExit("Error: The PROXY_SECRET_KEY environment variable must be set.")
if '/' in SECRET_KEY:
    # The route handler compares the key with the text before the first '/'
    # of the request path, so a key containing '/' can never be matched.
    logging.warning(
        "PROXY_SECRET_KEY contains '/': the key is matched against the first "
        "path segment only, so no request will be able to authenticate."
    )

# --- Whitelisted URL Patterns for GitHub ---
# Regex sources for every URL shape the proxy is willing to fetch, grouped by
# the kind of GitHub resource they describe. Order is preserved on compile.
_ALLOWED_PATTERN_SOURCES = (
    # Repositories: releases, archives, blobs, raw content
    r'^https://github\.com/[^/]+/[^/]+/(?:releases|archive)/.*$',
    r'^https://github\.com/[^/]+/[^/]+/(?:blob|raw)/.*$',

    # Git operations (clone, pull, push)
    r'^https://github\.com/[^/]+/[^/]+/(?:info|git-).*$',

    # Raw content from various GitHub domains
    r'^https://raw\.(?:githubusercontent|github)\.com/[^/]+/[^/]+/.*/.*$',
    r'^https://gist\.(?:githubusercontent|github)\.com/[^/]+/[^/]+/.*/.*$',

    # Repository tags and assets
    r'^https://github\.com/[^/]+/[^/]+/tags.*$',
    r'^https://avatars\.githubusercontent\.com/.*$',
    r'^https://github\.githubassets\.com/.*$',

    # Main repository/user pages
    r'^https://github\.com/[^/]+/?$',
    r'^https://github\.com/[^/]+/[^/]+/?$',
)

# Compiled once at import time; every pattern is matched case-insensitively.
ALLOWED_PATTERNS = [
    re.compile(source, re.IGNORECASE) for source in _ALLOWED_PATTERN_SOURCES
]

# --- Custom Index Page (Updated with Authentication Info) ---
# Static help page, rendered once at import time. Because this is an f-string,
# literal CSS braces and the {YOUR_PROXY_URL} placeholder are written doubled
# ({{ }}), and only the secret key is interpolated. The key is HTML-escaped so
# that characters such as '<', '&' or quotes in it cannot break the markup.
INDEX_PAGE_HTML = f"""
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Private GitHub Proxy</title>
    <style>
        body {{ font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif; line-height: 1.6; color: #333; max-width: 800px; margin: 40px auto; padding: 20px; }}
        .container {{ border: 1px solid #ddd; border-radius: 8px; padding: 20px 40px; background-color: #f9f9f9; }}
        .warning {{ color: #856404; background-color: #fff3cd; border: 1px solid #ffeeba; padding: 15px; border-radius: 4px; margin-bottom: 20px; }}
        h1, h2 {{ border-bottom: 1px solid #eaecef; padding-bottom: 0.3em; }}
        code {{ background-color: #eef; padding: 2px 4px; border-radius: 3px; font-family: "SFMono-Regular", Consolas, "Liberation Mono", Menlo, Courier, monospace; }}
    </style>
</head>
<body>
    <div class="container">
        <h1>Private GitHub Reverse Proxy</h1>
        <div class="warning">
            <strong>Authentication Required:</strong> This is a private proxy. You must include your secret key in the URL to access content.
        </div>
        <h2>How to Use</h2>
        <p>To access GitHub content, prepend your secret key to the GitHub URL.</p>
        <p>For example, to clone a repository:</p>
        <code>git clone {{YOUR_PROXY_URL}}/{html.escape(SECRET_KEY)}/https://github.com/owner/repo.git</code>
        <p>Or to view a repository page:</p>
        <code>{{YOUR_PROXY_URL}}/{html.escape(SECRET_KEY)}/https://github.com/owner/repo</code>
    </div>
</body>
</html>
"""

def is_url_allowed(url, patterns=None):
    """Return True if *url* is safe to proxy and matches a whitelist pattern.

    Args:
        url: Absolute target URL as a string.
        patterns: Optional iterable of compiled regular expressions to match
            against. Defaults to the module-level ``ALLOWED_PATTERNS``.

    Returns:
        True when the URL contains no ASCII control characters, has no ``.``
        or ``..`` path segment (literal or percent-encoded) and matches at
        least one pattern. False otherwise, including for empty or non-string
        input.
    """
    if not isinstance(url, str) or not url:
        return False
    if patterns is None:
        patterns = ALLOWED_PATTERNS

    # Control characters never belong in a URL. Rejecting them also closes two
    # regex gaps: `[^/]+` matches a newline and `$` matches before a trailing one.
    if any(ord(ch) < 0x20 or ord(ch) == 0x7F for ch in url):
        return False

    # Dot segments may be collapsed by the HTTP client or the origin server
    # (RFC 3986 section 5.2.4), so ".../blob/../../x" could be fetched from a
    # path other than the one the whitelist matched. Only the path portion is
    # inspected; a ".." inside the query or fragment is harmless.
    path_part = url.split('#', 1)[0].split('?', 1)[0]
    if any(unquote(segment) in ('.', '..') for segment in path_part.split('/')):
        return False

    return any(pattern.match(url) for pattern in patterns)

# --- Core Proxy Logic (Updated with Authentication) ---
# Client request headers that are NOT copied onto the upstream request.
_UNFORWARDED_REQUEST_HEADERS = frozenset({
    'host',             # replaced below with the target's hostname
    'content-length',   # recomputed by requests from the forwarded body
    'accept-encoding',  # let requests advertise only encodings it can decode,
                        # since Content-Encoding is stripped from the reply
    # Hop-by-hop headers describe the client<->proxy connection only.
    'connection',
    'keep-alive',
    'proxy-authenticate',
    'proxy-authorization',
    'te',
    'trailer',
    'transfer-encoding',
    'upgrade',
})

# Upstream response headers dropped because the body is re-streamed (and
# possibly decompressed) by this proxy, which invalidates them.
_EXCLUDED_RESPONSE_HEADERS = frozenset({
    'content-encoding',
    'content-length',
    'transfer-encoding',
    'connection',
})


@app.route('/', defaults={'path': ''}, methods=['GET', 'POST', 'PUT', 'DELETE'])
@app.route('/<path:path>', methods=['GET', 'POST', 'PUT', 'DELETE'])
def proxy(path):
    """
    Authenticates the request via a secret key in the path, then proxies
    the request to GitHub after validating it against a whitelist.

    Args:
        path: Request path without the leading slash, expected in the form
            ``<secret_key>/<target URL>``. The ``https://`` scheme of the
            target may be omitted.

    Returns:
        A Flask response: 401 when the key is wrong or missing, the index
        page when only the key is given, 403 when the target URL is not
        whitelisted, 400 when no hostname can be parsed from it, 502 when the
        upstream request fails, and otherwise the streamed upstream response.
    """
    # Split the path to separate the secret key from the target URL.
    # e.g., "mysecretkey/https://github.com/user/repo" -> "mysecretkey", "https://..."
    supplied_key, _, remainder = path.partition('/')

    # --- Authentication Check ---
    # Compare in constant time so response timing does not reveal how much of
    # a guessed key is correct. Bytes are used because compare_digest rejects
    # non-ASCII str arguments.
    keys_match = hmac.compare_digest(
        supplied_key.encode('utf-8', 'surrogatepass'),
        SECRET_KEY.encode('utf-8', 'surrogatepass'),
    )
    if not keys_match:
        logging.warning(f"Authentication failed for request from {request.remote_addr}. Path: '{path}'")
        return "<h1>401 Unauthorized</h1><p>A valid secret key is required in the URL path.</p>", 401

    # If the key is correct but there is no target URL, show the index page.
    # This happens when accessing /<secret_key>/
    if not remainder:
        return INDEX_PAGE_HTML, 200

    target_path = unquote(remainder)

    # Prepend 'https://' if the scheme is missing.
    if target_path.startswith(('http://', 'https://')):
        target_url = target_path
    else:
        target_url = 'https://' + target_path

    # Security check: Ensure the URL is in the whitelist.
    if not is_url_allowed(target_url):
        logging.warning(f"URL Denied! Key was correct, but pattern not matched: {target_url}")
        return "<h1>403 Forbidden</h1><p>Request blocked by proxy security policy.</p>", 403

    try:
        target_host = urlparse(target_url).hostname
        if not target_host:
            raise ValueError("Hostname could not be parsed from the target URL.")
    except ValueError as e:
        logging.error(f"Invalid target URL provided: {target_url} | Error: {e}")
        return f"Invalid target URL in path: {e}", 400

    # The route's `path` never includes the query string, so re-attach it;
    # git's smart HTTP protocol, for one, needs "info/refs?service=...".
    # The whitelist check above is deliberately done on the URL without it.
    upstream_url = target_url
    query = request.query_string.decode('utf-8', errors='replace')
    if query:
        separator = '&' if '?' in target_url else '?'
        upstream_url = f"{target_url}{separator}{query}"

    # Forward headers, but set the 'Host' header to the target's hostname.
    headers = {
        name: value
        for name, value in request.headers
        if name.lower() not in _UNFORWARDED_REQUEST_HEADERS
    }
    headers['Host'] = target_host

    try:
        upstream = requests.request(
            method=request.method,
            url=upstream_url,
            headers=headers,
            data=request.get_data(),
            cookies=request.cookies,
            allow_redirects=False,  # redirects are relayed to the client as-is
            stream=True,
            timeout=30
        )
    except requests.exceptions.RequestException as e:
        logging.error(f"Error while proxying to {target_url}: {e}")
        return "An error occurred while proxying the request.", 502

    # raw.headers is used so repeated headers are relayed as separate entries.
    response_headers = [
        (name, value)
        for name, value in upstream.raw.headers.items()
        if name.lower() not in _EXCLUDED_RESPONSE_HEADERS
    ]
    response = Response(upstream.iter_content(chunk_size=8192), upstream.status_code, response_headers)
    # Release the upstream connection even if the client disconnects before
    # the body has been fully streamed.
    response.call_on_close(upstream.close)
    return response

if __name__ == '__main__':
    # Direct execution only: listen on every interface, fixed port 7860.
    bind_host = '0.0.0.0'
    listen_port = 7860
    app.run(host=bind_host, port=listen_port)