# NOTE: The first three lines of this capture were file-viewer residue (a "File size: 4,534 Bytes" banner, a per-line commit-hash column, and a 1-109 line-number gutter), not program text.
import re
import logging
from flask import Flask, request, Response
import requests
from urllib.parse import urlparse
# --- Configure logging so that its output reaches the Hugging Face console ---
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

# The Flask application object that the route decorators in this file attach to.
app = Flask(__name__)
# --- Whitelist filter rules (kept unchanged) ---
# Every rule is anchored at both ends and compiled case-insensitively.
# The order matters only for logging: the index of the first matching rule
# is what gets reported.
ALLOWED_PATTERNS = [
    re.compile(rule_source, re.IGNORECASE)
    for rule_source in (
        # github.com/<owner>/<repo>/releases/... and .../archive/...
        r'^https://github\.com/[^/]+/[^/]+/(?:releases|archive)/.*$',
        # github.com/<owner>/<repo>/blob/... and .../raw/...
        r'^https://github\.com/[^/]+/[^/]+/(?:blob|raw)/.*$',
        # github.com/<owner>/<repo>/info.../... and .../git-.../...
        r'^https://github\.com/[^/]+/[^/]+/(?:info|git-).*/.*$',
        # raw.githubusercontent.com / raw.github.com file paths
        r'^https://raw\.(?:githubusercontent|github)\.com/[^/]+/[^/]+/.*/.*$',
        # gist.githubusercontent.com / gist.github.com file paths
        r'^https://gist\.(?:githubusercontent|github)\.com/[^/]+/[^/]+/.*/.*$',
        # github.com/<owner>/<repo>/tags...
        r'^https://github\.com/[^/]+/[^/]+/tags.*$',
        # Anything on avatars.githubusercontent.com
        r'^https://avatars\.githubusercontent\.com/.*$',
        # Anything on github.githubassets.com
        r'^https://github\.githubassets\.com/.*$',
        # github.com/<owner> with an optional trailing slash
        r'^https://github\.com/[^/]+/?$',
        # github.com/<owner>/<repo> with an optional trailing slash
        r'^https://github\.com/[^/]+/[^/]+/?$',
    )
]
def is_url_allowed(url):
    """Report whether ``url`` matches at least one entry of ALLOWED_PATTERNS.

    The URL is logged before checking. On success the index of the first
    matching pattern is logged at INFO level; on failure a WARNING is logged.

    Args:
        url: The fully constructed target URL string to check.

    Returns:
        True if some whitelist pattern matches ``url``, otherwise False.
    """
    logging.info(f"Checking URL against whitelist: {url}")

    # Index of the first rule that matches, or None when nothing matches.
    matched_index = next(
        (idx for idx, rule in enumerate(ALLOWED_PATTERNS) if rule.match(url)),
        None,
    )

    if matched_index is None:
        logging.warning(f"URL Denied! No pattern matched: {url}")
        return False

    logging.info(f"URL Matched! Pattern index: {matched_index}")
    return True
# --- Core proxy logic ---
@app.route('/', defaults={'path': ''}, methods=['GET', 'POST', 'PUT', 'DELETE'])
@app.route('/<path:path>', methods=['GET', 'POST', 'PUT', 'DELETE'])
def proxy(path):
    """Forward the incoming request to the whitelisted URL embedded in its path.

    The target URL is everything after the leading ``/`` of the request
    (including the query string); ``https://`` is prepended when no scheme is
    present. The URL must pass ``is_url_allowed`` before anything is sent.

    Args:
        path: The path captured by the Flask route. Only used for logging;
            the target is rebuilt from ``request.full_path`` so that the
            query string is kept.

    Returns:
        * 200 with a short info page for a bare root request,
        * 403 when the target URL is not whitelisted,
        * 400 when no hostname can be parsed from the target URL,
        * 502 when the upstream request raises a ``requests`` exception,
        * otherwise a streamed ``Response`` carrying the upstream status,
          headers (minus hop-by-hop / encoding headers) and body.
    """
    logging.info("="*50)
    logging.info("--- New request received ---")
    # Log the request exactly as it was received.
    logging.info(f"Request Method: {request.method}")
    logging.info(f"Request Path (from Flask): /<path:path> = /{path}")
    logging.info(f"Request Full Path (raw): {request.full_path}")
    logging.info(f"Request Headers:\n{request.headers}")

    # --- 1. Build the target URL from the request path ---
    target_path = request.full_path
    # full_path is "<path>?<query>" and keeps the "?" even when the query
    # string is empty. Without this strip a root request becomes "?" (so the
    # info page below is never reached) and every query-less target URL gets
    # a stray trailing "?".
    if not request.query_string and target_path.endswith('?'):
        target_path = target_path[:-1]
    if target_path.startswith('/'):
        target_path = target_path[1:]
    if not target_path:
        logging.info("Root path request. Returning info page.")
        return ("<p>GitHub reverse proxy is active. Use proxy_url/target_url format.</p>"), 200
    if not target_path.startswith(('http://', 'https://')):
        target_url = 'https://' + target_path
    else:
        target_url = target_path
    logging.info(f"Constructed target URL: {target_url}")

    # --- 2. Run the security whitelist check ---
    if not is_url_allowed(target_url):
        error_message = "<h1>403 Forbidden</h1><p>Request blocked by proxy security policy.</p>"
        return error_message, 403

    # --- 3. Forward the request ---
    try:
        target_host = urlparse(target_url).hostname
        if not target_host:
            raise ValueError("Could not parse hostname")
    except Exception as e:
        logging.error(f"Failed to parse hostname from URL '{target_url}': {e}")
        return f"Invalid target URL in path: {e}", 400

    # Host is replaced with the upstream host. Accept-Encoding is not
    # forwarded so that requests advertises only encodings it can decode:
    # the body is relayed decoded and Content-Encoding is stripped below, so
    # an encoding requests cannot decode would reach the client as garbage.
    dropped_request_headers = ('host', 'accept-encoding')
    headers = {
        key: value
        for (key, value) in request.headers
        if key.lower() not in dropped_request_headers
    }
    headers['Host'] = target_host
    logging.info(f"Forwarding request to {target_url} with headers:\n{headers}")

    try:
        resp = requests.request(
            method=request.method,
            url=target_url,
            headers=headers,
            data=request.get_data(),
            cookies=request.cookies,
            allow_redirects=False,
            stream=True,
            timeout=30
        )
    except requests.exceptions.RequestException as e:
        logging.error(f"Error while proxying request to {target_url}: {e}")
        logging.info("="*50 + "\n")
        return f"An error occurred while proxying: {e}", 502

    logging.info(f"Received response with status code: {resp.status_code}")
    # These headers describe the upstream transfer, not the decoded body that
    # iter_content() yields, so they must not be copied to the client.
    excluded_headers = ('content-encoding', 'content-length', 'transfer-encoding', 'connection')
    response_headers = [
        (name, value)
        for (name, value) in resp.raw.headers.items()
        if name.lower() not in excluded_headers
    ]
    logging.info("Streaming response back to client.")
    logging.info("="*50 + "\n")
    response = Response(
        resp.iter_content(chunk_size=8192),
        status=resp.status_code,
        headers=response_headers,
    )
    # Release the upstream connection even if the client disconnects before
    # the streamed body has been fully consumed.
    response.call_on_close(resp.close)
    return response
if __name__ == '__main__':
    # When run as a script, serve on all interfaces on port 7860.
    app.run(host='0.0.0.0', port=7860)