Spaces:
Running
Running
File size: 4,156 Bytes
2a39044 90f5392 892f484 7c08c7b 90f5392 2a39044 90f5392 2a39044 892f484 90f5392 79ce9cc 892f484 c79e0ff 892f484 f6942d6 892f484 7c08c7b 892f484 7c08c7b 2a39044 892f484 2a39044 79ce9cc 7c08c7b 79ce9cc 2a39044 7c08c7b 892f484 2a39044 892f484 2a39044 7c08c7b 2a39044 7c08c7b 79ce9cc 2a39044 c79e0ff 892f484 7c08c7b 2a39044 79ce9cc c79e0ff aacc39b 2a39044 c79e0ff 892f484 c79e0ff 892f484 2a39044 c79e0ff 2a39044 79ce9cc c79e0ff 79ce9cc c79e0ff 2a39044 c79e0ff 2a39044 aacc39b c79e0ff 892f484 90f5392 0db0051 2a39044 c79e0ff |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 |
from flask import Flask, request, jsonify, render_template_string
from sentence_transformers import SentenceTransformer, util
import logging
import sys
import signal
# 初始化 Flask 应用
app = Flask(__name__)
# 配置日志,级别设为 INFO
logging.basicConfig(level=logging.INFO)
app.logger = logging.getLogger("CodeSearchAPI")
# 预定义代码片段
CODE_SNIPPETS = [
"def sort_list(x): return sorted(x)",
"""def count_above_threshold(elements, threshold=0):
return sum(1 for e in elements if e > threshold)""",
"""def find_min_max(elements):
return min(elements), max(elements)"""
"""def count_evens(nums):
return len([n for n in nums if n % 2 == 0])""",
"""def reverse_string(s):
return s[::-1]""",
"""def is_prime(n):
if n < 2:
return False
for i in range(2, int(n**0.5)+1):
if n % i == 0:
return False
return True""",
"""def factorial(n):
result = 1
for i in range(1, n+1):
result *= i
return result""",
"""def sum_of_squares(nums):
return sum(map(lambda x: x**2, nums))"""
]
# 全局服务状态
service_ready = False
# 优雅关闭处理
def handle_shutdown(signum, frame):
app.logger.info("收到终止信号,开始关闭...")
sys.exit(0)
signal.signal(signal.SIGTERM, handle_shutdown)
signal.signal(signal.SIGINT, handle_shutdown)
# 初始化模型和预计算编码
try:
app.logger.info("开始加载模型...")
model = SentenceTransformer(
"flax-sentence-embeddings/st-codesearch-distilroberta-base",
cache_folder="/model-cache"
)
# 预计算代码片段的编码(强制使用 CPU)
code_emb = model.encode(CODE_SNIPPETS, convert_to_tensor=True, device="cpu")
service_ready = True
app.logger.info("服务初始化完成")
except Exception as e:
app.logger.error("初始化失败: %s", str(e))
raise
# Hugging Face 健康检查端点,必须响应根路径
@app.route('/')
def hf_health_check():
# 如果请求接受 HTML,则返回一个简单的 HTML 页面(包含测试链接)
if request.accept_mimetypes.accept_html:
html = """
<h2>CodeSearch API</h2>
<p>服务状态:{{ status }}</p>
<p>你可以在地址栏输入 /search?query=你的查询 来测试接口</p>
"""
status = "ready" if service_ready else "initializing"
return render_template_string(html, status=status)
# 否则返回 JSON 格式的健康检查
if service_ready:
return jsonify({"status": "ready"}), 200
else:
return jsonify({"status": "initializing"}), 503
# 搜索 API 端点,同时支持 GET 和 POST 请求
@app.route('/search', methods=['GET', 'POST'])
def handle_search():
if not service_ready:
app.logger.info("服务未就绪")
return jsonify({"error": "服务正在初始化"}), 503
try:
# 根据请求方法提取查询内容
if request.method == 'GET':
query = request.args.get('query', '').strip()
else:
data = request.get_json() or {}
query = data.get('query', '').strip()
if not query:
app.logger.info("收到空的查询请求")
return jsonify({"error": "查询不能为空"}), 400
# 记录接收到的查询
app.logger.info("收到查询请求: %s", query)
# 对查询进行编码,并进行语义搜索
query_emb = model.encode(query, convert_to_tensor=True, device="cpu")
hits = util.semantic_search(query_emb, code_emb, top_k=1)[0]
best = hits[0]
result = {
"code": CODE_SNIPPETS[best['corpus_id']],
"score": round(float(best['score']), 4)
}
# 记录返回结果
app.logger.info("返回结果: %s", result)
return jsonify(result)
except Exception as e:
app.logger.error("请求处理失败: %s", str(e))
return jsonify({"error": "服务器内部错误"}), 500
if __name__ == "__main__":
# 本地测试用,Hugging Face Spaces 通常通过 gunicorn 启动
app.run(host='0.0.0.0', port=7860)
|