code-audit-api / app.py
Forrest99's picture
Update app.py
981a5ac verified
raw
history blame
1.73 kB
import os
import re

# HF_HOME has to be in the environment *before* transformers (and the
# huggingface_hub package it builds on) is imported: the cache directory is
# resolved from the environment while those modules are imported, so setting
# it afterwards does not redirect the cache.
os.environ["HF_HOME"] = "/app/.cache/huggingface"

from fastapi import FastAPI  # noqa: E402
from transformers import AutoTokenizer, T5ForConditionalGeneration  # noqa: E402

# ASGI application object; the route decorators in this file register on it.
app = FastAPI()
# Initialize the model: the tokenizer and model are loaded once at import
# time and kept as module-level objects for the request handlers to use.
try:
    tokenizer = AutoTokenizer.from_pretrained("Salesforce/codet5-small")
    model = T5ForConditionalGeneration.from_pretrained("Salesforce/codet5-small")
except Exception as e:
    # Fail fast at startup, and chain the original exception so the root
    # cause (network, disk, bad checkpoint, ...) stays in the traceback.
    raise RuntimeError(f"模型加载失败: {str(e)}") from e
def sanitize_code(code: str) -> str:
    """Strip a fixed set of markup characters from *code* and cap its length.

    The characters ``<``, ``>``, ``&``, ``"`` and ``'`` are removed first;
    the remaining text is then cut to at most 1024 characters.
    """
    max_chars = 1024
    # Deletion-only translation table: each listed character maps to nothing.
    drop_table = str.maketrans("", "", "<>&\"'")
    stripped = code.translate(drop_table)
    return stripped[:max_chars]
@app.get("/analyze")
async def analyze_get(code: str):
    """Run the CodeT5 model over a code snippet and return the generated text.

    Args:
        code: Source code supplied as the ``code`` query parameter. It is
            passed through ``sanitize_code`` before being placed in the prompt.

    Returns:
        ``{"result": <decoded model output>}`` on success, or
        ``{"error": <exception message>}`` if any step raises.
    """
    # NOTE(review): model.generate is a blocking call inside an async handler,
    # so it presumably stalls the event loop while generating - consider
    # offloading it to a worker thread if concurrent requests matter.
    try:
        # Clean the input
        code = sanitize_code(code)
        # Build the prompt. The continuation lines are flush-left on purpose:
        # any indentation inside the triple quotes would become prompt text.
        prompt = f"""Analyze the following code for security vulnerabilities in Chinese.
重点检查SQL注入、XSS、命令注入、路径遍历等问题。
按此格式返回:\n[漏洞类型]: [风险描述]\n\n代码:\n{code}"""
        # Tokenize the input. A single sequence needs no padding; truncation
        # alone keeps it within 512 tokens.
        inputs = tokenizer(
            prompt,
            return_tensors="pt",
            max_length=512,
            truncation=True,
        )
        # Generate the analysis. The attention mask is forwarded explicitly,
        # and no temperature is passed because it only applies when sampling,
        # which beam search here does not do.
        outputs = model.generate(
            input_ids=inputs.input_ids,
            attention_mask=inputs.attention_mask,
            max_length=512,
            num_beams=5,
            early_stopping=True,
        )
        # Decode the result
        analysis = tokenizer.decode(outputs[0], skip_special_tokens=True)
        return {"result": analysis}
    except Exception as e:
        # Best-effort endpoint: failures are reported in the response body
        # instead of propagating to the framework.
        return {"error": str(e)}
@app.get("/")
async def health_check():
    """Liveness probe: report that the service is running."""
    payload = {"status": "active"}
    return payload