"""
docker build -t wm-detector .
docker run -p 7860:7860 -v $(pwd)/data:/app/data wm-detector
"""
from flask import Flask, render_template, request
import torch
import numpy as np
from src.detector import MarylandDetector, AutoTokenizer

app = Flask(__name__)

# Minimal setup: pick a detector (example: MarylandDetector) and its tokenizer.
model_id = "meta-llama/Llama-3.2-1B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_id)
detector = MarylandDetector(tokenizer=tokenizer)
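
# Scoring background (an assumption, not stated in this file): MarylandDetector
# follows the Kirchenbauer et al. "greenlist" watermark. Each token is scored by
# whether it falls in a pseudo-random greenlist seeded from the previous `ngram`
# tokens, and get_pvalue converts the summed score into a p-value under the
# no-watermark null hypothesis.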


def tokenize_text(text):
    return tokenizer.encode(text, add_special_tokens=False)


def compute_scores(tokens):
    """Score each token with the detector and return (per-token scores, p-value)."""
    score_list = []
    for i in range(len(tokens)):
        if i < detector.ngram:
            # Not enough preceding tokens to seed the detector: pad with a zero score.
            score_list.append(0)
            continue
        # Seed scoring with the previous `ngram` tokens, then score the current token.
        ngram_tokens = tokens[i - detector.ngram:i]
        curr_score = detector.score_tok(ngram_tokens, tokens[i]).sum().item()
        score_list.append(curr_score)
    # Compute the final p-value (this example uses the sum of scores over all
    # positions, including the zero-padded ones).
    final_pvalue = detector.get_pvalue(sum(score_list), len(score_list), 1e-10)
    return score_list, final_pvalue
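
# Hedged usage sketch: calling the helpers above outside the web route. The exact
# score values and p-value depend on the MarylandDetector implementation in
# src.detector; "some text to check" is a placeholder input.
#
#   example_tokens = tokenize_text("some text to check")
#   example_scores, example_pvalue = compute_scores(example_tokens)
#   print(f"{len(example_tokens)} tokens, p-value = {example_pvalue:.3g}")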


@app.route("/", methods=["GET", "POST"])
def index():
    tokens, colors, pvalue = [], [], None
    if request.method == "POST":
        user_text = request.form.get("user_text", "")
        tokens = tokenize_text(user_text)
        score_list, pvalue = compute_scores(tokens)
        # Convert token IDs back to readable token strings for display.
        displayed_tokens = tokenizer.convert_ids_to_tokens(tokens)
        # Simple color scale: red opacity proportional to each token's score.
        max_score = max(score_list) if score_list else 1
        colors = ["white" if max_score == 0 else f"rgba(255, 0, 0, {s / max_score})"
                  for s in score_list]
        return render_template("index.html",
                               tokens=displayed_tokens,
                               colors=colors,
                               pvalue=pvalue)
    return render_template("index.html", tokens=tokens, colors=colors, pvalue=pvalue)


if __name__ == "__main__":
    app.run(host="0.0.0.0", port=7860)
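
# Hypothetical sketch of the Jinja template this app expects (templates/index.html
# is not included in this file; variable names follow the render_template calls
# above, everything else is an assumption):
#
#   <form method="post">
#     <textarea name="user_text"></textarea>
#     <button type="submit">Detect</button>
#   </form>
#   {% if pvalue is not none %}<p>p-value: {{ pvalue }}</p>{% endif %}
#   {% for i in range(tokens|length) %}
#     <span style="background-color: {{ colors[i] }}">{{ tokens[i] }}</span>
#   {% endfor %}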