Update app.py
app.py (CHANGED)
@@ -1,63 +1,65 @@
 """
-Orify Text Detector – Space
+Orify Text Detector – Hugging Face Space (Zero-GPU ready)
 
-• Three ModernBERT-base checkpoints (soft-vote)
+• Three ModernBERT-base checkpoints (soft-vote ensemble)
 • Per-line colour coding, probability tool-tips, top-3 AI model hints
-•
+• Weights auto-downloaded once and cached on the Zero-GPU T4
 """
 
 # ── Imports ──────────────────────────────────────────────────────────────
 from pathlib import Path
-import
+import os, re, html, typing
+import torch, gradio as gr
 from transformers import AutoTokenizer, AutoModelForSequenceClassification
 from huggingface_hub import hf_hub_download
 import spaces
-import os, types  # add `types`
 
-#
+# ───────────────── torch.compile hot-patch ───────────────────────────────
 if hasattr(torch, "compile"):
-    def _no_compile(model:
+    def _no_compile(model: typing.Any = None, *args, **kwargs):
         """
-
-
-        immediately gives back the class / fn it decorates.
+        • torch.compile(model, …)        → return model unchanged
+        • @torch.compile(**kw) decorator → return identity decorator
         """
-        if callable(model):
+        if callable(model):                 # direct call pattern
             return model
-
-        def decorator(fn):
+
+        def decorator(fn):                  # decorator pattern
             return fn
         return decorator
 
-    torch.compile = _no_compile
-    os.environ["TORCHINDUCTOR_DISABLED"] = "1"
+    torch.compile = _no_compile
+    os.environ["TORCHINDUCTOR_DISABLED"] = "1"   # extra safety
 
-#
+# ── Config / constants ───────────────────────────────────────────────────
 DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 WEIGHT_REPO = "Sleepyriizi/Orify-Text-Detection-Weights"
-FILE_MAP
-
-
+FILE_MAP = {
+    "ensamble_1"     : "ensamble_1",
+    "ensamble_2.bin" : "ensamble_2.bin",
+    "ensamble_3"     : "ensamble_3",
+}
+
 BASE_MODEL_NAME = "answerdotai/ModernBERT-base"
-NUM_LABELS
-
-LABELS = {
-    0:
-
-
-
-
-
-
-
-    31: "opt-2.7b", 32: "opt-30b", 33: "opt-350m",
-    34: "opt-6.7b", 35: "opt-iml-30b", 36: "opt-iml-max-1.3b",
-    37: "t0-11b", 38: "t0-3b", 39: "text-davinci-002", 40: "text-davinci-003"
+NUM_LABELS = 41
+
+LABELS = {   # id → readable label
+    0:"13B",1:"30B",2:"65B",3:"7B",4:"GLM130B",5:"bloom_7b",6:"bloomz",7:"cohere",
+    8:"davinci",9:"dolly",10:"dolly-v2-12b",11:"flan_t5_base",12:"flan_t5_large",
+    13:"flan_t5_small",14:"flan_t5_xl",15:"flan_t5_xxl",16:"gemma-7b-it",
+    17:"gemma2-9b-it",18:"gpt-3.5-turbo",19:"gpt-35",20:"gpt-4",21:"gpt-4o",
+    22:"gpt-j",23:"gpt-neox",24:"human",25:"llama3-70b",26:"llama3-8b",
+    27:"mixtral-8x7b",28:"opt-1.3b",29:"opt-125m",30:"opt-13b",31:"opt-2.7b",
+    32:"opt-30b",33:"opt-350m",34:"opt-6.7b",35:"opt-iml-30b",
+    36:"opt-iml-max-1.3b",37:"t0-11b",38:"t0-3b",39:"text-davinci-002",
+    40:"text-davinci-003"
 }
 
-# ── CSS (
-CSS =
+# ── CSS (inline fallback) ────────────────────────────────────────────────
+CSS = (
+    Path(__file__).with_name("style.css").read_text()
+    if Path(__file__).with_name("style.css").exists()
+    else """
 :root{--clr-ai:#ff4d4f;--clr-human:#52c41a;--border:2px solid var(--clr-ai);--radius:10px}
 body{font-family:'Roboto Mono',monospace;margin:0 auto;max-width:900px;padding:32px}
 textarea,.output-box{width:100%;box-sizing:border-box;padding:16px;font-size:1rem;border:var(--border);border-radius:var(--radius)}
@@ -65,91 +67,103 @@
 .human-line{background:rgba(82,196,26,.12);padding:2px 4px;border-radius:4px}
 .prob-tooltip{cursor:help;border-bottom:1px dotted currentColor}
 """
+)
 
-# ──
+# ── Weight download & model init ─────────────────────────────────────────
 print("π Downloading weights …")
-local_paths = {
-
-
-
-tokeniser = AutoTokenizer.from_pretrained(BASE_MODEL_NAME)
+local_paths = {
+    alias: hf_hub_download(WEIGHT_REPO, remote, resume_download=True)
+    for alias, remote in FILE_MAP.items()
+}
 
+print("🧩 Loading tokenizer & ensemble …")
+tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_NAME)
 models = []
-for
+for path in local_paths.values():
     net = AutoModelForSequenceClassification.from_pretrained(
-
+        BASE_MODEL_NAME, num_labels=NUM_LABELS
+    )
     net.load_state_dict(torch.load(path, map_location=DEVICE))
     net.to(DEVICE).eval()
     models.append(net)
 
-# ──
-def tidy(
-
-
-
-
-
-    return
+# ── Helper functions ─────────────────────────────────────────────────────
+def tidy(text: str) -> str:
+    text = text.replace("\r\n", "\n").replace("\r", "\n")
+    text = re.sub(r"\n\s*\n+", "\n\n", text)
+    text = re.sub(r"[ \t]+", " ", text)
+    text = re.sub(r"(\w+)-\n(\w+)", r"\1\2", text)
+    text = re.sub(r"(?<!\n)\n(?!\n)", " ", text)
+    return text.strip()
 
 def infer(segment: str):
-    """Return (human%, ai%, [top-3
-    inputs =
+    """Return (human%, ai%, list[top-3 AI names])."""
+    inputs = tokenizer(segment, return_tensors="pt", truncation=True,
                        padding=True).to(DEVICE)
     with torch.no_grad():
         probs = torch.stack([
            torch.softmax(m(**inputs).logits, dim=1) for m in models
-        ]).mean(
+        ]).mean(0)[0]
 
-    ai_probs = probs.clone(); ai_probs[24] = 0
+    ai_probs = probs.clone(); ai_probs[24] = 0          # remove 'human'
     ai_score = ai_probs.sum().item() * 100
     human_score = 100 - ai_score
     top3 = torch.topk(ai_probs, 3).indices.tolist()
-
-    return human_score, ai_score, top3_names
+    return human_score, ai_score, [LABELS[i] for i in top3]
 
-# ── Inference
+# ── Inference with explanations ──────────────────────────────────────────
 @spaces.GPU
 def analyse(text: str):
     if not text.strip():
         return "βοΈ Please paste or type some text to analyse…"
 
     lines = tidy(text).split("\n")
-    highlighted,
+    highlighted, h_sum, ai_sum, n = [], 0.0, 0.0, 0
 
     for ln in lines:
         if not ln.strip():
             highlighted.append("<br>")
             continue
+
         n += 1
-
-
-
-
-
-
-
+        human_p, ai_p, top3 = infer(ln)
+        h_sum += human_p
+        ai_sum += ai_p
+
+        tooltip = (
+            f"AI {ai_p:.2f}% • Top-3: {', '.join(top3)}"
+            if ai_p > human_p else f"Human {human_p:.2f}%"
+        )
+        cls = "ai-line" if ai_p > human_p else "human-line"
+        span = (
+            f"<span class='{cls} prob-tooltip' title='{tooltip}'>"
+            f"{html.escape(ln)}</span>"
+        )
         highlighted.append(span)
 
-
-
-
-
-
-
-
+    human_avg, ai_avg = h_sum / n, ai_sum / n
+    verdict = (
+        f"<p><strong>Overall verdict:</strong> "
+        f"<span class='human-line' style='padding:4px 8px;'>"
+        f"Human-written {human_avg:.2f}%</span>"
+        if human_avg >= ai_avg else
+        f"<p><strong>Overall verdict:</strong> "
+        f"<span class='ai-line' style='padding:4px 8px;'>"
+        f"AI-generated {ai_avg:.2f}%</span>"
+    )
     return verdict + "<hr>" + "<br>".join(highlighted)
 
-# ──
+# ── Gradio UI ─────────────────────────────────────────────────────────────
 with gr.Blocks(css=CSS, title="Orify Text Detector") as demo:
     gr.Markdown("""
    ### Orify Text Detector
-    Paste any English text and press **Analyse
-    <span class='human-line'>Green</span> = human |
-
+    Paste any English text and press **Analyse**.<br>
+    <span class='human-line'>Green</span> = human |
+    <span class='ai-line'>Red</span> = AI.<br>
+    Hover a line to see confidence & the top-3 AI models it matches.
    """)
-    inp = gr.Textbox(lines=8, placeholder="Paste text here …"
-
-    out = gr.HTML("", elem_classes=["output-box"])
+    inp = gr.Textbox(lines=8, placeholder="Paste text here …")
+    out = gr.HTML(elem_classes=["output-box"])
     gr.Button("Analyse").click(analyse, inp, out)
     gr.Markdown("<sub>Powered by ModernBERT + Orify Ensemble © 2025</sub>")
 
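A few details of the new version are worth sanity-checking in isolation; the sketches below use dummy inputs and are not part of the commit.

The `_no_compile` stub has to cover both ways `torch.compile` gets invoked: called directly on a module or function, and used as a decorator factory. The `callable(model)` test is what separates the two patterns; a minimal standalone sketch (plain Python, no torch needed, `f` and `g` are made-up stand-ins):

import typing

def _no_compile(model: typing.Any = None, *args, **kwargs):
    """Mirror of the stub in app.py, for illustration only."""
    if callable(model):          # torch.compile(fn_or_module) -> hand it back
        return model
    def decorator(fn):           # @torch.compile(**kw) -> identity decorator
        return fn
    return decorator

def f(x):
    return x + 1

assert _no_compile(f) is f       # direct-call pattern

@_no_compile(mode="reduce-overhead")
def g(x):
    return x * 2

assert g(3) == 6                 # decorator pattern leaves g untouched

A bare `@_no_compile` (no parentheses) lands in the `callable` branch too, so all three call shapes come back unchanged.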
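`FILE_MAP` maps a local alias to a filename inside the weights repo (the two happen to be identical), and `hf_hub_download` resolves into the local Hugging Face cache, so a warm restart skips the network. A small sketch of that idempotence, assuming the repo stays reachable (the first call downloads, the second is a cache hit):

from huggingface_hub import hf_hub_download

# Hypothetical warm-cache check: both calls resolve to the same cached file.
p1 = hf_hub_download("Sleepyriizi/Orify-Text-Detection-Weights", "ensamble_1")
p2 = hf_hub_download("Sleepyriizi/Orify-Text-Detection-Weights", "ensamble_1")
assert p1 == p2                  # same path, no second download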
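`tidy` is a newline normaliser: paragraph breaks survive as `\n\n`, lone newlines collapse to spaces, and hyphenated line-breaks are re-joined. A quick demo (the helper is copied from the diff so the snippet runs standalone; the sample string is invented):

import re

def tidy(text: str) -> str:
    text = text.replace("\r\n", "\n").replace("\r", "\n")   # normalise newlines
    text = re.sub(r"\n\s*\n+", "\n\n", text)                # squeeze blank-line runs
    text = re.sub(r"[ \t]+", " ", text)                     # collapse spaces/tabs
    text = re.sub(r"(\w+)-\n(\w+)", r"\1\2", text)          # re-join hyphenated wraps
    text = re.sub(r"(?<!\n)\n(?!\n)", " ", text)            # a lone \n is a soft wrap
    return text.strip()

raw = "First line,\nsoft-wrapped, with a hy-\nphenated break.\r\n\r\n\r\nSecond   paragraph."
print(repr(tidy(raw)))
# 'First line, soft-wrapped, with a hyphenated break.\n\nSecond paragraph.'

`analyse` then splits on single `\n`, so each paragraph is scored as one unit and the surviving blank line between paragraphs renders through the `<br>` branch.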
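The scoring in `infer` is a soft vote: average the three models' softmax rows, zero out class 24 (`human`), and report the remaining probability mass as the AI score. A sketch with seeded random logits standing in for the checkpoints (dummy values only):

import torch

torch.manual_seed(0)
NUM_LABELS, HUMAN_IDX = 41, 24                   # index 24 is "human" in LABELS

# Dummy logits standing in for the three checkpoints' output on one segment.
logits = [torch.randn(1, NUM_LABELS) for _ in range(3)]

# Soft vote: average the per-model softmax rows, then take the single row.
probs = torch.stack([torch.softmax(l, dim=1) for l in logits]).mean(0)[0]

ai_probs = probs.clone()
ai_probs[HUMAN_IDX] = 0                          # drop the 'human' class
ai_score = ai_probs.sum().item() * 100           # remaining probability mass
human_score = 100 - ai_score
top3 = torch.topk(ai_probs, 3).indices.tolist()
print(f"human {human_score:.2f}% | ai {ai_score:.2f}% | top-3 ids: {top3}")

Because the averaged distribution sums to 1, `ai_score` is exactly `100 * (1 - P(human))`, so a line flips to the red `ai-line` class precisely when the ensemble puts less than half its probability mass on the human class.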