Sleepyriizi committed on
Commit
919951a
·
verified ·
1 Parent(s): 0460b93

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +95 -81
app.py CHANGED
@@ -1,63 +1,65 @@
1
  """
2
- Orify Text Detector – Space edition (Zero-GPU ready)
3
 
4
- • Three ModernBERT-base checkpoints (soft-vote)
5
  • Per-line colour coding, probability tool-tips, top-3 AI model hints
6
- • Everything fetched automatically from the weight repo and cached
7
  """
8
 
9
  # ── Imports ──────────────────────────────────────────────────────────────
10
  from pathlib import Path
11
- import re, os, html, torch, gradio as gr # ← add html
 
12
  from transformers import AutoTokenizer, AutoModelForSequenceClassification
13
  from huggingface_hub import hf_hub_download
14
  import spaces
15
- import os, types # add `types`
16
 
17
- # ────────────────── robust torch.compile shim ─────────────────────────
18
  if hasattr(torch, "compile"):
19
- def _no_compile(model: types.Any = None, *args, **kwargs):
20
  """
21
- 1. If called as torch.compile(model, …) → just return the model.
22
- 2. If called as torch.compile(**kw) → return a decorator that
23
- immediately gives back the class / fn it decorates.
24
  """
25
- if callable(model): # pattern 1
26
  return model
27
- # pattern 2 (used by ModernBERT via @torch.compile(...))
28
- def decorator(fn):
29
  return fn
30
  return decorator
31
 
32
- torch.compile = _no_compile # monkey-patch
33
- os.environ["TORCHINDUCTOR_DISABLED"] = "1"
34
 
35
- # (everything below is unchanged)
36
  DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
37
  WEIGHT_REPO = "Sleepyriizi/Orify-Text-Detection-Weights"
38
- FILE_MAP = {"ensamble_1":"ensamble_1",
39
- "ensamble_2.bin":"ensamble_2.bin",
40
- "ensamble_3":"ensamble_3"}
 
 
 
41
  BASE_MODEL_NAME = "answerdotai/ModernBERT-base"
42
- NUM_LABELS = 41
43
-
44
- LABELS = { # id → friendly label (unchanged)
45
- 0: "13B", 1: "30B", 2: "65B", 3: "7B", 4: "GLM130B",
46
- 5: "bloom_7b", 6: "bloomz", 7: "cohere", 8: "davinci",
47
- 9: "dolly", 10: "dolly-v2-12b", 11: "flan_t5_base",
48
- 12: "flan_t5_large", 13: "flan_t5_small", 14: "flan_t5_xl",
49
- 15: "flan_t5_xxl", 16: "gemma-7b-it", 17: "gemma2-9b-it",
50
- 18: "gpt-3.5-turbo", 19: "gpt-35", 20: "gpt-4",
51
- 21: "gpt-4o", 22: "gpt-j", 23: "gpt-neox", 24: "human",
52
- 25: "llama3-70b", 26: "llama3-8b", 27: "mixtral-8x7b",
53
- 28: "opt-1.3b", 29: "opt-125m", 30: "opt-13b",
54
- 31: "opt-2.7b", 32: "opt-30b", 33: "opt-350m",
55
- 34: "opt-6.7b", 35: "opt-iml-30b", 36: "opt-iml-max-1.3b",
56
- 37: "t0-11b", 38: "t0-3b", 39: "text-davinci-002", 40: "text-davinci-003"
57
  }
58
 
59
- # ── CSS (kept identical) ────────────────────────────────────────────────
60
- CSS = Path(__file__).with_name("style.css").read_text() if Path(__file__).with_name("style.css").exists() else """
 
 
 
61
  :root{--clr-ai:#ff4d4f;--clr-human:#52c41a;--border:2px solid var(--clr-ai);--radius:10px}
62
  body{font-family:'Roboto Mono',monospace;margin:0 auto;max-width:900px;padding:32px}
63
  textarea,.output-box{width:100%;box-sizing:border-box;padding:16px;font-size:1rem;border:var(--border);border-radius:var(--radius)}
@@ -65,91 +67,103 @@ textarea,.output-box{width:100%;box-sizing:border-box;padding:16px;font-size:1re
65
  .human-line{background:rgba(82,196,26,.12);padding:2px 4px;border-radius:4px}
66
  .prob-tooltip{cursor:help;border-bottom:1px dotted currentColor}
67
  """
 
68
 
69
- # ── Model loading (download once, then cached) ───────────────────────────
70
  print("🔄 Downloading weights …")
71
- local_paths = {alias: hf_hub_download(WEIGHT_REPO, fname, resume_download=True)
72
- for alias, fname in FILE_MAP.items()}
73
-
74
- print("🧩 Loading tokenizer & models …")
75
- tokeniser = AutoTokenizer.from_pretrained(BASE_MODEL_NAME)
76
 
 
 
77
  models = []
78
- for alias, path in local_paths.items():
79
  net = AutoModelForSequenceClassification.from_pretrained(
80
- BASE_MODEL_NAME, num_labels=NUM_LABELS)
 
81
  net.load_state_dict(torch.load(path, map_location=DEVICE))
82
  net.to(DEVICE).eval()
83
  models.append(net)
84
 
85
- # ── Helpers ──────────────────────────────────────────────────────────────
86
- def tidy(txt: str) -> str:
87
- txt = txt.replace("\r\n", "\n").replace("\r", "\n")
88
- txt = re.sub(r"\n\s*\n+", "\n\n", txt)
89
- txt = re.sub(r"[ \t]+", " ", txt)
90
- txt = re.sub(r"(\w+)-\n(\w+)", r"\1\2", txt)
91
- txt = re.sub(r"(?<!\n)\n(?!\n)", " ", txt)
92
- return txt.strip()
93
 
94
  def infer(segment: str):
95
- """Return (human%, ai%, [top-3 ai model names])."""
96
- inputs = tokeniser(segment, return_tensors="pt", truncation=True,
97
  padding=True).to(DEVICE)
98
  with torch.no_grad():
99
  probs = torch.stack([
100
  torch.softmax(m(**inputs).logits, dim=1) for m in models
101
- ]).mean(dim=0)[0]
102
 
103
- ai_probs = probs.clone(); ai_probs[24] = 0 # null out human idx
104
  ai_score = ai_probs.sum().item() * 100
105
  human_score = 100 - ai_score
106
  top3 = torch.topk(ai_probs, 3).indices.tolist()
107
- top3_names = [LABELS[i] for i in top3]
108
- return human_score, ai_score, top3_names
109
 
110
- # ── Inference + explanation ──────────────────────────────────────────────
111
  @spaces.GPU
112
  def analyse(text: str):
113
  if not text.strip():
114
  return "✏️ Please paste or type some text to analyse…"
115
 
116
  lines = tidy(text).split("\n")
117
- highlighted, h_tot, ai_tot, n = [], 0.0, 0.0, 0
118
 
119
  for ln in lines:
120
  if not ln.strip():
121
  highlighted.append("<br>")
122
  continue
 
123
  n += 1
124
- h, ai, top3 = infer(ln)
125
- h_tot += h; ai_tot += ai
126
- tooltip = (f"AI {ai:.2f}% • Top-3: {', '.join(top3)}"
127
- if ai > h else f"Human {h:.2f}%")
128
- cls = "ai-line" if ai > h else "human-line"
129
- span = (f"<span class='{cls} prob-tooltip' title='{tooltip}'>"
130
- f"{html.escape(ln)}</span>") # ← use html.escape
 
 
 
 
 
 
131
  highlighted.append(span)
132
 
133
- verdict = (f"<p><strong>Overall verdict:</strong> "
134
- f"<span class='human-line' style='padding:4px 8px;'>"
135
- f"Human-written {h_tot/n:.2f}%</span>"
136
- if h_tot >= ai_tot else
137
- f"<p><strong>Overall verdict:</strong> "
138
- f"<span class='ai-line' style='padding:4px 8px;'>"
139
- f"AI-generated {ai_tot/n:.2f}%</span>")
 
 
 
140
  return verdict + "<hr>" + "<br>".join(highlighted)
141
 
142
- # ── Interface ────────────────────────────────────────────────────────────
143
  with gr.Blocks(css=CSS, title="Orify Text Detector") as demo:
144
  gr.Markdown("""
145
  ### Orify Text Detector
146
- Paste any English text and press **Analyse**.
147
- <span class='human-line'>Green</span> = human | <span class='ai-line'>Red</span> = AI.
148
- Hover a line to see confidence and the top-3 AI models it resembles.
 
149
  """)
150
- inp = gr.Textbox(lines=8, placeholder="Paste text here …",
151
- elem_classes=["input-area"])
152
- out = gr.HTML("", elem_classes=["output-box"])
153
  gr.Button("Analyse").click(analyse, inp, out)
154
  gr.Markdown("<sub>Powered by ModernBERT + Orify Ensemble © 2025</sub>")
155
 
 
1
  """
2
+ Orify Text Detector – Hugging Face Space (Zero-GPU ready)
3
 
4
+ • Three ModernBERT-base checkpoints (soft-vote ensemble)
5
  • Per-line colour coding, probability tool-tips, top-3 AI model hints
6
+ • Weights auto-downloaded once and cached on the Zero-GPU T4
7
  """
8
 
9
  # ── Imports ──────────────────────────────────────────────────────────────
10
  from pathlib import Path
11
+ import os, re, html, typing
12
+ import torch, gradio as gr
13
  from transformers import AutoTokenizer, AutoModelForSequenceClassification
14
  from huggingface_hub import hf_hub_download
15
  import spaces
 
16
 
17
+ # ───────────────── torch.compile hot-patch ───────────────────────────────
18
  if hasattr(torch, "compile"):
19
+ def _no_compile(model: typing.Any = None, *args, **kwargs):
20
  """
21
+ • torch.compile(model, …) → return model unchanged
22
+ • @torch.compile(**kw) decorator → return identity decorator
 
23
  """
24
+ if callable(model): # direct call pattern
25
  return model
26
+
27
+ def decorator(fn): # decorator pattern
28
  return fn
29
  return decorator
30
 
31
+ torch.compile = _no_compile
32
+ os.environ["TORCHINDUCTOR_DISABLED"] = "1" # extra safety
33
 
34
+ # ── Config / constants ───────────────────────────────────────────────────
35
  DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
36
  WEIGHT_REPO = "Sleepyriizi/Orify-Text-Detection-Weights"
37
+ FILE_MAP = {
38
+ "ensamble_1" : "ensamble_1",
39
+ "ensamble_2.bin" : "ensamble_2.bin",
40
+ "ensamble_3" : "ensamble_3",
41
+ }
42
+
43
  BASE_MODEL_NAME = "answerdotai/ModernBERT-base"
44
+ NUM_LABELS = 41
45
+
46
+ LABELS = { # id → readable label
47
+ 0:"13B",1:"30B",2:"65B",3:"7B",4:"GLM130B",5:"bloom_7b",6:"bloomz",7:"cohere",
48
+ 8:"davinci",9:"dolly",10:"dolly-v2-12b",11:"flan_t5_base",12:"flan_t5_large",
49
+ 13:"flan_t5_small",14:"flan_t5_xl",15:"flan_t5_xxl",16:"gemma-7b-it",
50
+ 17:"gemma2-9b-it",18:"gpt-3.5-turbo",19:"gpt-35",20:"gpt-4",21:"gpt-4o",
51
+ 22:"gpt-j",23:"gpt-neox",24:"human",25:"llama3-70b",26:"llama3-8b",
52
+ 27:"mixtral-8x7b",28:"opt-1.3b",29:"opt-125m",30:"opt-13b",31:"opt-2.7b",
53
+ 32:"opt-30b",33:"opt-350m",34:"opt-6.7b",35:"opt-iml-30b",
54
+ 36:"opt-iml-max-1.3b",37:"t0-11b",38:"t0-3b",39:"text-davinci-002",
55
+ 40:"text-davinci-003"
 
 
 
56
  }
57
 
58
+ # ── CSS (inline fallback) ────────────────────────────────────────────────
59
+ CSS = (
60
+ Path(__file__).with_name("style.css").read_text()
61
+ if Path(__file__).with_name("style.css").exists()
62
+ else """
63
  :root{--clr-ai:#ff4d4f;--clr-human:#52c41a;--border:2px solid var(--clr-ai);--radius:10px}
64
  body{font-family:'Roboto Mono',monospace;margin:0 auto;max-width:900px;padding:32px}
65
  textarea,.output-box{width:100%;box-sizing:border-box;padding:16px;font-size:1rem;border:var(--border);border-radius:var(--radius)}
 
67
  .human-line{background:rgba(82,196,26,.12);padding:2px 4px;border-radius:4px}
68
  .prob-tooltip{cursor:help;border-bottom:1px dotted currentColor}
69
  """
70
+ )
71
 
72
+ # ── Weight download & model init ─────────────────────────────────────────
73
  print("🔄 Downloading weights …")
74
+ local_paths = {
75
+ alias: hf_hub_download(WEIGHT_REPO, remote, resume_download=True)
76
+ for alias, remote in FILE_MAP.items()
77
+ }
 
78
 
79
+ print("🧩 Loading tokenizer & ensemble …")
80
+ tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_NAME)
81
  models = []
82
+ for path in local_paths.values():
83
  net = AutoModelForSequenceClassification.from_pretrained(
84
+ BASE_MODEL_NAME, num_labels=NUM_LABELS
85
+ )
86
  net.load_state_dict(torch.load(path, map_location=DEVICE))
87
  net.to(DEVICE).eval()
88
  models.append(net)
89
 
90
+ # ── Helper functions ─────────────────────────────────────────────────────
91
+ def tidy(text: str) -> str:
92
+ text = text.replace("\r\n", "\n").replace("\r", "\n")
93
+ text = re.sub(r"\n\s*\n+", "\n\n", text)
94
+ text = re.sub(r"[ \t]+", " ", text)
95
+ text = re.sub(r"(\w+)-\n(\w+)", r"\1\2", text)
96
+ text = re.sub(r"(?<!\n)\n(?!\n)", " ", text)
97
+ return text.strip()
98
 
99
  def infer(segment: str):
100
+ """Return (human%, ai%, list[top-3 AI names])."""
101
+ inputs = tokenizer(segment, return_tensors="pt", truncation=True,
102
  padding=True).to(DEVICE)
103
  with torch.no_grad():
104
  probs = torch.stack([
105
  torch.softmax(m(**inputs).logits, dim=1) for m in models
106
+ ]).mean(0)[0]
107
 
108
+ ai_probs = probs.clone(); ai_probs[24] = 0 # remove 'human'
109
  ai_score = ai_probs.sum().item() * 100
110
  human_score = 100 - ai_score
111
  top3 = torch.topk(ai_probs, 3).indices.tolist()
112
+ return human_score, ai_score, [LABELS[i] for i in top3]
 
113
 
114
+ # ── Inference with explanations ─────────────────────────────────────────
115
  @spaces.GPU
116
  def analyse(text: str):
117
  if not text.strip():
118
  return "✏️ Please paste or type some text to analyse…"
119
 
120
  lines = tidy(text).split("\n")
121
+ highlighted, h_sum, ai_sum, n = [], 0.0, 0.0, 0
122
 
123
  for ln in lines:
124
  if not ln.strip():
125
  highlighted.append("<br>")
126
  continue
127
+
128
  n += 1
129
+ human_p, ai_p, top3 = infer(ln)
130
+ h_sum += human_p
131
+ ai_sum += ai_p
132
+
133
+ tooltip = (
134
+ f"AI {ai_p:.2f}% • Top-3: {', '.join(top3)}"
135
+ if ai_p > human_p else f"Human {human_p:.2f}%"
136
+ )
137
+ cls = "ai-line" if ai_p > human_p else "human-line"
138
+ span = (
139
+ f"<span class='{cls} prob-tooltip' title='{tooltip}'>"
140
+ f"{html.escape(ln)}</span>"
141
+ )
142
  highlighted.append(span)
143
 
144
+ human_avg, ai_avg = h_sum / n, ai_sum / n
145
+ verdict = (
146
+ f"<p><strong>Overall verdict:</strong> "
147
+ f"<span class='human-line' style='padding:4px 8px;'>"
148
+ f"Human-written {human_avg:.2f}%</span>"
149
+ if human_avg >= ai_avg else
150
+ f"<p><strong>Overall verdict:</strong> "
151
+ f"<span class='ai-line' style='padding:4px 8px;'>"
152
+ f"AI-generated {ai_avg:.2f}%</span>"
153
+ )
154
  return verdict + "<hr>" + "<br>".join(highlighted)
155
 
156
+ # ── Gradio UI ───────────────────────────────────────────────────────────
157
  with gr.Blocks(css=CSS, title="Orify Text Detector") as demo:
158
  gr.Markdown("""
159
  ### Orify Text Detector
160
+ Paste any English text and press **Analyse**.<br>
161
+ <span class='human-line'>Green</span> = human | 
162
+ <span class='ai-line'>Red</span> = AI.<br>
163
+ Hover a line to see confidence & the top-3 AI models it matches.
164
  """)
165
+ inp = gr.Textbox(lines=8, placeholder="Paste text here …")
166
+ out = gr.HTML(elem_classes=["output-box"])
 
167
  gr.Button("Analyse").click(analyse, inp, out)
168
  gr.Markdown("<sub>Powered by ModernBERT + Orify Ensemble © 2025</sub>")
169