Update app.py
app.py
CHANGED
@@ -3,53 +3,51 @@
 
 • Three ModernBERT-base checkpoints (soft-vote)
 • Per-line colour coding, probability tool-tips, top-3 AI model hints
-• …
+• Everything fetched automatically from the weight repo and cached
 """
 
 # ── Imports ──────────────────────────────────────────────────────────────
 from pathlib import Path
-import re, os, torch, gradio as gr
+import re, os, html, torch, gradio as gr                    # → add html
 from transformers import AutoTokenizer, AutoModelForSequenceClassification
 from huggingface_hub import hf_hub_download
 import spaces
-import …
+import typing                                               # → add typing
 
-# ────────────────── robust torch.compile shim …
+# ────────────────── robust torch.compile shim ───────────────────────────
 if hasattr(torch, "compile"):
-    def _no_compile(model: …
+    def _no_compile(model: typing.Any = None, *args, **kwargs):
         """
-        1. torch.compile(model, …)
-        2. torch.compile(**kw)
-           immediately gives back the class/…
+        1. If called as torch.compile(model, …) → just return the model.
+        2. If called as torch.compile(**kw)     → return a decorator that
+           immediately gives back the class / fn it decorates.
         """
-        if callable(model):
+        if callable(model):                                 # pattern 1
            return model
-
-        def decorator(fn):
+        # pattern 2 (used by ModernBERT via @torch.compile(...))
+        def decorator(fn):
            return fn
        return decorator
 
-    torch.compile = _no_compile
+    torch.compile = _no_compile                             # monkey-patch
    os.environ["TORCHINDUCTOR_DISABLED"] = "1"
 
-# …
-DEVICE …
-WEIGHT_REPO …
-FILE_MAP …
-    …
-    …
-    "ensamble_3": "ensamble_3",
-}
-
+# (everything below is unchanged)
+DEVICE      = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+WEIGHT_REPO = "Sleepyriizi/Orify-Text-Detection-Weights"
+FILE_MAP    = {"ensamble_1":"ensamble_1",
+               "ensamble_2.bin":"ensamble_2.bin",
+               "ensamble_3":"ensamble_3"}
 BASE_MODEL_NAME = "answerdotai/ModernBERT-base"
 NUM_LABELS = 41
 
-LABELS = { …
-    0: "13B", 1: "30B", …
-    6: "bloomz", 7: "cohere", 8: "davinci", …
-    …
-    …
-    …
+LABELS = {   # id → friendly label (unchanged)
+    0: "13B", 1: "30B", 2: "65B", 3: "7B", 4: "GLM130B",
+    5: "bloom_7b", 6: "bloomz", 7: "cohere", 8: "davinci",
+    9: "dolly", 10: "dolly-v2-12b", 11: "flan_t5_base",
+    12: "flan_t5_large", 13: "flan_t5_small", 14: "flan_t5_xl",
+    15: "flan_t5_xxl", 16: "gemma-7b-it", 17: "gemma2-9b-it",
+    18: "gpt-3.5-turbo", 19: "gpt-35", 20: "gpt-4",
     21: "gpt-4o", 22: "gpt-j", 23: "gpt-neox", 24: "human",
     25: "llama3-70b", 26: "llama3-8b", 27: "mixtral-8x7b",
     28: "opt-1.3b", 29: "opt-125m", 30: "opt-13b",
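A note on the shim hunk: once torch.compile is monkey-patched, both call patterns become no-ops. Below is a minimal, self-contained sanity check of that behaviour; the stub function names are invented for illustration and are not part of app.py.

import typing, torch

def _no_compile(model: typing.Any = None, *args, **kwargs):
    # Same two-pattern shim as in the diff above.
    if callable(model):
        return model
    def decorator(fn):
        return fn
    return decorator

torch.compile = _no_compile              # the monkey-patch under test

def fake_model(x):                       # hypothetical stand-in for an nn.Module
    return x

assert torch.compile(fake_model) is fake_model   # pattern 1: model passes through

@torch.compile(dynamic=True)                     # pattern 2: kwargs-only call
def add_one(x):
    return x + 1

assert add_one(1) == 2                   # the decorator returned the function unchanged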
@@ -58,7 +56,7 @@ LABELS = {   # id → friendly label
     37: "t0-11b", 38: "t0-3b", 39: "text-davinci-002", 40: "text-davinci-003"
 }
 
-# ── CSS (…
+# ── CSS (kept identical) ─────────────────────────────────────────────────
 CSS = Path(__file__).with_name("style.css").read_text() if Path(__file__).with_name("style.css").exists() else """
 :root{--clr-ai:#ff4d4f;--clr-human:#52c41a;--border:2px solid var(--clr-ai);--radius:10px}
 body{font-family:'Roboto Mono',monospace;margin:0 auto;max-width:900px;padding:32px}
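Minor readability point on the CSS line just above: it resolves the same sibling path twice in one expression. An equivalent single-lookup form, sketched here with an invented DEFAULT_CSS placeholder standing in for the long inline fallback string:

from pathlib import Path

DEFAULT_CSS = ":root{--clr-ai:#ff4d4f;--clr-human:#52c41a}"  # stand-in for the inline fallback
_css_path = Path(__file__).with_name("style.css")            # resolve the sibling path once
CSS = _css_path.read_text() if _css_path.exists() else DEFAULT_CSS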
@@ -77,7 +75,7 @@ print("🧩 Loading tokenizer & models …")
 tokeniser = AutoTokenizer.from_pretrained(BASE_MODEL_NAME)
 
 models = []
-for …
+for alias, path in local_paths.items():
     net = AutoModelForSequenceClassification.from_pretrained(
         BASE_MODEL_NAME, num_labels=NUM_LABELS)
     net.load_state_dict(torch.load(path, map_location=DEVICE))
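local_paths is defined outside this hunk; given WEIGHT_REPO and FILE_MAP above, it is presumably built roughly like this (a sketch, not the commit's exact code):

from huggingface_hub import hf_hub_download

WEIGHT_REPO = "Sleepyriizi/Orify-Text-Detection-Weights"
FILE_MAP = {"ensamble_1": "ensamble_1",
            "ensamble_2.bin": "ensamble_2.bin",
            "ensamble_3": "ensamble_3"}

# hf_hub_download fetches each checkpoint once, caches it locally,
# and returns the cached file path.
local_paths = {alias: hf_hub_download(repo_id=WEIGHT_REPO, filename=fname)
               for alias, fname in FILE_MAP.items()}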
@@ -94,19 +92,20 @@ def tidy(txt: str) -> str:
     return txt.strip()
 
 def infer(segment: str):
-    """Return (human%, ai%, …
-    inputs = tokeniser(segment, return_tensors="pt", …
-    …
+    """Return (human%, ai%, [top-3 ai model names])."""
+    inputs = tokeniser(segment, return_tensors="pt", truncation=True,
+                       padding=True).to(DEVICE)
     with torch.no_grad():
         probs = torch.stack([
             torch.softmax(m(**inputs).logits, dim=1) for m in models
         ]).mean(dim=0)[0]
 
-    ai_probs = probs.clone(); ai_probs[24] = 0
+    ai_probs = probs.clone(); ai_probs[24] = 0   # null out human idx
     ai_score = ai_probs.sum().item() * 100
     human_score = 100 - ai_score
     top3 = torch.topk(ai_probs, 3).indices.tolist()
-    …
+    top3_names = [LABELS[i] for i in top3]
+    return human_score, ai_score, top3_names
 
 # ── Inference + explanation ──────────────────────────────────────────────
 @spaces.GPU
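The ensemble arithmetic in infer() deserves a line of explanation: each per-model softmax row sums to 1, so the averaged row does too, which means zeroing index 24 (human) gives ai_score = (1 - P(human)) * 100 and human_score = P(human) * 100 exactly. A toy illustration with invented numbers and three classes, class 2 playing the human role:

import torch

p1 = torch.tensor([[0.5, 0.3, 0.2]])   # softmax row from model 1 (made up)
p2 = torch.tensor([[0.6, 0.2, 0.2]])   # model 2
p3 = torch.tensor([[0.4, 0.3, 0.3]])   # model 3

probs = torch.stack([p1, p2, p3]).mean(dim=0)[0]   # tensor([0.5000, 0.2667, 0.2333])

ai_probs = probs.clone(); ai_probs[2] = 0          # zero out the "human" class
ai_score = ai_probs.sum().item() * 100             # 76.67 == (1 - 0.2333) * 100
human_score = 100 - ai_score                       # 23.33 == probs[2] * 100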
@@ -126,9 +125,9 @@ def analyse(text: str):
         h_tot += h; ai_tot += ai
         tooltip = (f"AI {ai:.2f}% • Top-3: {', '.join(top3)}"
                    if ai > h else f"Human {h:.2f}%")
-        cls …
+        cls = "ai-line" if ai > h else "human-line"
         span = (f"<span class='{cls} prob-tooltip' title='{tooltip}'>"
-                f"{…
+                f"{html.escape(ln)}</span>")     # → use html.escape
         highlighted.append(span)
 
     verdict = (f"<p><strong>Overall verdict:</strong> "
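The switch to html.escape is the right call here: each analysed line is interpolated into a span, so any markup in the input would otherwise be rendered (or executed) by the browser. For example:

import html

print(html.escape("<script>alert('x')</script> & co."))
# &lt;script&gt;alert(&#x27;x&#x27;)&lt;/script&gt; &amp; co.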
@@ -140,7 +139,7 @@ def analyse(text: str):
                f"AI-generated {ai_tot/n:.2f}%</span>")
     return verdict + "<hr>" + "<br>".join(highlighted)
 
-# ── …
+# ── Interface ────────────────────────────────────────────────────────────
 with gr.Blocks(css=CSS, title="Orify Text Detector") as demo:
     gr.Markdown("""
     ### Orify Text Detector
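The diff ends before the UI wiring; a minimal sketch of how analyse() presumably plugs into the Blocks layout follows (component names and the button event are assumptions, not the commit's code):

import gradio as gr

with gr.Blocks(css=CSS, title="Orify Text Detector") as demo:
    gr.Markdown("### Orify Text Detector")
    inp = gr.Textbox(lines=8, label="Paste text to analyse")  # hypothetical input box
    out = gr.HTML()                                           # verdict + highlighted lines
    gr.Button("Analyse").click(analyse, inputs=inp, outputs=out)

demo.launch()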