Sleepyriizi committed
Commit 2a089a8 · verified · 1 Parent(s): decced4

Update app.py

Files changed (1):
  1. app.py +35 -34
app.py CHANGED
@@ -3,51 +3,53 @@
 
 • Three ModernBERT-base checkpoints (soft-vote)
 • Per-line colour coding, probability tool-tips, top-3 AI model hints
-• Everything fetched automatically from the weight repo and cached
+• Weights auto-downloaded once from the model repo and cached
 """
 
 # ── Imports ──────────────────────────────────────────────────────────────
 from pathlib import Path
-import re, torch, gradio as gr
+import re, os, torch, gradio as gr
 from transformers import AutoTokenizer, AutoModelForSequenceClassification
 from huggingface_hub import hf_hub_download
 import spaces
-import os, types  # add `types`
+import typing  # ← fix: use typing.Any
 
-# ────────────────── robust torch.compile shim ─────────────────────────
+# ────────────────── robust torch.compile shim ──────────────────────────
 if hasattr(torch, "compile"):
-    def _no_compile(model: types.Any = None, *args, **kwargs):
+    def _no_compile(model: typing.Any = None, *args, **kwargs):
         """
-        1. If called as torch.compile(model, …) → just return the model.
-        2. If called as torch.compile(**kw) → return a decorator that
-           immediately gives back the class / fn it decorates.
+        1. torch.compile(model, …) → return the model unchanged
+        2. torch.compile(**kw)  (decorator) → return a decorator that
+           immediately gives back the class/function it decorates
         """
-        if callable(model):  # pattern 1
+        if callable(model):  # pattern 1
             return model
-        # pattern 2 (used by ModernBERT via @torch.compile(...))
-        def decorator(fn):
+
+        def decorator(fn):  # pattern 2
             return fn
         return decorator
 
-    torch.compile = _no_compile  # monkey-patch
+    torch.compile = _no_compile
     os.environ["TORCHINDUCTOR_DISABLED"] = "1"
 
-# (everything below is unchanged)
-DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-WEIGHT_REPO = "Sleepyriizi/Orify-Text-Detection-Weights"
-FILE_MAP = {"ensamble_1":"ensamble_1",
-            "ensamble_2.bin":"ensamble_2.bin",
-            "ensamble_3":"ensamble_3"}
+# ── Configuration ────────────────────────────────────────────────────────
+DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+WEIGHT_REPO = "Sleepyriizi/Orify-Text-Detection-Weights"
+FILE_MAP = {
+    "ensamble_1": "ensamble_1",
+    "ensamble_2.bin": "ensamble_2.bin",
+    "ensamble_3": "ensamble_3",
+}
+
 BASE_MODEL_NAME = "answerdotai/ModernBERT-base"
 NUM_LABELS = 41
 
-LABELS = {  # id → friendly label (unchanged)
-    0: "13B", 1: "30B", 2: "65B", 3: "7B", 4: "GLM130B",
-    5: "bloom_7b", 6: "bloomz", 7: "cohere", 8: "davinci",
-    9: "dolly", 10: "dolly-v2-12b", 11: "flan_t5_base",
-    12: "flan_t5_large", 13: "flan_t5_small", 14: "flan_t5_xl",
-    15: "flan_t5_xxl", 16: "gemma-7b-it", 17: "gemma2-9b-it",
-    18: "gpt-3.5-turbo", 19: "gpt-35", 20: "gpt-4",
+LABELS = {  # id → friendly label
+    0: "13B", 1: "30B", 2: "65B", 3: "7B", 4: "GLM130B", 5: "bloom_7b",
+    6: "bloomz", 7: "cohere", 8: "davinci", 9: "dolly", 10: "dolly-v2-12b",
+    11: "flan_t5_base", 12: "flan_t5_large", 13: "flan_t5_small",
+    14: "flan_t5_xl", 15: "flan_t5_xxl", 16: "gemma-7b-it",
+    17: "gemma2-9b-it", 18: "gpt-3.5-turbo", 19: "gpt-35", 20: "gpt-4",
     21: "gpt-4o", 22: "gpt-j", 23: "gpt-neox", 24: "human",
     25: "llama3-70b", 26: "llama3-8b", 27: "mixtral-8x7b",
     28: "opt-1.3b", 29: "opt-125m", 30: "opt-13b",
@@ -56,7 +58,7 @@ LABELS = {  # id → friendly label (unchanged)
     37: "t0-11b", 38: "t0-3b", 39: "text-davinci-002", 40: "text-davinci-003"
 }
 
-# ── CSS (kept identical) ────────────────────────────────────────────────
+# ── CSS (unchanged) ──────────────────────────────────────────────────────
 CSS = Path(__file__).with_name("style.css").read_text() if Path(__file__).with_name("style.css").exists() else """
 :root{--clr-ai:#ff4d4f;--clr-human:#52c41a;--border:2px solid var(--clr-ai);--radius:10px}
 body{font-family:'Roboto Mono',monospace;margin:0 auto;max-width:900px;padding:32px}
@@ -75,7 +77,7 @@ print("🧩 Loading tokenizer & models …")
 tokeniser = AutoTokenizer.from_pretrained(BASE_MODEL_NAME)
 
 models = []
-for alias, path in local_paths.items():
+for _, path in local_paths.items():
     net = AutoModelForSequenceClassification.from_pretrained(
         BASE_MODEL_NAME, num_labels=NUM_LABELS)
     net.load_state_dict(torch.load(path, map_location=DEVICE))
@@ -92,20 +94,19 @@ def tidy(txt: str) -> str:
     return txt.strip()
 
 def infer(segment: str):
-    """Return (human%, ai%, [top-3 ai model names])."""
-    inputs = tokeniser(segment, return_tensors="pt", truncation=True,
-                       padding=True).to(DEVICE)
+    """Return (human%, ai%, list of top-3 AI model names)."""
+    inputs = tokeniser(segment, return_tensors="pt",
+                       truncation=True, padding=True).to(DEVICE)
     with torch.no_grad():
         probs = torch.stack([
             torch.softmax(m(**inputs).logits, dim=1) for m in models
         ]).mean(dim=0)[0]
 
-    ai_probs = probs.clone(); ai_probs[24] = 0  # null out human idx
+    ai_probs = probs.clone(); ai_probs[24] = 0
     ai_score = ai_probs.sum().item() * 100
     human_score = 100 - ai_score
     top3 = torch.topk(ai_probs, 3).indices.tolist()
-    top3_names = [LABELS[i] for i in top3]
-    return human_score, ai_score, top3_names
+    return human_score, ai_score, [LABELS[i] for i in top3]
 
 # ── Inference + explanation ──────────────────────────────────────────────
 @spaces.GPU
@@ -139,7 +140,7 @@ def analyse(text: str):
              f"AI-generated {ai_tot/n:.2f}%</span>")
     return verdict + "<hr>" + "<br>".join(highlighted)
 
-# ── Interface ────────────────────────────────────────────────────────────
+# ── Gradio interface ─────────────────────────────────────────────────────
 with gr.Blocks(css=CSS, title="Orify Text Detector") as demo:
     gr.Markdown("""
     ### Orify Text Detector
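The shim's whole contract is "hand back whatever you were given", so both call patterns it targets can be checked in isolation. In this minimal sketch the `_no_compile` body is copied from the commit; the surrounding harness is purely illustrative:

import typing

def _no_compile(model: typing.Any = None, *args, **kwargs):
    if callable(model):          # pattern 1: _no_compile(fn_or_model, ...)
        return model

    def decorator(fn):           # pattern 2: _no_compile(**kwargs) as decorator
        return fn
    return decorator

def f(x):
    return x + 1

assert _no_compile(f) is f                   # direct-call form returns f itself
assert _no_compile(dynamic=False)(f) is f    # decorator form also returns f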
 
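The loader loop iterates over `local_paths`, but its construction sits outside the changed hunks. A plausible shape, given FILE_MAP and the hf_hub_download import, is the dict comprehension below; this is an assumption for illustration, not code from the commit:

from huggingface_hub import hf_hub_download

WEIGHT_REPO = "Sleepyriizi/Orify-Text-Detection-Weights"
FILE_MAP = {"ensamble_1": "ensamble_1",
            "ensamble_2.bin": "ensamble_2.bin",
            "ensamble_3": "ensamble_3"}

# alias → local cached path; hf_hub_download fetches once, then reuses the cache
local_paths = {alias: hf_hub_download(repo_id=WEIGHT_REPO, filename=fname)
               for alias, fname in FILE_MAP.items()}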
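infer() is a three-way soft vote: each checkpoint's logits are softmaxed, the three distributions are averaged, and index 24 ("human") is zeroed so that the remaining probability mass becomes the AI score. A standalone illustration, with random logits standing in for real model outputs:

import torch

NUM_LABELS, HUMAN_IDX = 41, 24
logits = [torch.randn(1, NUM_LABELS) for _ in range(3)]  # one per checkpoint

# soft vote: average the per-model probability distributions
probs = torch.stack([torch.softmax(l, dim=1) for l in logits]).mean(dim=0)[0]

ai_probs = probs.clone()
ai_probs[HUMAN_IDX] = 0                   # remove the "human" probability mass
ai_score = ai_probs.sum().item() * 100    # total AI probability, as a percent
human_score = 100 - ai_score
top3 = torch.topk(ai_probs, 3).indices.tolist()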
 
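With the tokenizer and the three checkpoints loaded as in the diff, a call to the new infer() looks like this (hypothetical input and output formatting):

human, ai, top3 = infer("Paste any paragraph here to score it.")
print(f"human {human:.1f}% | AI {ai:.1f}% | likely AI models: {', '.join(top3)}")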