Spaces:

yagnik12
/

AI_Text_Detecter

Sleeping

App Files Files Community

yagnik12 committed on Sep 12

Commit

2766639

verified ·

1 Parent(s): 6b86f39

Update ai_text_detector_valid_final.py

Browse files

Files changed (1) hide show

ai_text_detector_valid_final.py +37 -52

ai_text_detector_valid_final.py CHANGED Viewed

@@ -1,40 +1,26 @@
 import torch
 from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
 import numpy as np
-import re
 # Multiple AI text detection models
-MODELS = {
     "DeBERTa Detector": "distilbert-base-uncased-finetuned-sst-2-english",
-    "MonkeyDAnh": "MonkeyDAnh/my-awesome-ai-detector-roberta-base-v4-human-vs-machine-finetune",
-    "Andreas122001": "andreas122001/roberta-academic-detector",
-    "roberta-mnli": "roberta-large-mnli"
 }
-# Fix for "Final Score" formatting and zero-shot model handling
 def load_model(model_id):
     """Load what is needed to score text with the checkpoint ``model_id``.

     Returns a ``(tokenizer, model)`` pair. For ``"roberta-large-mnli"`` the
     tokenizer slot is ``None`` and the model slot holds a zero-shot
     classification pipeline; every other id yields an ``AutoTokenizer`` and
     an ``AutoModelForSequenceClassification`` loaded from the same id.
     """
     if model_id != "roberta-large-mnli":
         tokenizer = AutoTokenizer.from_pretrained(model_id)
         classifier = AutoModelForSequenceClassification.from_pretrained(model_id)
         return tokenizer, classifier
     # Device index handed to the pipeline: 0 when CUDA is available, else -1.
     device_index = 0 if torch.cuda.is_available() else -1
     zero_shot = pipeline("zero-shot-classification", model=model_id, device=device_index)
     return None, zero_shot
 def predict(text, tokenizer, model):
     """Score ``text`` and return a two-element NumPy array.

     Two kinds of ``model`` are supported:

     * a zero-shot classification pipeline, passed together with
       ``tokenizer=None`` (the pair ``load_model`` returns for the zero-shot
       checkpoint); the result is ``[human-written score, AI-generated score]``;
     * a sequence-classification model plus its tokenizer; the result is the
       softmax over the logits of the first input row.
     """
     # ``pipeline`` is a factory function, not a class, so the former
     # ``isinstance(model, pipeline)`` check raised TypeError on every call.
     # A missing tokenizer is how load_model marks the zero-shot pipeline.
     if tokenizer is None:
         labels = ["human-written", "AI-generated"]
         result = model(text, labels)
         # The pipeline returns labels sorted by score; look scores up by label.
         score_by_label = dict(zip(result["labels"], result["scores"]))
         return np.array([score_by_label["human-written"], score_by_label["AI-generated"]])
     # Normal text classification
     inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
     with torch.no_grad():
         outputs = model(**inputs)
         probs = torch.nn.functional.softmax(outputs.logits, dim=-1)
     # NOTE(review): callers treat this as [human_prob, ai_prob]; the class
     # order depends on each checkpoint's label mapping - confirm per model.
     return probs[0].numpy()
 def verdict(ai_prob):
     """Return a human-readable verdict based on AI probability"""
@@ -45,9 +31,9 @@ def verdict(ai_prob):
     elif 40 <= ai_prob < 60:
         return "Unclear – could be either human or AI-assisted."
     elif 60 <= ai_prob < 80:
-        return "Likely AI-generated with some human editing."
-    else:
-        return "Most likely AI-generated."
 def detect_text(text):
     results = {}
@@ -68,38 +54,37 @@ def detect_text(text):
     # ------------------ Final Score (Average) ------------------
     try:
-        valid_ai_scores = [r["AI Probability"] for r in results.values() if isinstance(r, dict) and "AI Probability" in r]
-        if valid_ai_scores:
-            avg_ai = sum(valid_ai_scores) / len(valid_ai_scores)
             results["Final Score"] = {
-                "Verdict": verdict(avg_ai)
             }
-        else:
-            results["Final Score"] = {"error": "No valid scores to calculate average."}
     except Exception as e:
         results["Final Score"] = {"error": str(e)}
     return results
 if __name__ == "__main__":
     # Command-line entry point: read a text, run detect_text on it and print
     # one section per entry of the returned mapping.
     text = input("Enter text to analyze:\n")
     output = detect_text(text)
     print("\n--- Detection Results ---")
     for model, scores in output.items():
         print(f"\n[{model}]")
         if not isinstance(scores, dict):
             print(f"Error: {scores}")
             continue
         for k, v in scores.items():
             if k == "Verdict" or not isinstance(v, (int, float)):
                 print(f"{k}: {v}")
                 continue
             # Two-decimal formatting always ends in two digits, so the former
             # pattern r'(\d+)\.0$' could never match and whole numbers were
             # still printed as e.g. "50.00". Strip the trailing ".00" instead.
             v_str = re.sub(r'\.00$', '', f"{v:.2f}")
             print(f"{k}: {v_str}%")

 import torch
 from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
 import numpy as np
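 # Multiple AI text detection models (display name -> Hugging Face model id).
 # NOTE(review): "DeBERTa Detector" maps to a DistilBERT SST-2 checkpoint id,
 # not a DeBERTa AI-text detector - confirm the intended model.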
+MODELS = {
     "DeBERTa Detector": "distilbert-base-uncased-finetuned-sst-2-english",
+    "MonkeyDAnh":"MonkeyDAnh/my-awesome-ai-detector-roberta-base-v4-human-vs-machine-finetune",
+    "Andreas122001":"andreas122001/roberta-academic-detector"
 }
 def load_model(model_id):
     """Return the ``(tokenizer, model)`` pair for the checkpoint ``model_id``.

     The tokenizer is loaded first, then the sequence-classification model,
     both from the same id.
     """
     return (
         AutoTokenizer.from_pretrained(model_id),
         AutoModelForSequenceClassification.from_pretrained(model_id),
     )
 def predict(text, tokenizer, model):
     """Return the class probabilities for ``text`` as a NumPy array.

     ``text`` is tokenized with truncation and padding, passed through
     ``model`` with gradients disabled, and the logits are soft-maxed over the
     last dimension. Only the first row of the result is returned.

     NOTE(review): callers treat the result as ``[human_prob, ai_prob]``; the
     class order depends on each checkpoint's label mapping - confirm per model.
     """
     encoded = tokenizer(text, truncation=True, padding=True, return_tensors="pt")
     with torch.no_grad():
         logits = model(**encoded).logits
         distribution = torch.softmax(logits, dim=-1)
     first_row = distribution[0]
     return first_row.numpy()
 def verdict(ai_prob):
     """Return a human-readable verdict based on AI probability"""
     elif 40 <= ai_prob < 60:
         return "Unclear – could be either human or AI-assisted."
     elif 60 <= ai_prob < 80:
+        return "Possibly AI-generated, or a human using AI assistance."
+    else:  # ai_prob >= 80
+        return "Likely AI-generated or heavily AI-assisted."
 def detect_text(text):
     results = {}
     # ------------------ Final Score (Average) ------------------
     try:
+        ai_scores, human_scores = [], []
+        for r in results.values():
+            if isinstance(r, dict) and "AI Probability" in r and "Human Probability" in r:
+                ai_scores.append(r["AI Probability"])
+                human_scores.append(r["Human Probability"])
+        if ai_scores and human_scores:
+            avg_ai = sum(ai_scores) / len(ai_scores)
+            avg_human = sum(human_scores) / len(human_scores)
             results["Final Score"] = {
+                # "Human Probability (average)": float(round(avg_human, 2)),
+                # "AI Probability (average)": float(round(avg_ai, 2))
+                # "Verdict": verdict(avg_ai)
+                verdict(avg_ai)
             }
     except Exception as e:
         results["Final Score"] = {"error": str(e)}
     return results
 if __name__ == "__main__":
     # Command-line entry point: read a text, run detect_text on it and print
     # one section per entry of the returned mapping.
     text = input("Enter text to analyze:\n")
     output = detect_text(text)
     print("\n--- Detection Results ---")
     for model, scores in output.items():
         print(f"\n[{model}]")
         if not isinstance(scores, dict):
             # Not every entry is a mapping: detect_text builds "Final Score"
             # as a set literal holding only the verdict string, and calling
             # .items() on it would raise AttributeError.
             if isinstance(scores, (set, frozenset, list, tuple)):
                 for item in scores:
                     print(item)
             else:
                 print(scores)
             continue
         for k, v in scores.items():
             if isinstance(v, (int, float)):  # only add % for numeric values
                 print(f"{k}: {v}%")
             else:
                 print(f"{k}: {v}")