Spaces:

yagnik12
/

AI_Text_Detecter

Running

App Files Files Community

yagnik12 committed on Sep 12

Commit

6b86f39

verified ·

1 Parent(s): a713ea4

Update ai_text_detector_valid_final.py

Browse files

Files changed (1) hide show

ai_text_detector_valid_final.py +42 -44

ai_text_detector_valid_final.py CHANGED Viewed

@@ -1,43 +1,40 @@
 import torch
 from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
 import numpy as np
 # Multiple AI text detection models
-MODELS = {
     "DeBERTa Detector": "distilbert-base-uncased-finetuned-sst-2-english",
-    "MonkeyDAnh":"MonkeyDAnh/my-awesome-ai-detector-roberta-base-v4-human-vs-machine-finetune",
-    "Andreas122001":"andreas122001/roberta-academic-detector",
     "roberta-mnli": "roberta-large-mnli"
 }
 def load_model(model_id):
-    tokenizer = AutoTokenizer.from_pretrained(model_id)
-    # Use the zero-shot classification pipeline for NLI models
     if model_id == "roberta-large-mnli":
-        model = pipeline("zero-shot-classification", model=model_id, device=0 if torch.cuda.is_available() else -1)
     else:
         model = AutoModelForSequenceClassification.from_pretrained(model_id)
-    return tokenizer, model
 def predict(text, tokenizer, model):
     if isinstance(model, pipeline):
-        # Use the roberta-mnli model for zero-shot classification
-        candidate_labels = ["This text was written by a human.", "This text was written by an AI."]
-        result = model(text, candidate_labels)
-        # The entailment score for each label is the probability
-        human_prob = result["scores"][result["labels"].index("This text was written by a human.")]
-        ai_prob = result["scores"][result["labels"].index("This text was written by an AI.")]
-        return np.array([human_prob, ai_prob])
     else:
-        # The existing code for other models
         inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
         with torch.no_grad():
             outputs = model(**inputs)
             probs = torch.nn.functional.softmax(outputs.logits, dim=-1)
-        return probs[0].numpy()
 def verdict(ai_prob):
     """Return a human-readable verdict based on AI probability"""
@@ -48,9 +45,9 @@ def verdict(ai_prob):
     elif 40 <= ai_prob < 60:
         return "Unclear – could be either human or AI-assisted."
     elif 60 <= ai_prob < 80:
-        return "Possibly AI-generated, or a human using AI assistance."
-    else:  # ai_prob >= 80
-        return "Likely AI-generated or heavily AI-assisted."
 def detect_text(text):
     results = {}
@@ -71,37 +68,38 @@ def detect_text(text):
     # ------------------ Final Score (Average) ------------------
     try:
-        ai_scores, human_scores = [], []
-        for r in results.values():
-            if isinstance(r, dict) and "AI Probability" in r and "Human Probability" in r:
-                ai_scores.append(r["AI Probability"])
-                human_scores.append(r["Human Probability"])
-        if ai_scores and human_scores:
-            avg_ai = sum(ai_scores) / len(ai_scores)
-            avg_human = sum(human_scores) / len(human_scores)
             results["Final Score"] = {
-                # "Human Probability (average)": float(round(avg_human, 2)),
-                # "AI Probability (average)": float(round(avg_ai, 2))
-                # "Verdict": verdict(avg_ai)
-                verdict(avg_ai)
             }
     except Exception as e:
         results["Final Score"] = {"error": str(e)}
     return results
 if __name__ == "__main__":
     text = input("Enter text to analyze:\n")
     output = detect_text(text)
     print("\n--- Detection Results ---")
     for model, scores in output.items():
         print(f"\n[{model}]")
-        for k, v in scores.items():
-            if isinstance(v, (int, float)):  # only add % for numeric values
-                print(f"{k}: {v}%")
-            else:
-                print(f"{k}: {v}")

 import torch
 from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
 import numpy as np
+import re
 # Multiple AI text detection models
+MODELS = {
     # Maps the display name used as a key in the results to the Hugging Face
     # model id that is handed to load_model().
     # NOTE(review): the id below names a DistilBERT checkpoint fine-tuned on
     # SST-2, which by its name looks like a sentiment model rather than a
     # DeBERTa AI-text detector; confirm this is the intended checkpoint.
     "DeBERTa Detector": "distilbert-base-uncased-finetuned-sst-2-english",
+    "MonkeyDAnh": "MonkeyDAnh/my-awesome-ai-detector-roberta-base-v4-human-vs-machine-finetune",
+    "Andreas122001": "andreas122001/roberta-academic-detector",
     # load_model() special-cases this exact id and builds a zero-shot
     # classification pipeline for it instead of a plain classifier.
     "roberta-mnli": "roberta-large-mnli"
 }
+# Fix for "Final Score" formatting and zero-shot model handling
 def load_model(model_id):
     """Load whatever is needed to score text with ``model_id``.

     Returns:
         A ``(tokenizer, model)`` pair. For ``"roberta-large-mnli"`` the pair
         is ``(None, zero_shot_pipeline)``, with the pipeline placed on CUDA
         device 0 when available and on CPU (``device=-1``) otherwise. For
         every other id it is an ``AutoTokenizer`` and an
         ``AutoModelForSequenceClassification``, both created with
         ``from_pretrained(model_id)``.
     """
     if model_id != "roberta-large-mnli":
         tokenizer = AutoTokenizer.from_pretrained(model_id)
         classifier = AutoModelForSequenceClassification.from_pretrained(model_id)
         return tokenizer, classifier

     # The NLI checkpoint is used through the zero-shot pipeline, so no
     # separate tokenizer is handed back to the caller.
     target_device = 0 if torch.cuda.is_available() else -1
     zero_shot = pipeline("zero-shot-classification", model=model_id, device=target_device)
     return None, zero_shot
 def predict(text, tokenizer, model):
     """Score ``text`` with one detector and return a two-element array.

     Args:
         text: The text to analyse.
         tokenizer: Tokenizer matching ``model``, or ``None`` when ``model`` is
             the zero-shot classification pipeline (this is what
             ``load_model`` returns for ``"roberta-large-mnli"``).
         model: Either a zero-shot pipeline callable or a sequence
             classification model whose output exposes ``.logits``.

     Returns:
         ``numpy.ndarray``. For the zero-shot pipeline this is
         ``[human_score, ai_score]`` looked up by label. For a classifier it
         is the softmax over the logits of the first input. NOTE(review):
         callers treat index 0 as "human" and index 1 as "AI", but the real
         class order depends on each checkpoint's label mapping; confirm per
         model.
     """
     # `transformers.pipeline` is a factory function, not a class, so the
     # previous `isinstance(model, pipeline)` raised TypeError on every call.
     # A missing tokenizer is how load_model marks the zero-shot pipeline.
     if tokenizer is None:
         labels = ["human-written", "AI-generated"]
         result = model(text, labels)
         # The pipeline returns labels sorted by score, so look them up by name.
         score_by_label = dict(zip(result["labels"], result["scores"]))
         return np.array([score_by_label["human-written"], score_by_label["AI-generated"]])

     # Normal text classification
     inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
     with torch.no_grad():
         outputs = model(**inputs)
         probs = torch.nn.functional.softmax(outputs.logits, dim=-1)
     return probs[0].numpy()
 def verdict(ai_prob):
     """Return a human-readable verdict based on AI probability"""
     elif 40 <= ai_prob < 60:
         return "Unclear – could be either human or AI-assisted."
     elif 60 <= ai_prob < 80:
+        return "Likely AI-generated with some human editing."
+    else:
+        return "Most likely AI-generated."
 def detect_text(text):
     results = {}
     # ------------------ Final Score (Average) ------------------
     try:
+        valid_ai_scores = [r["AI Probability"] for r in results.values() if isinstance(r, dict) and "AI Probability" in r]
+        if valid_ai_scores:
+            avg_ai = sum(valid_ai_scores) / len(valid_ai_scores)
             results["Final Score"] = {
+                "Verdict": verdict(avg_ai)
             }
+        else:
+            results["Final Score"] = {"error": "No valid scores to calculate average."}
     except Exception as e:
         results["Final Score"] = {"error": str(e)}
     return results
 def format_percent(value):
     """Return ``value`` as a percentage string without trailing zeros.

     The number is rounded to two decimals, then trailing zeros and a dangling
     decimal point are dropped: ``85.0`` -> ``"85%"``, ``85.5`` -> ``"85.5%"``.
     The earlier regex ``(\\d+)\\.0$`` could never match a two-decimal string,
     so whole numbers were printed as ``"85.00%"``.
     """
     return f"{value:.2f}".rstrip("0").rstrip(".") + "%"


 if __name__ == "__main__":
     # Interactive entry point: read one text, run every detector, print results.
     text = input("Enter text to analyze:\n")
     output = detect_text(text)
     print("\n--- Detection Results ---")
     for model, scores in output.items():
         print(f"\n[{model}]")
         if not isinstance(scores, dict):
             # Anything that is not a per-model dict is reported verbatim.
             print(f"Error: {scores}")
             continue
         for k, v in scores.items():
             # bool is an int subclass; keep flags out of the percentage path.
             if isinstance(v, (int, float)) and not isinstance(v, bool):
                 print(f"{k}: {format_percent(v)}")
             else:
                 # Non-numeric entries (verdict text, error messages) print as-is.
                 print(f"{k}: {v}")