yagnik12 committed on
Commit
2766639
·
verified ·
1 Parent(s): 6b86f39

Update ai_text_detector_valid_final.py

Browse files
Files changed (1) hide show
  1. ai_text_detector_valid_final.py +37 -52
ai_text_detector_valid_final.py CHANGED
@@ -1,40 +1,26 @@
1
  import torch
2
  from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
3
  import numpy as np
4
- import re
5
 
6
  # Multiple AI text detection models
7
# Display name -> Hugging Face model id for every detector that is run.
# NOTE(review): the "DeBERTa Detector" entry's id names a DistilBERT SST-2
# fine-tune, not a DeBERTa checkpoint -- confirm this is the intended model.
# "roberta-large-mnli" is the id that load_model() special-cases as a
# zero-shot classification pipeline.
MODELS = {
    "DeBERTa Detector": "distilbert-base-uncased-finetuned-sst-2-english",
    "MonkeyDAnh": "MonkeyDAnh/my-awesome-ai-detector-roberta-base-v4-human-vs-machine-finetune",
    "Andreas122001": "andreas122001/roberta-academic-detector",
    "roberta-mnli": "roberta-large-mnli",
}
13
-
14
- # Fix for "Final Score" formatting and zero-shot model handling
15
def load_model(model_id):
    """Load the objects needed to score text with ``model_id``.

    Args:
        model_id: Hugging Face model identifier.

    Returns:
        A ``(tokenizer, model)`` tuple. For ``"roberta-large-mnli"`` the
        tokenizer slot is ``None`` and the model slot is a
        ``zero-shot-classification`` pipeline; for every other id it is the
        tokenizer / sequence-classification model pair returned by
        ``from_pretrained``.
    """
    if model_id == "roberta-large-mnli":
        # Pipeline device convention used here: 0 when CUDA is available,
        # -1 otherwise.
        device = 0 if torch.cuda.is_available() else -1
        return None, pipeline("zero-shot-classification", model=model_id, device=device)

    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForSequenceClassification.from_pretrained(model_id)
    return tokenizer, model
22
 
23
def predict(text, tokenizer, model):
    """Score ``text`` and return a two-element NumPy array of probabilities.

    Args:
        text: The text to classify.
        tokenizer: Tokenizer callable, or ``None`` when ``model`` is the
            zero-shot pipeline (the pair shape ``load_model`` returns).
        model: Either a zero-shot pipeline callable or a
            sequence-classification model.

    Returns:
        ``np.ndarray``. On the zero-shot path it is
        ``[human-written score, AI-generated score]``. On the classifier
        path it is the softmax over the first row of logits; the original
        author's note reads this as ``[human_prob, ai_prob]``, which depends
        on each checkpoint's label order -- TODO confirm per model.
    """
    # ``pipeline`` is a factory function, not a class, so
    # ``isinstance(model, pipeline)`` raises TypeError. The zero-shot case is
    # recognised by its missing tokenizer instead.
    if tokenizer is None:
        labels = ["human-written", "AI-generated"]
        result = model(text, labels)
        # Look scores up by label name rather than relying on the order of
        # the returned lists.
        score_for = dict(zip(result["labels"], result["scores"]))
        return np.array([score_for["human-written"], score_for["AI-generated"]])

    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        outputs = model(**inputs)
    probs = torch.nn.functional.softmax(outputs.logits, dim=-1)
    # .cpu() keeps the NumPy conversion valid if the logits live on a GPU.
    return probs[0].cpu().numpy()
38
 
39
  def verdict(ai_prob):
40
  """Return a human-readable verdict based on AI probability"""
@@ -45,9 +31,9 @@ def verdict(ai_prob):
45
  elif 40 <= ai_prob < 60:
46
  return "Unclear – could be either human or AI-assisted."
47
  elif 60 <= ai_prob < 80:
48
- return "Likely AI-generated with some human editing."
49
- else:
50
- return "Most likely AI-generated."
51
 
52
  def detect_text(text):
53
  results = {}
@@ -68,38 +54,37 @@ def detect_text(text):
68
 
69
  # ------------------ Final Score (Average) ------------------
70
  try:
71
- valid_ai_scores = [r["AI Probability"] for r in results.values() if isinstance(r, dict) and "AI Probability" in r]
72
-
73
- if valid_ai_scores:
74
- avg_ai = sum(valid_ai_scores) / len(valid_ai_scores)
 
 
 
 
 
 
 
75
  results["Final Score"] = {
76
- "Verdict": verdict(avg_ai)
 
 
 
77
  }
78
- else:
79
- results["Final Score"] = {"error": "No valid scores to calculate average."}
80
-
81
  except Exception as e:
82
  results["Final Score"] = {"error": str(e)}
83
 
84
  return results
85
 
 
86
def format_score_lines(scores):
    """Return the printable lines for one entry of the detection results.

    Args:
        scores: The value stored for one model. A dict is rendered one
            ``key: value`` line per item; anything else is reported as an
            error.

    Returns:
        list[str]: Numeric values (except under the ``"Verdict"`` key) are
        shown with at most two decimals, trailing zeros removed, and a ``%``
        suffix; all other values are shown unchanged.
    """
    if not isinstance(scores, dict):
        return [f"Error: {scores}"]

    lines = []
    for key, value in scores.items():
        if isinstance(value, (int, float)) and key != "Verdict":
            # A ``:.2f`` string never ends in ".0", so the earlier regex
            # clean-up could not match; strip trailing zeros directly.
            number = f"{value:.2f}".rstrip("0").rstrip(".")
            lines.append(f"{key}: {number}%")
        else:
            lines.append(f"{key}: {value}")
    return lines


if __name__ == "__main__":
    text = input("Enter text to analyze:\n")
    output = detect_text(text)

    print("\n--- Detection Results ---")
    for model, scores in output.items():
        print(f"\n[{model}]")
        for line in format_score_lines(scores):
            print(line)
 
1
  import torch
2
  from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
3
  import numpy as np
 
4
 
5
  # Multiple AI text detection models
6
# Display name -> Hugging Face model id for every detector that is run.
# NOTE(review): the "DeBERTa Detector" entry's id names a DistilBERT SST-2
# fine-tune, not a DeBERTa checkpoint -- confirm this is the intended model.
MODELS = {
    "DeBERTa Detector": "distilbert-base-uncased-finetuned-sst-2-english",
    "MonkeyDAnh": "MonkeyDAnh/my-awesome-ai-detector-roberta-base-v4-human-vs-machine-finetune",
    "Andreas122001": "andreas122001/roberta-academic-detector",
}
11
+
 
12
def load_model(model_id):
    """Load the tokenizer and sequence-classification model for ``model_id``.

    Args:
        model_id: Hugging Face model identifier passed to ``from_pretrained``.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForSequenceClassification.from_pretrained(model_id)
    return tokenizer, model
 
 
 
16
 
17
def predict(text, tokenizer, model):
    """Classify ``text`` and return the class probabilities of the first row.

    Args:
        text: The text to classify.
        tokenizer: Callable that turns ``text`` into PyTorch model inputs.
        model: Callable accepting those inputs as keyword arguments and
            returning an object with a ``logits`` tensor.

    Returns:
        ``np.ndarray``: softmax over the last logits dimension for the first
        input row. The original author's note reads the entries as
        ``[human_prob, ai_prob]``; that depends on each checkpoint's label
        order -- TODO confirm per model.
    """
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    # Inference only: no gradients are needed.
    with torch.no_grad():
        outputs = model(**inputs)
    probs = torch.nn.functional.softmax(outputs.logits, dim=-1)
    # .cpu() keeps the NumPy conversion valid if the logits live on a GPU.
    return probs[0].cpu().numpy()
23
+
 
 
 
 
 
 
 
 
24
 
25
  def verdict(ai_prob):
26
  """Return a human-readable verdict based on AI probability"""
 
31
  elif 40 <= ai_prob < 60:
32
  return "Unclear – could be either human or AI-assisted."
33
  elif 60 <= ai_prob < 80:
34
+ return "Possibly AI-generated, or a human using AI assistance."
35
+ else: # ai_prob >= 80
36
+ return "Likely AI-generated or heavily AI-assisted."
37
 
38
  def detect_text(text):
39
  results = {}
 
54
 
55
  # ------------------ Final Score (Average) ------------------
56
  try:
57
+ ai_scores, human_scores = [], []
58
+
59
+ for r in results.values():
60
+ if isinstance(r, dict) and "AI Probability" in r and "Human Probability" in r:
61
+ ai_scores.append(r["AI Probability"])
62
+ human_scores.append(r["Human Probability"])
63
+
64
+ if ai_scores and human_scores:
65
+ avg_ai = sum(ai_scores) / len(ai_scores)
66
+ avg_human = sum(human_scores) / len(human_scores)
67
+
68
  results["Final Score"] = {
69
+ # "Human Probability (average)": float(round(avg_human, 2)),
70
+ # "AI Probability (average)": float(round(avg_ai, 2))
71
+ # "Verdict": verdict(avg_ai)
72
+ verdict(avg_ai)
73
  }
 
 
 
74
  except Exception as e:
75
  results["Final Score"] = {"error": str(e)}
76
 
77
  return results
78
 
79
+
80
def render_scores(scores):
    """Return the printable lines for one entry of the detection results.

    Args:
        scores: The value stored for one model. Usually a dict, but the
            "Final Score" entry is built with a set literal around the
            verdict string, so non-dict containers must be handled as well.

    Returns:
        list[str]: For a dict, one ``key: value`` line per item with a ``%``
        suffix on numeric values. For a set, its items as strings in sorted
        order (sets have no stable order). For a list or tuple, its items as
        strings in their existing order. Otherwise ``str(scores)``.
    """
    if isinstance(scores, dict):
        lines = []
        for key, value in scores.items():
            if isinstance(value, (int, float)):  # only add % for numeric values
                lines.append(f"{key}: {value}%")
            else:
                lines.append(f"{key}: {value}")
        return lines

    # Calling .items() on a set raises AttributeError, so render the
    # contained values directly.
    if isinstance(scores, (set, frozenset)):
        return sorted(str(item) for item in scores)
    if isinstance(scores, (list, tuple)):
        return [str(item) for item in scores]
    return [str(scores)]


if __name__ == "__main__":
    text = input("Enter text to analyze:\n")
    output = detect_text(text)

    print("\n--- Detection Results ---")
    for model, scores in output.items():
        print(f"\n[{model}]")
        for line in render_scores(scores):
            print(line)