yagnik12 committed on
Commit
6b86f39
·
verified ·
1 Parent(s): a713ea4

Update ai_text_detector_valid_final.py

Browse files
Files changed (1) hide show
  1. ai_text_detector_valid_final.py +42 -44
ai_text_detector_valid_final.py CHANGED
@@ -1,43 +1,40 @@
1
  import torch
2
  from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
3
  import numpy as np
 
4
 
5
  # Multiple AI text detection models
6
- MODELS = {
7
  "DeBERTa Detector": "distilbert-base-uncased-finetuned-sst-2-english",
8
- "MonkeyDAnh":"MonkeyDAnh/my-awesome-ai-detector-roberta-base-v4-human-vs-machine-finetune",
9
- "Andreas122001":"andreas122001/roberta-academic-detector",
10
  "roberta-mnli": "roberta-large-mnli"
11
  }
12
-
 
13
  def load_model(model_id):
14
- tokenizer = AutoTokenizer.from_pretrained(model_id)
15
- # Use the zero-shot classification pipeline for NLI models
16
  if model_id == "roberta-large-mnli":
17
- model = pipeline("zero-shot-classification", model=model_id, device=0 if torch.cuda.is_available() else -1)
18
  else:
 
19
  model = AutoModelForSequenceClassification.from_pretrained(model_id)
20
- return tokenizer, model
21
 
22
  def predict(text, tokenizer, model):
23
  if isinstance(model, pipeline):
24
- # Use the roberta-mnli model for zero-shot classification
25
- candidate_labels = ["This text was written by a human.", "This text was written by an AI."]
26
- result = model(text, candidate_labels)
27
-
28
- # The entailment score for each label is the probability
29
- human_prob = result["scores"][result["labels"].index("This text was written by a human.")]
30
- ai_prob = result["scores"][result["labels"].index("This text was written by an AI.")]
31
-
32
- return np.array([human_prob, ai_prob])
33
  else:
34
- # The existing code for other models
35
  inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
36
  with torch.no_grad():
37
  outputs = model(**inputs)
38
  probs = torch.nn.functional.softmax(outputs.logits, dim=-1)
39
- return probs[0].numpy()
40
-
41
 
42
  def verdict(ai_prob):
43
  """Return a human-readable verdict based on AI probability"""
@@ -48,9 +45,9 @@ def verdict(ai_prob):
48
  elif 40 <= ai_prob < 60:
49
  return "Unclear – could be either human or AI-assisted."
50
  elif 60 <= ai_prob < 80:
51
- return "Possibly AI-generated, or a human using AI assistance."
52
- else: # ai_prob >= 80
53
- return "Likely AI-generated or heavily AI-assisted."
54
 
55
  def detect_text(text):
56
  results = {}
@@ -71,37 +68,38 @@ def detect_text(text):
71
 
72
  # ------------------ Final Score (Average) ------------------
73
  try:
74
- ai_scores, human_scores = [], []
75
-
76
- for r in results.values():
77
- if isinstance(r, dict) and "AI Probability" in r and "Human Probability" in r:
78
- ai_scores.append(r["AI Probability"])
79
- human_scores.append(r["Human Probability"])
80
-
81
- if ai_scores and human_scores:
82
- avg_ai = sum(ai_scores) / len(ai_scores)
83
- avg_human = sum(human_scores) / len(human_scores)
84
-
85
  results["Final Score"] = {
86
- # "Human Probability (average)": float(round(avg_human, 2)),
87
- # "AI Probability (average)": float(round(avg_ai, 2))
88
- # "Verdict": verdict(avg_ai)
89
- verdict(avg_ai)
90
  }
 
 
 
91
  except Exception as e:
92
  results["Final Score"] = {"error": str(e)}
93
 
94
  return results
95
 
96
-
97
  if __name__ == "__main__":
98
  text = input("Enter text to analyze:\n")
99
  output = detect_text(text)
 
100
  print("\n--- Detection Results ---")
101
  for model, scores in output.items():
102
  print(f"\n[{model}]")
103
- for k, v in scores.items():
104
- if isinstance(v, (int, float)): # only add % for numeric values
105
- print(f"{k}: {v}%")
106
- else:
107
- print(f"{k}: {v}")
 
 
 
 
 
 
 
 
 
1
  import torch
2
  from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
3
  import numpy as np
4
+ import re
5
 
# Multiple AI text detection models
# Display name -> model id. load_model special-cases the "roberta-large-mnli"
# id and builds a zero-shot-classification pipeline for it.
# NOTE(review): the "DeBERTa Detector" entry points at an id that names a
# DistilBERT checkpoint fine-tuned on SST-2, not a DeBERTa AI-text detector --
# confirm this is the intended model.
MODELS = {
    "DeBERTa Detector": "distilbert-base-uncased-finetuned-sst-2-english",
    "MonkeyDAnh": "MonkeyDAnh/my-awesome-ai-detector-roberta-base-v4-human-vs-machine-finetune",
    "Andreas122001": "andreas122001/roberta-academic-detector",
    "roberta-mnli": "roberta-large-mnli"
}
13
+
14
+ # Fix for "Final Score" formatting and zero-shot model handling
def load_model(model_id):
    """Load whatever is needed to score text with ``model_id``.

    Args:
        model_id: Model identifier handed to the transformers loaders.

    Returns:
        A ``(tokenizer, model)`` tuple. For ``"roberta-large-mnli"`` the
        tuple is ``(None, zero_shot_pipeline)`` -- no separate tokenizer is
        returned. For every other id it is an ``AutoTokenizer`` paired with
        an ``AutoModelForSequenceClassification``.
    """
    if model_id != "roberta-large-mnli":
        # Ordinary sequence classifier: tokenizer first, then the model.
        return (
            AutoTokenizer.from_pretrained(model_id),
            AutoModelForSequenceClassification.from_pretrained(model_id),
        )

    # NLI checkpoint: wrap it in a zero-shot pipeline, on GPU 0 if available.
    device = 0 if torch.cuda.is_available() else -1
    return None, pipeline("zero-shot-classification", model=model_id, device=device)
22
 
def predict(text, tokenizer, model):
    """Score ``text`` and return a two-element array ``[human, ai]``.

    Args:
        text: The text to classify.
        tokenizer: Tokenizer matching ``model``, or ``None`` when ``model``
            is a zero-shot pipeline that takes raw text.
        model: Either a zero-shot-classification pipeline (any callable
            taking ``(text, candidate_labels)`` and returning a dict with
            ``"labels"`` and ``"scores"``) or a torch sequence-classification
            model whose output exposes ``.logits``.

    Returns:
        ``numpy.ndarray``. For the zero-shot path, the scores for the
        "human-written" and "AI-generated" labels in that order. For the
        classifier path, the softmax over the logits of the first (only)
        input row.
    """
    # ``transformers.pipeline`` is a factory function, not a class, so
    # ``isinstance(model, pipeline)`` raises TypeError. Anything that is not
    # a torch module (or that comes without a tokenizer) is treated as the
    # zero-shot pipeline instead.
    if tokenizer is None or not isinstance(model, torch.nn.Module):
        labels = ["human-written", "AI-generated"]
        result = model(text, labels)
        # The result's labels may come back in a different order than they
        # were passed, so look each score up by its label.
        score_by_label = dict(zip(result["labels"], result["scores"]))
        return np.array(
            [score_by_label["human-written"], score_by_label["AI-generated"]]
        )

    # Normal text classification.
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():  # inference only; no gradients needed
        outputs = model(**inputs)
    probs = torch.nn.functional.softmax(outputs.logits, dim=-1)
    # NOTE(review): callers treat this as [human_prob, ai_prob]; that assumes
    # class index 0 is "human" and 1 is "AI" for every model -- confirm
    # against each checkpoint's label mapping.
    return probs[0].numpy()
 
38
 
39
  def verdict(ai_prob):
40
  """Return a human-readable verdict based on AI probability"""
 
45
  elif 40 <= ai_prob < 60:
46
  return "Unclear – could be either human or AI-assisted."
47
  elif 60 <= ai_prob < 80:
48
+ return "Likely AI-generated with some human editing."
49
+ else:
50
+ return "Most likely AI-generated."
51
 
52
  def detect_text(text):
53
  results = {}
 
68
 
69
  # ------------------ Final Score (Average) ------------------
70
  try:
71
+ valid_ai_scores = [r["AI Probability"] for r in results.values() if isinstance(r, dict) and "AI Probability" in r]
72
+
73
+ if valid_ai_scores:
74
+ avg_ai = sum(valid_ai_scores) / len(valid_ai_scores)
 
 
 
 
 
 
 
75
  results["Final Score"] = {
76
+ "Verdict": verdict(avg_ai)
 
 
 
77
  }
78
+ else:
79
+ results["Final Score"] = {"error": "No valid scores to calculate average."}
80
+
81
  except Exception as e:
82
  results["Final Score"] = {"error": str(e)}
83
 
84
  return results
85
 
 
if __name__ == "__main__":
    # Script entry point: read text from stdin, run detect_text on it, and
    # print one section per entry of the returned mapping.
    text = input("Enter text to analyze:\n")
    output = detect_text(text)

    print("\n--- Detection Results ---")
    for model, scores in output.items():
        print(f"\n[{model}]")
        if not isinstance(scores, dict):
            # Anything that is not a per-model dict is reported as an error.
            print(f"Error: {scores}")
            continue
        for k, v in scores.items():
            if isinstance(v, (int, float)) and k != "Verdict":
                # Round to two decimals, then drop trailing zeros and a bare
                # decimal point: 50.00 -> "50", 12.50 -> "12.5". The earlier
                # regex (r'(\d+)\.0$') could never match a two-decimal string,
                # so values were always printed as e.g. "50.00%".
                v_str = f"{v:.2f}".rstrip("0").rstrip(".")
                print(f"{k}: {v_str}%")
            else:
                # Non-numeric values (and the verdict) are printed verbatim.
                print(f"{k}: {v}")