mohitrulzz committed on
Commit
af8e43a
·
verified ·
1 Parent(s): 8703329

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +49 -29
app.py CHANGED
@@ -15,7 +15,7 @@ DB_NAME = "db.sqlite3"
15
  USERNAME = "aixbi"
16
  PASSWORD = "aixbi@123"
17
  MAX_SENTENCES_CHECK = 10
18
- LOGO_PATH = "aixbi.jpg" # Place your logo here
19
 
20
  # -----------------------------
21
  # DB INIT
@@ -40,43 +40,51 @@ init_db()
40
  # MODEL LOADING
41
  # -----------------------------
42
  embedder = SentenceTransformer('all-MiniLM-L6-v2')
43
- tokenizer = AutoTokenizer.from_pretrained("roberta-base-openai-detector")
44
- model = AutoModelForSequenceClassification.from_pretrained("roberta-base-openai-detector")
45
 
46
  # -----------------------------
47
- # SAFE TEXT EXTRACTION
48
  # -----------------------------
49
  def extract_text(file_obj):
50
- try:
51
- # Save to temp path for compatibility with Hugging Face Spaces
52
- with tempfile.NamedTemporaryFile(delete=False) as tmp:
53
- shutil.copyfile(file_obj.name, tmp.name)
54
- tmp_path = tmp.name
 
55
 
56
- if tmp_path.endswith(".pdf"):
 
 
 
 
 
 
57
  with pdfplumber.open(tmp_path) as pdf:
58
  text = " ".join(page.extract_text() or "" for page in pdf.pages)
59
- return text.strip() if text else None
60
- elif tmp_path.endswith(".docx"):
61
  doc = docx.Document(tmp_path)
62
- text = " ".join([p.text for p in doc.paragraphs])
63
- return text.strip() if text else None
64
- elif tmp_path.endswith(".txt"):
65
  with open(tmp_path, "r", encoding="utf-8", errors="ignore") as f:
66
  text = f.read()
67
- return text.strip() if text else None
68
  else:
69
  return None
70
- except Exception as e:
71
- print("Read error:", e)
72
  return None
73
 
 
 
 
 
 
74
  def detect_ai_text(text):
75
  inputs = tokenizer(text[:512], return_tensors="pt", truncation=True)
76
  with torch.no_grad():
77
  outputs = model(**inputs)
78
  score = torch.softmax(outputs.logits, dim=1)[0][1].item()
79
- return score * 100 # Return % probability
80
 
81
  def live_plagiarism_check(sentences):
82
  ddgs = DDGS()
@@ -93,6 +101,9 @@ def live_plagiarism_check(sentences):
93
  score = (plagiarism_hits / len(samples)) * 100 if samples else 0
94
  return score, suspicious_sentences
95
 
 
 
 
96
  def save_result(student_id, student_name, ai_score, plagiarism_score):
97
  conn = sqlite3.connect(DB_NAME)
98
  c = conn.cursor()
@@ -108,7 +119,7 @@ def load_results():
108
  return df
109
 
110
  # -----------------------------
111
- # PDF REPORT WITH LOGO & COLORS
112
  # -----------------------------
113
  class HighlightPDF(FPDF):
114
  def add_highlighted_sentence(self, sentence, color):
@@ -120,9 +131,9 @@ def generate_pdf_report(student_name, student_id, ai_score, plagiarism_score, su
120
  pdf = HighlightPDF()
121
  pdf.add_page()
122
 
123
- # Add logo small on top-left
124
  if os.path.exists(LOGO_PATH):
125
- pdf.image(LOGO_PATH, 10, 8, 15, 15)
126
 
127
  pdf.set_font("Arial", style='B', size=14)
128
  pdf.cell(200, 10, txt="AIxBI - Ultimate Document Plagiarism Report", ln=True, align='C')
@@ -138,7 +149,7 @@ def generate_pdf_report(student_name, student_id, ai_score, plagiarism_score, su
138
  pdf.multi_cell(0, 10, txt="Suspicious Sentences Detected:")
139
  if suspicious_sentences:
140
  for s in suspicious_sentences:
141
- pdf.add_highlighted_sentence(f"- {s}", (255, 200, 200)) # Red for suspicious
142
  else:
143
  pdf.multi_cell(0, 10, "None detected.")
144
  pdf.ln(10)
@@ -149,9 +160,9 @@ def generate_pdf_report(student_name, student_id, ai_score, plagiarism_score, su
149
 
150
  pdf.multi_cell(0, 10, txt="Recommendations for Student:")
151
  recommendations = """1. Rewrite detected sentences in your own words.
152
- 2. Add citations for any copied or referenced material.
153
- 3. Avoid using AI content directly—use as guidance, not verbatim.
154
- 4. Use plagiarism tools and proofread before submission."""
155
  pdf.multi_cell(0, 10, recommendations)
156
 
157
  pdf.output(output_path)
@@ -175,12 +186,19 @@ def analyze(student_name, student_id, file_obj):
175
 
176
  sentences = [s.strip() for s in text.split(". ") if len(s) > 30]
177
 
178
- ai_score = detect_ai_text(text)
 
 
 
179
  plagiarism_score, suspicious_sentences = live_plagiarism_check(sentences)
 
 
180
  sample_text = suspicious_sentences[0] if suspicious_sentences else text[:200]
181
 
 
182
  save_result(student_id, student_name, ai_score, plagiarism_score)
183
 
 
184
  output_pdf = f"{student_id}_report.pdf"
185
  generate_pdf_report(student_name, student_id, ai_score, plagiarism_score, suspicious_sentences, sample_text, output_pdf)
186
 
@@ -194,12 +212,13 @@ def show_dashboard():
194
  # -----------------------------
195
  # GRADIO UI (LIGHT THEME)
196
  # -----------------------------
197
- with gr.Blocks(theme="default", css="body { background-color: #f9f9f9; }") as demo:
198
  with gr.Row():
199
  if os.path.exists(LOGO_PATH):
200
- gr.Image(LOGO_PATH, elem_id="logo", show_label=False, height=50, width=50)
201
  gr.Markdown("## **AIxBI - Ultimate Document Plagiarism Software**\n#### Professional Thesis & AI Content Detector", elem_id="title")
202
 
 
203
  login_box = gr.Group(visible=True)
204
  with login_box:
205
  user = gr.Textbox(label="Username")
@@ -207,6 +226,7 @@ with gr.Blocks(theme="default", css="body { background-color: #f9f9f9; }") as de
207
  login_btn = gr.Button("Login", variant="primary")
208
  login_msg = gr.Markdown("")
209
 
 
210
  app_box = gr.Group(visible=False)
211
  with app_box:
212
  with gr.Tab("Check Thesis"):
 
15
  USERNAME = "aixbi"
16
  PASSWORD = "aixbi@123"
17
  MAX_SENTENCES_CHECK = 10
18
+ LOGO_PATH = "aixbi.jpg" # Place your logo file here
19
 
20
  # -----------------------------
21
  # DB INIT
 
40
  # MODEL LOADING
41
  # -----------------------------
42
  embedder = SentenceTransformer('all-MiniLM-L6-v2')
43
+ tokenizer = AutoTokenizer.from_pretrained("hello-simpleai/chatgpt-detector-roberta")
44
+ model = AutoModelForSequenceClassification.from_pretrained("hello-simpleai/chatgpt-detector-roberta")
45
 
46
  # -----------------------------
47
+ # FILE HANDLING
48
  # -----------------------------
49
  def extract_text(file_obj):
50
+ """Extracts text safely from PDF/DOCX/TXT"""
51
+ if file_obj is None:
52
+ return None
53
+
54
+ name = file_obj.name
55
+ ext = os.path.splitext(name)[1].lower()
56
 
57
+ # Copy to temp file preserving extension
58
+ with tempfile.NamedTemporaryFile(delete=False, suffix=ext) as tmp:
59
+ shutil.copy(file_obj.name, tmp.name)
60
+ tmp_path = tmp.name
61
+
62
+ try:
63
+ if ext == ".pdf":
64
  with pdfplumber.open(tmp_path) as pdf:
65
  text = " ".join(page.extract_text() or "" for page in pdf.pages)
66
+ elif ext == ".docx":
 
67
  doc = docx.Document(tmp_path)
68
+ text = " ".join(p.text for p in doc.paragraphs)
69
+ elif ext == ".txt":
 
70
  with open(tmp_path, "r", encoding="utf-8", errors="ignore") as f:
71
  text = f.read()
 
72
  else:
73
  return None
74
+ except:
 
75
  return None
76
 
77
+ return text.strip() if text else None
78
+
79
+ # -----------------------------
80
+ # AI & PLAGIARISM DETECTION
81
+ # -----------------------------
82
  def detect_ai_text(text):
83
  inputs = tokenizer(text[:512], return_tensors="pt", truncation=True)
84
  with torch.no_grad():
85
  outputs = model(**inputs)
86
  score = torch.softmax(outputs.logits, dim=1)[0][1].item()
87
+ return score # probability of AI-generated
88
 
89
  def live_plagiarism_check(sentences):
90
  ddgs = DDGS()
 
101
  score = (plagiarism_hits / len(samples)) * 100 if samples else 0
102
  return score, suspicious_sentences
103
 
104
+ # -----------------------------
105
+ # DB OPS
106
+ # -----------------------------
107
  def save_result(student_id, student_name, ai_score, plagiarism_score):
108
  conn = sqlite3.connect(DB_NAME)
109
  c = conn.cursor()
 
119
  return df
120
 
121
  # -----------------------------
122
+ # PDF REPORT
123
  # -----------------------------
124
  class HighlightPDF(FPDF):
125
  def add_highlighted_sentence(self, sentence, color):
 
131
  pdf = HighlightPDF()
132
  pdf.add_page()
133
 
134
+ # Logo
135
  if os.path.exists(LOGO_PATH):
136
+ pdf.image(LOGO_PATH, 10, 8, 20, 20)
137
 
138
  pdf.set_font("Arial", style='B', size=14)
139
  pdf.cell(200, 10, txt="AIxBI - Ultimate Document Plagiarism Report", ln=True, align='C')
 
149
  pdf.multi_cell(0, 10, txt="Suspicious Sentences Detected:")
150
  if suspicious_sentences:
151
  for s in suspicious_sentences:
152
+ pdf.add_highlighted_sentence(f"- {s}", (255, 200, 200)) # Red
153
  else:
154
  pdf.multi_cell(0, 10, "None detected.")
155
  pdf.ln(10)
 
160
 
161
  pdf.multi_cell(0, 10, txt="Recommendations for Student:")
162
  recommendations = """1. Rewrite detected sentences in your own words.
163
+ 2. Add citations for copied/referenced material.
164
+ 3. Avoid direct AI-generated content.
165
+ 4. Proofread and recheck plagiarism before submission."""
166
  pdf.multi_cell(0, 10, recommendations)
167
 
168
  pdf.output(output_path)
 
186
 
187
  sentences = [s.strip() for s in text.split(". ") if len(s) > 30]
188
 
189
+ # AI Detection
190
+ ai_score = detect_ai_text(text) * 100
191
+
192
+ # Live plagiarism
193
  plagiarism_score, suspicious_sentences = live_plagiarism_check(sentences)
194
+
195
+ # Pick sample excerpt
196
  sample_text = suspicious_sentences[0] if suspicious_sentences else text[:200]
197
 
198
+ # Save to DB
199
  save_result(student_id, student_name, ai_score, plagiarism_score)
200
 
201
+ # Generate PDF
202
  output_pdf = f"{student_id}_report.pdf"
203
  generate_pdf_report(student_name, student_id, ai_score, plagiarism_score, suspicious_sentences, sample_text, output_pdf)
204
 
 
212
  # -----------------------------
213
  # GRADIO UI (LIGHT THEME)
214
  # -----------------------------
215
+ with gr.Blocks(theme="default") as demo:
216
  with gr.Row():
217
  if os.path.exists(LOGO_PATH):
218
+ gr.Image(LOGO_PATH, elem_id="logo", show_label=False, scale=0.2)
219
  gr.Markdown("## **AIxBI - Ultimate Document Plagiarism Software**\n#### Professional Thesis & AI Content Detector", elem_id="title")
220
 
221
+ # Login
222
  login_box = gr.Group(visible=True)
223
  with login_box:
224
  user = gr.Textbox(label="Username")
 
226
  login_btn = gr.Button("Login", variant="primary")
227
  login_msg = gr.Markdown("")
228
 
229
+ # Main App
230
  app_box = gr.Group(visible=False)
231
  with app_box:
232
  with gr.Tab("Check Thesis"):