mohitrulzz committed on
Commit
4f7293e
·
verified ·
1 Parent(s): 91d8c5f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +117 -96
app.py CHANGED
@@ -1,25 +1,21 @@
1
  import gradio as gr
2
- import pdfplumber, docx, sqlite3, random, os
3
  from datetime import datetime
4
  import pandas as pd
5
- from sentence_transformers import SentenceTransformer, util
6
  from transformers import AutoTokenizer, AutoModelForSequenceClassification
7
  import torch
8
  from duckduckgo_search import DDGS
9
  from fpdf import FPDF
10
- import qrcode
11
- from PIL import Image
12
 
13
  # -----------------------------
14
  # CONFIG
15
  # -----------------------------
16
  DB_NAME = "db.sqlite3"
17
- REPORT_DIR = "reports"
18
- LOGO_PATH = "aixbi.jpg" # Place your uploaded logo in the root
19
  USERNAME = "aixbi"
20
  PASSWORD = "aixbi@123"
21
-
22
- os.makedirs(REPORT_DIR, exist_ok=True)
23
 
24
  # -----------------------------
25
  # DB INIT
@@ -44,44 +40,52 @@ init_db()
44
  # MODEL LOADING
45
  # -----------------------------
46
  embedder = SentenceTransformer('all-MiniLM-L6-v2')
47
- tokenizer = AutoTokenizer.from_pretrained("hello-simpleai/chatgpt-detector-roberta")
48
- model = AutoModelForSequenceClassification.from_pretrained("hello-simpleai/chatgpt-detector-roberta")
49
 
50
  # -----------------------------
51
- # FUNCTIONS
52
  # -----------------------------
53
- def extract_text(file_path: str):
54
- filepath = str(file_path)
55
- if filepath.endswith(".pdf"):
56
- with pdfplumber.open(filepath) as pdf:
57
- return " ".join(page.extract_text() for page in pdf.pages if page.extract_text())
58
- elif filepath.endswith(".docx"):
59
- doc = docx.Document(filepath)
60
- return " ".join([p.text for p in doc.paragraphs])
61
- else: # txt
62
- with open(filepath, "r", encoding="utf-8", errors="ignore") as f:
63
- return f.read()
64
-
65
- def detect_ai_text(text: str):
 
 
 
 
 
 
 
66
  inputs = tokenizer(text[:512], return_tensors="pt", truncation=True)
67
  with torch.no_grad():
68
  outputs = model(**inputs)
69
  score = torch.softmax(outputs.logits, dim=1)[0][1].item()
70
- return score * 100
71
 
72
- def live_plagiarism_check(sentences, n_samples=3):
73
  ddgs = DDGS()
74
- if not sentences:
75
- return 0, []
76
- samples = random.sample(sentences, min(n_samples, len(sentences)))
77
  plagiarism_hits = 0
78
- top_sentences = []
79
  for sentence in samples:
80
  results = list(ddgs.text(sentence, max_results=2))
81
  if results:
82
  plagiarism_hits += 1
83
- top_sentences.append(sentence)
84
- return (plagiarism_hits / len(samples)) * 100, top_sentences
 
 
85
 
86
  def save_result(student_id, student_name, ai_score, plagiarism_score):
87
  conn = sqlite3.connect(DB_NAME)
@@ -97,51 +101,54 @@ def load_results():
97
  conn.close()
98
  return df
99
 
100
- def generate_pdf_report(student_name, student_id, ai_score, plagiarism_score, top_sentences):
101
- timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
102
- verdict = "Likely Original"
103
- if ai_score > 70 or plagiarism_score > 50:
104
- verdict = "⚠ High AI/Plagiarism Risk"
105
- elif ai_score > 40 or plagiarism_score > 30:
106
- verdict = "Moderate Risk"
107
-
108
- filename = f"{REPORT_DIR}/Report_{student_id}_{int(datetime.now().timestamp())}.pdf"
109
-
110
- pdf = FPDF()
111
  pdf.add_page()
112
 
113
- # Add Logo
114
  if os.path.exists(LOGO_PATH):
115
- pdf.image(LOGO_PATH, 10, 8, 33)
116
 
117
- pdf.set_font("Arial", "B", 18)
118
- pdf.cell(200, 20, "AIxBI - Thesis Analysis Report", ln=True, align="C")
119
  pdf.ln(20)
120
 
121
  pdf.set_font("Arial", size=12)
122
- pdf.cell(200, 10, f"Student Name: {student_name}", ln=True)
123
- pdf.cell(200, 10, f"Student ID: {student_id}", ln=True)
124
- pdf.cell(200, 10, f"AI Probability: {ai_score:.2f}%", ln=True)
125
- pdf.cell(200, 10, f"Plagiarism Score: {plagiarism_score:.2f}%", ln=True)
126
- pdf.cell(200, 10, f"Verdict: {verdict}", ln=True)
127
- pdf.cell(200, 10, f"Analysis Date: {timestamp}", ln=True)
128
  pdf.ln(10)
129
 
130
- # Highlight top plagiarized sentences
131
- if top_sentences:
132
- pdf.set_text_color(255, 0, 0)
133
- pdf.multi_cell(0, 10, "Top Plagiarized Sentences:\n" + "\n\n".join(top_sentences))
134
- pdf.set_text_color(0, 0, 0)
 
 
135
 
136
- # Generate QR Code
137
- qr_data = f"AIxBI Verification\nID:{student_id}\nAI:{ai_score:.2f}% Plag:{plagiarism_score:.2f}%\nTime:{timestamp}"
138
- qr_img = qrcode.make(qr_data)
139
- qr_path = "qr_temp.png"
140
- qr_img.save(qr_path)
141
- pdf.image(qr_path, x=160, y=230, w=40)
142
 
143
- pdf.output(filename)
144
- return filename
 
 
 
 
 
 
145
 
146
  # -----------------------------
147
  # APP LOGIC
@@ -152,66 +159,80 @@ def login(user, pwd):
152
  else:
153
  return gr.update(), gr.update(), "Invalid username or password!"
154
 
155
- def analyze(student_name, student_id, file_path):
156
- if file_path is None or not student_name or not student_id:
157
- return "Please fill all fields and upload a document.", None, None, None
158
-
159
- text = extract_text(file_path)
160
- sentences = [s for s in text.split(". ") if len(s) > 20]
161
-
162
- ai_score = detect_ai_text(text)
163
- local_score = 0
164
- if sentences:
165
- embeddings = embedder.encode(sentences, convert_to_tensor=True)
166
- cosine_scores = util.cos_sim(embeddings, embeddings)
167
- local_score = (cosine_scores > 0.95).float().mean().item() * 100
168
 
169
- live_score, top_sentences = live_plagiarism_check(sentences)
170
- plagiarism_score = max(local_score, live_score)
171
 
 
 
 
 
 
 
 
172
  save_result(student_id, student_name, ai_score, plagiarism_score)
173
- pdf_path = generate_pdf_report(student_name, student_id, ai_score, plagiarism_score, top_sentences)
174
 
175
- return f"Analysis Completed for {student_name} ({student_id})", round(ai_score,2), round(plagiarism_score,2), pdf_path
 
 
 
 
 
 
 
 
176
 
177
  def show_dashboard():
178
  df = load_results()
179
  return df
180
 
181
  # -----------------------------
182
- # GRADIO INTERFACE
183
  # -----------------------------
184
- with gr.Blocks() as demo:
185
- gr.Image(LOGO_PATH, label="AIxBI", show_label=False)
186
- gr.Markdown("# AIxBI - Plagiarism & AI Detection with PDF Reports")
 
 
187
 
188
  # Login Section
189
  login_box = gr.Group(visible=True)
190
  with login_box:
191
  user = gr.Textbox(label="Username")
192
  pwd = gr.Textbox(label="Password", type="password")
193
- login_btn = gr.Button("Login")
194
  login_msg = gr.Markdown("")
195
 
196
  # Main App
197
  app_box = gr.Group(visible=False)
198
  with app_box:
199
  with gr.Tab("Check Thesis"):
200
- student_name = gr.Textbox(label="Student Name")
201
- student_id = gr.Textbox(label="Student ID")
202
- file_upload = gr.File(label="Upload Document", file_types=[".pdf",".docx",".txt"], type="filepath")
203
- analyze_btn = gr.Button("Analyze Document")
 
204
  status = gr.Textbox(label="Status")
205
  ai_score = gr.Number(label="AI Probability (%)")
206
  plagiarism_score = gr.Number(label="Plagiarism Score (%)")
207
- pdf_report = gr.File(label="Download PDF Report")
208
-
 
209
  with gr.Tab("Summary Dashboard"):
210
- dashboard_btn = gr.Button("Refresh Dashboard")
211
  dashboard = gr.Dataframe(headers=["id","student_id","student_name","ai_score","plagiarism_score","timestamp"])
212
 
213
  login_btn.click(login, inputs=[user, pwd], outputs=[login_box, app_box, login_msg])
214
- analyze_btn.click(analyze, inputs=[student_name, student_id, file_upload], outputs=[status, ai_score, plagiarism_score, pdf_report])
215
  dashboard_btn.click(show_dashboard, outputs=[dashboard])
216
 
217
  if __name__ == "__main__":
 
1
  import gradio as gr
2
+ import pdfplumber, docx, sqlite3, os, random
3
  from datetime import datetime
4
  import pandas as pd
5
+ from sentence_transformers import SentenceTransformer
6
  from transformers import AutoTokenizer, AutoModelForSequenceClassification
7
  import torch
8
  from duckduckgo_search import DDGS
9
  from fpdf import FPDF
 
 
10
 
11
  # -----------------------------
12
  # CONFIG
13
  # -----------------------------
14
  DB_NAME = "db.sqlite3"
 
 
15
  USERNAME = "aixbi"
16
  PASSWORD = "aixbi@123"
17
+ MAX_SENTENCES_CHECK = 10
18
+ LOGO_PATH = "aixbi.jpg" # Place your logo here
19
 
20
  # -----------------------------
21
  # DB INIT
 
40
  # MODEL LOADING
41
  # -----------------------------
42
  embedder = SentenceTransformer('all-MiniLM-L6-v2')
43
+ tokenizer = AutoTokenizer.from_pretrained("SuperAnnotate/ai-detector")
44
+ model = AutoModelForSequenceClassification.from_pretrained("SuperAnnotate/ai-detector")
45
 
46
  # -----------------------------
47
+ # SAFE TEXT EXTRACTION
48
  # -----------------------------
49
def extract_text(file_obj):
    """Extract plain text from an uploaded PDF, DOCX, or TXT document.

    Args:
        file_obj: Either a filesystem path (``str`` / path-like) or an object
            that exposes the path through a ``.name`` attribute.
            NOTE(review): which of the two the Gradio ``File`` component
            hands over depends on its ``type`` setting and the installed
            Gradio version - confirm against the deployment.

    Returns:
        The stripped text, or ``None`` when the input is missing, has an
        unsupported extension, cannot be parsed, or contains no text.
    """
    if file_obj is None:
        return None

    # Work from the path in every branch so the result does not depend on
    # whether the upload wrapper still has a readable handle.
    path = getattr(file_obj, "name", file_obj)

    try:
        # Compare the extension case-insensitively so "THESIS.PDF" is accepted.
        suffix = os.path.splitext(str(path))[1].lower()
        if suffix == ".pdf":
            with pdfplumber.open(path) as pdf:
                text = " ".join(page.extract_text() or "" for page in pdf.pages)
        elif suffix == ".docx":
            text = " ".join(p.text for p in docx.Document(path).paragraphs)
        elif suffix == ".txt":
            with open(path, "r", encoding="utf-8", errors="ignore") as handle:
                text = handle.read()
        else:
            return None
    except Exception:
        # Deliberate best-effort boundary: the caller treats None as
        # "could not read the file" and shows a message to the user.
        return None

    text = text.strip()
    return text or None
67
+
68
def detect_ai_text(text):
    """Return the classifier's probability that ``text`` is AI-generated.

    Args:
        text: The document text to score.

    Returns:
        A float in ``[0, 1]`` taken from the softmax over the model logits
        (the caller scales it to a percentage).
        NOTE(review): index 1 is assumed to be the "AI-generated" class and
        the model is assumed to emit at least two logits - confirm against
        the model card of the checkpoint loaded at module level.
    """
    # Truncate by tokens, not characters: slicing the string to 512 chars
    # would feed the model only a small part of the window it can handle.
    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
    with torch.no_grad():
        outputs = model(**inputs)
    probabilities = torch.softmax(outputs.logits, dim=1)
    return probabilities[0][1].item()
74
 
75
def live_plagiarism_check(sentences):
    """Search the web for a random sample of sentences and score the hits.

    Up to ``MAX_SENTENCES_CHECK`` sentences are drawn at random; a sentence
    counts as suspicious as soon as the search returns any result for it.

    Returns:
        ``(score, suspicious_sentences)`` where ``score`` is the percentage
        of sampled sentences that produced a result (0 when nothing was
        sampled) and ``suspicious_sentences`` lists those sentences.
    """
    search_client = DDGS()
    sample_size = min(MAX_SENTENCES_CHECK, len(sentences))
    chosen = random.sample(sentences, sample_size)

    flagged = [
        candidate
        for candidate in chosen
        if list(search_client.text(candidate, max_results=2))
    ]

    if not chosen:
        return 0, flagged
    return (len(flagged) / len(chosen)) * 100, flagged
89
 
90
  def save_result(student_id, student_name, ai_score, plagiarism_score):
91
  conn = sqlite3.connect(DB_NAME)
 
101
  conn.close()
102
  return df
103
 
104
+ # -----------------------------
105
+ # PDF REPORT WITH LOGO & COLORS
106
+ # -----------------------------
107
class HighlightPDF(FPDF):
    """FPDF document that can print text on a coloured background."""

    def add_highlighted_sentence(self, sentence, color):
        """Write ``sentence`` as a filled multi-line cell followed by a gap.

        Args:
            sentence: Text to print.
            color: Sequence of colour components, unpacked into
                ``set_fill_color``.
        """
        full_width, line_height, gap = 0, 10, 1
        self.set_fill_color(*color)
        self.multi_cell(full_width, line_height, sentence, fill=True)
        self.ln(gap)
112
+
113
def _latin1_safe(value):
    """Return ``value`` as text with non-Latin-1 characters replaced by '?'."""
    return str(value).encode("latin-1", "replace").decode("latin-1")


def generate_pdf_report(student_name, student_id, ai_score, plagiarism_score, suspicious_sentences, sample_text, output_path):
    """Write the analysis report for one student to ``output_path`` as a PDF.

    Args:
        student_name: Name printed in the report header.
        student_id: Identifier printed next to the name.
        ai_score: AI probability, already expressed as a percentage.
        plagiarism_score: Plagiarism score, already a percentage.
        suspicious_sentences: Sentences to list with a red highlight.
        sample_text: Excerpt printed with an orange highlight.
        output_path: Destination file for the generated PDF.

    All free text is passed through ``_latin1_safe`` because the built-in
    "Arial" core font only covers Latin-1; characters outside that range
    (curly quotes, dashes, non-Latin scripts) would otherwise make the PDF
    library raise an encoding error instead of producing the report.
    """
    pdf = HighlightPDF()
    pdf.add_page()

    # Add logo
    if os.path.exists(LOGO_PATH):
        pdf.image(LOGO_PATH, 10, 8, 20, 20)

    pdf.set_font("Arial", style='B', size=14)
    pdf.cell(200, 10, txt="AIxBI - Ultimate Document Plagiarism Report", ln=True, align='C')
    pdf.ln(20)

    pdf.set_font("Arial", size=12)
    pdf.multi_cell(0, 10, txt=_latin1_safe(f"Student: {student_name} ({student_id})"))
    pdf.multi_cell(0, 10, txt=f"AI Probability: {ai_score:.2f}%")
    pdf.multi_cell(0, 10, txt=f"Plagiarism Score: {plagiarism_score:.2f}%")
    pdf.multi_cell(0, 10, txt=f"Date: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    pdf.ln(10)

    pdf.multi_cell(0, 10, txt="Suspicious Sentences Detected:")
    if suspicious_sentences:
        for s in suspicious_sentences:
            pdf.add_highlighted_sentence(_latin1_safe(f"- {s}"), (255, 200, 200))  # Red for suspicious
    else:
        pdf.multi_cell(0, 10, "None detected.")
    pdf.ln(10)

    pdf.multi_cell(0, 10, txt="Sample Detected Text (AI/Plagiarized Excerpt):")
    pdf.add_highlighted_sentence(_latin1_safe(sample_text), (255, 230, 200))  # Orange
    pdf.ln(10)

    pdf.multi_cell(0, 10, txt="Recommendations for Student:")
    # Plain hyphen instead of an em dash: the em dash is not a Latin-1
    # character and cannot be rendered with the core font.
    recommendations = "\n".join((
        "1. Rewrite detected sentences in your own words.",
        "2. Add citations for any copied or referenced material.",
        "3. Avoid using AI content directly - use as guidance, not verbatim.",
        "4. Use plagiarism tools and proofread before submission.",
    ))
    pdf.multi_cell(0, 10, recommendations)

    pdf.output(output_path)
152
 
153
  # -----------------------------
154
  # APP LOGIC
 
159
  else:
160
  return gr.update(), gr.update(), "Invalid username or password!"
161
 
162
def analyze(student_name, student_id, file_obj):
    """Run the full check on an uploaded document and build the UI outputs.

    Args:
        student_name: Name entered in the form.
        student_id: Identifier entered in the form.
        file_obj: The uploaded document as delivered by the file widget.

    Returns:
        A 5-tuple ``(status, ai_score, plagiarism_score, pdf_path,
        highlighted_text)``. On missing input or an unreadable file only
        the status message is set and the other four entries are ``None``.
    """
    import re  # local import: only needed to sanitise the report filename

    if file_obj is None or not student_name or not student_id:
        return "Please fill all fields and upload a document.", None, None, None, None

    text = extract_text(file_obj)
    if not text:
        return "Error: Could not read the file. Please upload a valid PDF, DOCX, or TXT.", None, None, None, None

    sentences = [s.strip() for s in text.split(". ") if len(s) > 30]

    # AI Detection
    ai_score = detect_ai_text(text) * 100

    # Live plagiarism
    plagiarism_score, suspicious_sentences = live_plagiarism_check(sentences)

    # Pick a sample suspicious excerpt for report
    sample_text = suspicious_sentences[0] if suspicious_sentences else text[:200]

    # Save to DB
    save_result(student_id, student_name, ai_score, plagiarism_score)

    # Generate PDF Report. The ID is user-typed, so strip anything that is
    # not a plain filename character before it becomes part of a path
    # (prevents "../" traversal and invalid names such as "a/b").
    safe_id = re.sub(r"[^A-Za-z0-9_.-]", "_", str(student_id))
    output_pdf = f"{safe_id}_report.pdf"
    generate_pdf_report(
        student_name, student_id, ai_score, plagiarism_score,
        suspicious_sentences, sample_text, output_pdf
    )

    highlighted_text = "\n\n".join([f"⚠️ {s}" for s in suspicious_sentences]) if suspicious_sentences else "No suspicious sentences found."
    return f"Analysis Completed for {student_name} ({student_id})", round(ai_score,2), round(plagiarism_score,2), output_pdf, highlighted_text
193
 
194
  def show_dashboard():
195
  df = load_results()
196
  return df
197
 
198
  # -----------------------------
199
+ # GRADIO UI (LIGHT THEME & LOGO)
200
  # -----------------------------
201
+ with gr.Blocks(theme="default") as demo:
202
+ with gr.Row():
203
+ if os.path.exists(LOGO_PATH):
204
+ gr.Image(LOGO_PATH, elem_id="logo", show_label=False, scale=0.2)
205
+ gr.Markdown("## **AIxBI - Ultimate Document Plagiarism Software**\n#### Professional Thesis & AI Content Detector", elem_id="title")
206
 
207
  # Login Section
208
  login_box = gr.Group(visible=True)
209
  with login_box:
210
  user = gr.Textbox(label="Username")
211
  pwd = gr.Textbox(label="Password", type="password")
212
+ login_btn = gr.Button("Login", variant="primary")
213
  login_msg = gr.Markdown("")
214
 
215
  # Main App
216
  app_box = gr.Group(visible=False)
217
  with app_box:
218
  with gr.Tab("Check Thesis"):
219
+ with gr.Row():
220
+ student_name = gr.Textbox(label="Student Name")
221
+ student_id = gr.Textbox(label="Student ID")
222
+ file_upload = gr.File(label="Upload Document", file_types=[".pdf",".docx",".txt"])
223
+ analyze_btn = gr.Button("Analyze Document", variant="primary")
224
  status = gr.Textbox(label="Status")
225
  ai_score = gr.Number(label="AI Probability (%)")
226
  plagiarism_score = gr.Number(label="Plagiarism Score (%)")
227
+ suspicious_text = gr.Textbox(label="Suspicious Sentences Highlight", lines=10)
228
+ pdf_output = gr.File(label="Download PDF Report")
229
+
230
  with gr.Tab("Summary Dashboard"):
231
+ dashboard_btn = gr.Button("Refresh Dashboard", variant="secondary")
232
  dashboard = gr.Dataframe(headers=["id","student_id","student_name","ai_score","plagiarism_score","timestamp"])
233
 
234
  login_btn.click(login, inputs=[user, pwd], outputs=[login_box, app_box, login_msg])
235
+ analyze_btn.click(analyze, inputs=[student_name, student_id, file_upload], outputs=[status, ai_score, plagiarism_score, pdf_output, suspicious_text])
236
  dashboard_btn.click(show_dashboard, outputs=[dashboard])
237
 
238
  if __name__ == "__main__":