mohitrulzz committed on
Commit
1d8231b
·
verified ·
1 Parent(s): 8bd7b10

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +88 -59
app.py CHANGED
@@ -1,5 +1,5 @@
1
  import gradio as gr
2
- import pdfplumber, docx, sqlite3, os, random
3
  from datetime import datetime
4
  import pandas as pd
5
  from sentence_transformers import SentenceTransformer, util
@@ -7,14 +7,19 @@ from transformers import AutoTokenizer, AutoModelForSequenceClassification
7
  import torch
8
  from duckduckgo_search import DDGS
9
  from fpdf import FPDF
 
 
10
 
11
  # -----------------------------
12
  # CONFIG
13
  # -----------------------------
14
  DB_NAME = "db.sqlite3"
 
 
15
  USERNAME = "aixbi"
16
  PASSWORD = "aixbi@123"
17
- MAX_SENTENCES_CHECK = 10
 
18
 
19
  # -----------------------------
20
  # DB INIT
@@ -45,38 +50,38 @@ model = AutoModelForSequenceClassification.from_pretrained("hello-simpleai/chatg
45
  # -----------------------------
46
  # FUNCTIONS
47
  # -----------------------------
48
- def extract_text(file_obj):
49
- name = file_obj.name
50
- if name.endswith(".pdf"):
51
- with pdfplumber.open(file_obj.name) as pdf:
52
  return " ".join(page.extract_text() for page in pdf.pages if page.extract_text())
53
- elif name.endswith(".docx"):
54
- doc = docx.Document(file_obj.name)
55
  return " ".join([p.text for p in doc.paragraphs])
56
- else:
57
- return file_obj.read().decode("utf-8")
 
58
 
59
- def detect_ai_text(text):
60
  inputs = tokenizer(text[:512], return_tensors="pt", truncation=True)
61
  with torch.no_grad():
62
  outputs = model(**inputs)
63
  score = torch.softmax(outputs.logits, dim=1)[0][1].item()
64
- return score # probability of AI-generated
65
 
66
- def live_plagiarism_check(sentences):
67
  ddgs = DDGS()
68
- samples = random.sample(sentences, min(MAX_SENTENCES_CHECK, len(sentences)))
69
- suspicious_sentences = []
 
70
  plagiarism_hits = 0
71
-
72
  for sentence in samples:
73
  results = list(ddgs.text(sentence, max_results=2))
74
  if results:
75
  plagiarism_hits += 1
76
- suspicious_sentences.append(sentence)
77
-
78
- score = (plagiarism_hits / len(samples)) * 100 if samples else 0
79
- return score, suspicious_sentences
80
 
81
  def save_result(student_id, student_name, ai_score, plagiarism_score):
82
  conn = sqlite3.connect(DB_NAME)
@@ -92,29 +97,51 @@ def load_results():
92
  conn.close()
93
  return df
94
 
95
- def generate_pdf_report(student_name, student_id, ai_score, plagiarism_score, suspicious_sentences, output_path):
 
 
 
 
 
 
 
 
 
96
  pdf = FPDF()
97
  pdf.add_page()
98
- pdf.set_font("Arial", size=12)
99
 
100
- pdf.cell(200, 10, txt="AIxBI - Student Thesis Analysis Report", ln=True, align='C')
101
- pdf.ln(10)
102
- pdf.cell(200, 10, txt=f"Student: {student_name} ({student_id})", ln=True)
103
- pdf.cell(200, 10, txt=f"AI Probability: {ai_score:.2f}%", ln=True)
104
- pdf.cell(200, 10, txt=f"Plagiarism Score: {plagiarism_score:.2f}%", ln=True)
105
- pdf.cell(200, 10, txt=f"Timestamp: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}", ln=True)
 
 
 
 
 
 
 
 
 
106
  pdf.ln(10)
107
 
108
- pdf.multi_cell(0, 10, txt="Suspicious Sentences (Possible Plagiarism or AI-generated):")
109
- pdf.ln(5)
110
- if suspicious_sentences:
111
- for s in suspicious_sentences:
112
- pdf.multi_cell(0, 10, f"- {s}")
113
- pdf.ln(2)
114
- else:
115
- pdf.multi_cell(0, 10, "None detected.")
 
 
 
 
116
 
117
- pdf.output(output_path)
 
118
 
119
  # -----------------------------
120
  # APP LOGIC
@@ -125,35 +152,38 @@ def login(user, pwd):
125
  else:
126
  return gr.update(), gr.update(), "Invalid username or password!"
127
 
128
- def analyze(student_name, student_id, file_obj):
129
- if file_obj is None or not student_name or not student_id:
130
  return "Please fill all fields and upload a document.", None, None, None
131
 
132
- text = extract_text(file_obj)
133
- sentences = [s.strip() for s in text.split(". ") if len(s) > 30]
134
-
135
- # AI Detection
136
- ai_score = detect_ai_text(text) * 100
 
 
 
 
137
 
138
- # Live plagiarism
139
- plagiarism_score, suspicious_sentences = live_plagiarism_check(sentences)
140
 
141
- # Save to DB
142
  save_result(student_id, student_name, ai_score, plagiarism_score)
 
143
 
144
- # Generate PDF Report
145
- output_pdf = f"{student_id}_report.pdf"
146
- generate_pdf_report(student_name, student_id, ai_score, plagiarism_score, suspicious_sentences, output_pdf)
147
-
148
- highlighted_text = "\n\n".join([f"⚠️ {s}" for s in suspicious_sentences]) if suspicious_sentences else "No suspicious sentences found."
149
- return f"Analysis Completed for {student_name} ({student_id})", round(ai_score,2), round(plagiarism_score,2), output_pdf, highlighted_text
150
 
151
  def show_dashboard():
152
  df = load_results()
153
  return df
154
 
 
 
 
155
  with gr.Blocks() as demo:
156
- gr.Markdown("# AIxBI - Professional Thesis Checker")
 
157
 
158
  # Login Section
159
  login_box = gr.Group(visible=True)
@@ -169,20 +199,19 @@ with gr.Blocks() as demo:
169
  with gr.Tab("Check Thesis"):
170
  student_name = gr.Textbox(label="Student Name")
171
  student_id = gr.Textbox(label="Student ID")
172
- file_upload = gr.File(label="Upload Document", file_types=[".pdf",".docx",".txt"])
173
  analyze_btn = gr.Button("Analyze Document")
174
  status = gr.Textbox(label="Status")
175
  ai_score = gr.Number(label="AI Probability (%)")
176
  plagiarism_score = gr.Number(label="Plagiarism Score (%)")
177
- suspicious_text = gr.Textbox(label="Suspicious Sentences Highlight", lines=10)
178
- pdf_output = gr.File(label="Download PDF Report")
179
-
180
  with gr.Tab("Summary Dashboard"):
181
  dashboard_btn = gr.Button("Refresh Dashboard")
182
  dashboard = gr.Dataframe(headers=["id","student_id","student_name","ai_score","plagiarism_score","timestamp"])
183
 
184
  login_btn.click(login, inputs=[user, pwd], outputs=[login_box, app_box, login_msg])
185
- analyze_btn.click(analyze, inputs=[student_name, student_id, file_upload], outputs=[status, ai_score, plagiarism_score, pdf_output, suspicious_text])
186
  dashboard_btn.click(show_dashboard, outputs=[dashboard])
187
 
188
  if __name__ == "__main__":
 
1
  import gradio as gr
2
+ import pdfplumber, docx, sqlite3, random, os
3
  from datetime import datetime
4
  import pandas as pd
5
  from sentence_transformers import SentenceTransformer, util
 
7
  import torch
8
  from duckduckgo_search import DDGS
9
  from fpdf import FPDF
10
+ import qrcode
11
+ from PIL import Image
12
 
13
  # -----------------------------
14
  # CONFIG
15
  # -----------------------------
16
# Storage locations used by the app.
DB_NAME = "db.sqlite3"
REPORT_DIR = "reports"
LOGO_PATH = "aixbi.jpg"  # Place your uploaded logo in the root

# Login credentials. They can be overridden through the environment so that
# real deployments do not have to rely on the values committed to source
# control; the literals below remain only as backward-compatible defaults.
USERNAME = os.environ.get("AIXBI_USERNAME", "aixbi")
PASSWORD = os.environ.get("AIXBI_PASSWORD", "aixbi@123")

# Generated PDF reports are written here, so make sure the folder exists.
os.makedirs(REPORT_DIR, exist_ok=True)
23
 
24
  # -----------------------------
25
  # DB INIT
 
50
  # -----------------------------
51
  # FUNCTIONS
52
  # -----------------------------
53
def extract_text(file_path: str):
    """Return the plain text contained in an uploaded document.

    The file type is chosen from the path's extension, compared
    case-insensitively so that ``REPORT.PDF`` is handled like ``report.pdf``:

    * ``.pdf``  - text of every page that yields any, joined with spaces.
    * ``.docx`` - text of every paragraph, joined with spaces.
    * anything else - read as UTF-8 text, ignoring undecodable bytes.

    Args:
        file_path: Path of the document on disk (any object whose ``str()``
            is that path).

    Returns:
        The extracted text as a single string (possibly empty).
    """
    filepath = str(file_path)
    lowered = filepath.lower()

    if lowered.endswith(".pdf"):
        with pdfplumber.open(filepath) as pdf:
            # Extract each page once; pages without text yield None/"".
            page_texts = (page.extract_text() for page in pdf.pages)
            return " ".join(chunk for chunk in page_texts if chunk)

    if lowered.endswith(".docx"):
        document = docx.Document(filepath)
        return " ".join(paragraph.text for paragraph in document.paragraphs)

    # Fallback: treat the upload as plain text.
    with open(filepath, "r", encoding="utf-8", errors="ignore") as handle:
        return handle.read()
64
 
65
def detect_ai_text(text: str):
    """Return the classifier's score for *text* as a percentage (0-100).

    Only the first 512 characters of *text* are tokenized and passed to the
    module-level ``model``. The result is the softmax probability of class
    index 1 for the single input row, multiplied by 100 (index 1 is treated
    as the "AI-generated" class by the rest of the app).
    """
    encoded = tokenizer(text[:512], return_tensors="pt", truncation=True)
    with torch.no_grad():  # inference only, no gradients needed
        logits = model(**encoded).logits
    probabilities = torch.softmax(logits, dim=1)
    return probabilities[0][1].item() * 100
71
 
72
def live_plagiarism_check(sentences, n_samples=3):
    """Spot-check a random sample of sentences against web search results.

    Up to ``n_samples`` sentences are drawn at random and each is sent to
    DuckDuckGo; a sentence counts as a hit when the search returns at least
    one result.

    Args:
        sentences: Candidate sentences from the document (may be empty).
        n_samples: Maximum number of sentences to look up.

    Returns:
        A ``(score, hits)`` tuple where ``score`` is the percentage of sampled
        sentences that produced a hit and ``hits`` lists those sentences.
        ``(0, [])`` is returned when there is nothing to sample.
    """
    sample_size = min(n_samples, len(sentences)) if sentences else 0
    if sample_size <= 0:
        # Nothing to check: avoid building a search client and avoid the
        # division by zero / negative sample size that would follow.
        return 0, []

    samples = random.sample(sentences, sample_size)
    ddgs = DDGS()
    top_sentences = [
        sentence
        for sentence in samples
        if list(ddgs.text(sentence, max_results=2))
    ]
    return (len(top_sentences) / len(samples)) * 100, top_sentences
 
 
85
 
86
  def save_result(student_id, student_name, ai_score, plagiarism_score):
87
  conn = sqlite3.connect(DB_NAME)
 
97
  conn.close()
98
  return df
99
 
100
def _latin1_safe(text):
    """Return *text* with characters outside Latin-1 replaced by ``?``."""
    return str(text).encode("latin-1", "replace").decode("latin-1")


def _risk_verdict(ai_score, plagiarism_score):
    """Map the two percentage scores onto the verdict label of the report."""
    if ai_score > 70 or plagiarism_score > 50:
        return "High AI/Plagiarism Risk"
    if ai_score > 40 or plagiarism_score > 30:
        return "Moderate Risk"
    return "Likely Original"


def generate_pdf_report(student_name, student_id, ai_score, plagiarism_score, top_sentences):
    """Write the analysis report for one submission as a PDF file.

    The report contains the logo (when ``LOGO_PATH`` exists), the student
    details, both scores, a verdict, the flagged sentences in red and a QR
    code that encodes the key figures.

    Args:
        student_name: Name printed on the report.
        student_id: Identifier printed on the report and used in the file name.
        ai_score: AI probability as a percentage.
        plagiarism_score: Plagiarism score as a percentage.
        top_sentences: Sentences flagged by the live check (may be empty).

    Returns:
        Path of the generated PDF inside ``REPORT_DIR``.
    """
    import tempfile  # only needed here; keeps the file-level imports untouched

    now = datetime.now()
    timestamp = now.strftime("%Y-%m-%d %H:%M:%S")
    verdict = _risk_verdict(ai_score, plagiarism_score)

    # student_id comes straight from a text box: keep path separators and
    # other special characters out of the file name.
    safe_id = "".join(
        ch if ch.isalnum() or ch in "-_" else "_" for ch in str(student_id)
    )
    filename = f"{REPORT_DIR}/Report_{safe_id}_{int(now.timestamp())}.pdf"

    pdf = FPDF()
    pdf.add_page()

    # Add Logo
    if os.path.exists(LOGO_PATH):
        pdf.image(LOGO_PATH, 10, 8, 33)

    pdf.set_font("Arial", "B", 18)
    pdf.cell(200, 20, "AIxBI - Thesis Analysis Report", ln=True, align="C")
    pdf.ln(20)

    # The core Arial font can only encode Latin-1, so every piece of
    # user-supplied text is sanitised before it is written to the page.
    pdf.set_font("Arial", size=12)
    pdf.cell(200, 10, _latin1_safe(f"Student Name: {student_name}"), ln=True)
    pdf.cell(200, 10, _latin1_safe(f"Student ID: {student_id}"), ln=True)
    pdf.cell(200, 10, f"AI Probability: {ai_score:.2f}%", ln=True)
    pdf.cell(200, 10, f"Plagiarism Score: {plagiarism_score:.2f}%", ln=True)
    pdf.cell(200, 10, f"Verdict: {verdict}", ln=True)
    pdf.cell(200, 10, f"Analysis Date: {timestamp}", ln=True)
    pdf.ln(10)

    # Highlight top plagiarized sentences
    if top_sentences:
        pdf.set_text_color(255, 0, 0)
        flagged = "Top Plagiarized Sentences:\n" + "\n\n".join(top_sentences)
        pdf.multi_cell(0, 10, _latin1_safe(flagged))
        pdf.set_text_color(0, 0, 0)

    # Generate QR Code
    qr_data = f"AIxBI Verification\nID:{student_id}\nAI:{ai_score:.2f}% Plag:{plagiarism_score:.2f}%\nTime:{timestamp}"
    # A unique temp file per report avoids two concurrent requests
    # overwriting each other's QR image; it is removed once the PDF is saved.
    fd, qr_path = tempfile.mkstemp(suffix=".png", dir=REPORT_DIR)
    os.close(fd)
    try:
        qrcode.make(qr_data).save(qr_path)
        pdf.image(qr_path, x=160, y=230, w=40)
        pdf.output(filename)
    finally:
        os.remove(qr_path)

    return filename
145
 
146
  # -----------------------------
147
  # APP LOGIC
 
152
  else:
153
  return gr.update(), gr.update(), "Invalid username or password!"
154
 
155
def analyze(student_name, student_id, file_path):
    """Run the full check on an uploaded document and build its report.

    Steps: extract the text, score it with the AI detector, measure
    near-duplicate sentences inside the document, spot-check sentences on the
    web, store the result and generate the PDF report.

    Args:
        student_name: Name entered in the form.
        student_id: Identifier entered in the form.
        file_path: Path of the uploaded file, or ``None`` when nothing was
            uploaded.

    Returns:
        A 4-tuple ``(status message, AI score, plagiarism score, PDF path)``.
        The last three items are ``None`` when the input is incomplete or no
        text could be extracted.
    """
    if file_path is None or not student_name or not student_id:
        return "Please fill all fields and upload a document.", None, None, None

    text = extract_text(file_path)
    if not text or not text.strip():
        # e.g. a scanned PDF: there is nothing meaningful to score or store.
        return "No readable text could be extracted from the document.", None, None, None

    sentences = [s.strip() for s in text.split(". ") if len(s.strip()) > 20]

    ai_score = detect_ai_text(text)

    local_score = 0
    if len(sentences) > 1:
        embeddings = embedder.encode(sentences, convert_to_tensor=True)
        cosine_scores = util.cos_sim(embeddings, embeddings)
        # Every sentence is identical to itself, so the diagonal would always
        # count as a "duplicate" and inflate the score (100% for a single
        # sentence). Only compare distinct sentence pairs.
        distinct_pairs = ~torch.eye(
            len(sentences), dtype=torch.bool, device=cosine_scores.device
        )
        local_score = (cosine_scores[distinct_pairs] > 0.95).float().mean().item() * 100

    live_score, top_sentences = live_plagiarism_check(sentences)
    plagiarism_score = max(local_score, live_score)

    save_result(student_id, student_name, ai_score, plagiarism_score)
    pdf_path = generate_pdf_report(student_name, student_id, ai_score, plagiarism_score, top_sentences)

    return f"Analysis Completed for {student_name} ({student_id})", round(ai_score,2), round(plagiarism_score,2), pdf_path
 
 
 
 
 
176
 
177
def show_dashboard():
    """Return the stored analysis results for display in the dashboard tab."""
    return load_results()
180
 
181
+ # -----------------------------
182
+ # GRADIO INTERFACE
183
+ # -----------------------------
184
  with gr.Blocks() as demo:
185
+ gr.Image(LOGO_PATH, label="AIxBI", show_label=False)
186
+ gr.Markdown("# AIxBI - Plagiarism & AI Detection with PDF Reports")
187
 
188
  # Login Section
189
  login_box = gr.Group(visible=True)
 
199
  with gr.Tab("Check Thesis"):
200
  student_name = gr.Textbox(label="Student Name")
201
  student_id = gr.Textbox(label="Student ID")
202
+ file_upload = gr.File(label="Upload Document", file_types=[".pdf",".docx",".txt"], type="filepath")
203
  analyze_btn = gr.Button("Analyze Document")
204
  status = gr.Textbox(label="Status")
205
  ai_score = gr.Number(label="AI Probability (%)")
206
  plagiarism_score = gr.Number(label="Plagiarism Score (%)")
207
+ pdf_report = gr.File(label="Download PDF Report")
208
+
 
209
  with gr.Tab("Summary Dashboard"):
210
  dashboard_btn = gr.Button("Refresh Dashboard")
211
  dashboard = gr.Dataframe(headers=["id","student_id","student_name","ai_score","plagiarism_score","timestamp"])
212
 
213
  login_btn.click(login, inputs=[user, pwd], outputs=[login_box, app_box, login_msg])
214
+ analyze_btn.click(analyze, inputs=[student_name, student_id, file_upload], outputs=[status, ai_score, plagiarism_score, pdf_report])
215
  dashboard_btn.click(show_dashboard, outputs=[dashboard])
216
 
217
  if __name__ == "__main__":