mohitrulzz committed on
Commit 1eca925 · verified · 1 Parent(s): 966ceba

Update app.py

Files changed (1): app.py +65 -45
app.py CHANGED
@@ -1,11 +1,12 @@
 import gradio as gr
-import pdfplumber, docx, sqlite3, random
+import pdfplumber, docx, sqlite3, os, random
 from datetime import datetime
 import pandas as pd
 from sentence_transformers import SentenceTransformer, util
 from transformers import AutoTokenizer, AutoModelForSequenceClassification
 import torch
 from duckduckgo_search import DDGS
+from fpdf import FPDF
 
 # -----------------------------
 # CONFIG
@@ -13,6 +14,7 @@ from duckduckgo_search import DDGS
 DB_NAME = "db.sqlite3"
 USERNAME = "aixbi"
 PASSWORD = "aixbi@123"
+MAX_SENTENCES_CHECK = 10
 
 # -----------------------------
 # DB INIT
@@ -43,39 +45,38 @@ model = AutoModelForSequenceClassification.from_pretrained("hello-simpleai/chatg
 # -----------------------------
 # FUNCTIONS
 # -----------------------------
-def extract_text(file_path):
-    """Extracts text from PDF, DOCX, or TXT files given a file path."""
-    filepath = str(file_path)
-
-    if filepath.endswith(".pdf"):
-        with pdfplumber.open(filepath) as pdf:
+def extract_text(file_obj):
+    name = file_obj.name
+    if name.endswith(".pdf"):
+        with pdfplumber.open(file_obj.name) as pdf:
            return " ".join(page.extract_text() for page in pdf.pages if page.extract_text())
-    elif filepath.endswith(".docx"):
-        doc = docx.Document(filepath)
+    elif name.endswith(".docx"):
+        doc = docx.Document(file_obj.name)
        return " ".join([p.text for p in doc.paragraphs])
-    else: # txt
-        with open(filepath, "r", encoding="utf-8", errors="ignore") as f:
-            return f.read()
+    else:
+        return file_obj.read().decode("utf-8")
 
 def detect_ai_text(text):
     inputs = tokenizer(text[:512], return_tensors="pt", truncation=True)
     with torch.no_grad():
         outputs = model(**inputs)
     score = torch.softmax(outputs.logits, dim=1)[0][1].item()
-    return score * 100 # return as percentage
+    return score # probability of AI-generated
 
-def live_plagiarism_check(sentences, n_samples=3):
-    """Randomly samples sentences and checks them online."""
+def live_plagiarism_check(sentences):
     ddgs = DDGS()
-    if not sentences:
-        return 0
-    samples = random.sample(sentences, min(n_samples, len(sentences)))
+    samples = random.sample(sentences, min(MAX_SENTENCES_CHECK, len(sentences)))
+    suspicious_sentences = []
     plagiarism_hits = 0
+
     for sentence in samples:
         results = list(ddgs.text(sentence, max_results=2))
         if results:
             plagiarism_hits += 1
-    return (plagiarism_hits / len(samples)) * 100
+            suspicious_sentences.append(sentence)
+
+    score = (plagiarism_hits / len(samples)) * 100 if samples else 0
+    return score, suspicious_sentences
 
 def save_result(student_id, student_name, ai_score, plagiarism_score):
     conn = sqlite3.connect(DB_NAME)
@@ -91,6 +92,30 @@ def load_results():
     conn.close()
     return df
 
+def generate_pdf_report(student_name, student_id, ai_score, plagiarism_score, suspicious_sentences, output_path):
+    pdf = FPDF()
+    pdf.add_page()
+    pdf.set_font("Arial", size=12)
+
+    pdf.cell(200, 10, txt="AIxBI - Student Thesis Analysis Report", ln=True, align='C')
+    pdf.ln(10)
+    pdf.cell(200, 10, txt=f"Student: {student_name} ({student_id})", ln=True)
+    pdf.cell(200, 10, txt=f"AI Probability: {ai_score:.2f}%", ln=True)
+    pdf.cell(200, 10, txt=f"Plagiarism Score: {plagiarism_score:.2f}%", ln=True)
+    pdf.cell(200, 10, txt=f"Timestamp: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}", ln=True)
+    pdf.ln(10)
+
+    pdf.multi_cell(0, 10, txt="Suspicious Sentences (Possible Plagiarism or AI-generated):")
+    pdf.ln(5)
+    if suspicious_sentences:
+        for s in suspicious_sentences:
+            pdf.multi_cell(0, 10, f"- {s}")
+            pdf.ln(2)
+    else:
+        pdf.multi_cell(0, 10, "None detected.")
+
+    pdf.output(output_path)
+
 # -----------------------------
 # APP LOGIC
 # -----------------------------
@@ -100,42 +125,35 @@ def login(user, pwd):
     else:
         return gr.update(), gr.update(), "Invalid username or password!"
 
-def analyze(student_name, student_id, file_path):
-    if file_path is None or not student_name or not student_id:
-        return "Please fill all fields and upload a document.", None, None
-
-    text = extract_text(file_path)
-    sentences = [s for s in text.split(". ") if len(s) > 20]
+def analyze(student_name, student_id, file_obj):
+    if file_obj is None or not student_name or not student_id:
+        return "Please fill all fields and upload a document.", None, None, None
 
+    text = extract_text(file_obj)
+    sentences = [s.strip() for s in text.split(". ") if len(s) > 30]
+
     # AI Detection
-    ai_score = detect_ai_text(text)
-
-    # Local similarity check
-    if sentences:
-        embeddings = embedder.encode(sentences, convert_to_tensor=True)
-        cosine_scores = util.cos_sim(embeddings, embeddings)
-        local_score = (cosine_scores > 0.95).float().mean().item() * 100
-    else:
-        local_score = 0
+    ai_score = detect_ai_text(text) * 100
 
-    # Live web check
-    live_score = live_plagiarism_check(sentences)
-    plagiarism_score = max(local_score, live_score)
+    # Live plagiarism
+    plagiarism_score, suspicious_sentences = live_plagiarism_check(sentences)
 
     # Save to DB
     save_result(student_id, student_name, ai_score, plagiarism_score)
 
-    return f"Analysis Completed for {student_name} ({student_id})", round(ai_score,2), round(plagiarism_score,2)
+    # Generate PDF Report
+    output_pdf = f"{student_id}_report.pdf"
+    generate_pdf_report(student_name, student_id, ai_score, plagiarism_score, suspicious_sentences, output_pdf)
+
+    highlighted_text = "\n\n".join([f"⚠️ {s}" for s in suspicious_sentences]) if suspicious_sentences else "No suspicious sentences found."
+    return f"Analysis Completed for {student_name} ({student_id})", round(ai_score,2), round(plagiarism_score,2), output_pdf, highlighted_text
 
 def show_dashboard():
     df = load_results()
     return df
 
-# -----------------------------
-# GRADIO INTERFACE
-# -----------------------------
 with gr.Blocks() as demo:
-    gr.Markdown("# AIxBI - Plagiarism & AI Detection")
+    gr.Markdown("# AIxBI - Professional Thesis Checker")
 
     # Login Section
     login_box = gr.Group(visible=True)
@@ -151,18 +169,20 @@ with gr.Blocks() as demo:
         with gr.Tab("Check Thesis"):
             student_name = gr.Textbox(label="Student Name")
             student_id = gr.Textbox(label="Student ID")
-            file_upload = gr.File(label="Upload Document", file_types=[".pdf",".docx",".txt"], type="filepath")
+            file_upload = gr.File(label="Upload Document", file_types=[".pdf",".docx",".txt"])
            analyze_btn = gr.Button("Analyze Document")
            status = gr.Textbox(label="Status")
            ai_score = gr.Number(label="AI Probability (%)")
            plagiarism_score = gr.Number(label="Plagiarism Score (%)")
-
+            suspicious_text = gr.Textbox(label="Suspicious Sentences Highlight", lines=10)
+            pdf_output = gr.File(label="Download PDF Report")
+
        with gr.Tab("Summary Dashboard"):
            dashboard_btn = gr.Button("Refresh Dashboard")
            dashboard = gr.Dataframe(headers=["id","student_id","student_name","ai_score","plagiarism_score","timestamp"])
 
    login_btn.click(login, inputs=[user, pwd], outputs=[login_box, app_box, login_msg])
-    analyze_btn.click(analyze, inputs=[student_name, student_id, file_upload], outputs=[status, ai_score, plagiarism_score])
+    analyze_btn.click(analyze, inputs=[student_name, student_id, file_upload], outputs=[status, ai_score, plagiarism_score, pdf_output, suspicious_text])
    dashboard_btn.click(show_dashboard, outputs=[dashboard])
 
 if __name__ == "__main__":
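
For a quick local sanity check outside the Gradio UI, a minimal sketch along these lines exercises the updated analyze() pipeline end to end. The sample file name thesis.txt and the direct import of app are assumptions for illustration, not part of this commit; the call still loads the detector model, queries DuckDuckGo, and writes to SQLite exactly as the app does.

# minimal smoke test (hypothetical file names; assumes app.py is importable)
from app import analyze

# extract_text() only needs an object with .name and .read(), so a plain file handle works
with open("thesis.txt", "rb") as f:  # hypothetical sample document
    status, ai_pct, plag_pct, report_path, flagged = analyze("Jane Doe", "S123", f)

print(status)            # "Analysis Completed for Jane Doe (S123)"
print(ai_pct, plag_pct)  # rounded AI probability and plagiarism score, in %
print(report_path)       # "S123_report.pdf", written by generate_pdf_report()
print(flagged)           # suspicious sentences, one per block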