import gradio as gr
import pdfplumber, docx, sqlite3, random, os
from datetime import datetime
import pandas as pd
from sentence_transformers import SentenceTransformer, util
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
from duckduckgo_search import DDGS
from fpdf import FPDF
import qrcode
from PIL import Image

# -----------------------------
# CONFIG
# -----------------------------
DB_NAME = "db.sqlite3"
REPORT_DIR = "reports"
LOGO_PATH = "aixbi.jpg"  # Place your uploaded logo in the root
USERNAME = "aixbi"
PASSWORD = "aixbi@123"

os.makedirs(REPORT_DIR, exist_ok=True)

# -----------------------------
# DB INIT
# -----------------------------
def init_db():
    conn = sqlite3.connect(DB_NAME)
    c = conn.cursor()
    c.execute("""CREATE TABLE IF NOT EXISTS results (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        student_id TEXT,
        student_name TEXT,
        ai_score REAL,
        plagiarism_score REAL,
        timestamp TEXT
    )""")
    conn.commit()
    conn.close()

init_db()

# -----------------------------
# MODEL LOADING
# -----------------------------
embedder = SentenceTransformer("all-MiniLM-L6-v2")
tokenizer = AutoTokenizer.from_pretrained("hello-simpleai/chatgpt-detector-roberta")
model = AutoModelForSequenceClassification.from_pretrained("hello-simpleai/chatgpt-detector-roberta")

# -----------------------------
# FUNCTIONS
# -----------------------------
def extract_text(file_path: str):
    """Extract plain text from a PDF, DOCX, or TXT file."""
    filepath = str(file_path)
    if filepath.endswith(".pdf"):
        with pdfplumber.open(filepath) as pdf:
            return " ".join(page.extract_text() for page in pdf.pages if page.extract_text())
    elif filepath.endswith(".docx"):
        doc = docx.Document(filepath)
        return " ".join(p.text for p in doc.paragraphs)
    else:  # treat anything else as plain text
        with open(filepath, "r", encoding="utf-8", errors="ignore") as f:
            return f.read()


def detect_ai_text(text: str):
    """Return the probability (0-100) that the text is AI-generated."""
    inputs = tokenizer(text[:512], return_tensors="pt", truncation=True, max_length=512)
    with torch.no_grad():
        outputs = model(**inputs)
    # Index 1 is the "ChatGPT" (AI-generated) class for this detector
    score = torch.softmax(outputs.logits, dim=1)[0][1].item()
    return score * 100


def live_plagiarism_check(sentences, n_samples=3):
    """Spot-check a few sentences against DuckDuckGo; return (hit rate %, matched sentences)."""
    if not sentences:
        return 0, []
    ddgs = DDGS()
    samples = random.sample(sentences, min(n_samples, len(sentences)))
    plagiarism_hits = 0
    top_sentences = []
    for sentence in samples:
        try:
            results = list(ddgs.text(sentence, max_results=2))
        except Exception:
            # Network or rate-limit errors should not abort the whole analysis
            continue
        if results:
            plagiarism_hits += 1
            top_sentences.append(sentence)
    return (plagiarism_hits / len(samples)) * 100, top_sentences


def save_result(student_id, student_name, ai_score, plagiarism_score):
    conn = sqlite3.connect(DB_NAME)
    c = conn.cursor()
    c.execute(
        "INSERT INTO results (student_id, student_name, ai_score, plagiarism_score, timestamp) VALUES (?,?,?,?,?)",
        (student_id, student_name, ai_score, plagiarism_score, datetime.now().strftime("%Y-%m-%d %H:%M:%S")),
    )
    conn.commit()
    conn.close()


def load_results():
    conn = sqlite3.connect(DB_NAME)
    df = pd.read_sql_query("SELECT * FROM results", conn)
    conn.close()
    return df


def generate_pdf_report(student_name, student_id, ai_score, plagiarism_score, top_sentences):
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    # Verdict strings avoid non-Latin-1 characters (e.g. the warning emoji),
    # which FPDF's built-in fonts cannot encode
    verdict = "Likely Original"
    if ai_score > 70 or plagiarism_score > 50:
        verdict = "High AI/Plagiarism Risk"
    elif ai_score > 40 or plagiarism_score > 30:
        verdict = "Moderate Risk"
    filename = f"{REPORT_DIR}/Report_{student_id}_{int(datetime.now().timestamp())}.pdf"

    pdf = FPDF()
    pdf.add_page()

    # Add logo
    if os.path.exists(LOGO_PATH):
        pdf.image(LOGO_PATH, 10, 8, 33)

    pdf.set_font("Arial", "B", 18)
    pdf.cell(200, 20, "AIxBI - Thesis Analysis Report", ln=True, align="C")
    pdf.ln(20)
    pdf.set_font("Arial", size=12)
    pdf.cell(200, 10, f"Student Name: {student_name}", ln=True)
    pdf.cell(200, 10, f"Student ID: {student_id}", ln=True)
    pdf.cell(200, 10, f"AI Probability: {ai_score:.2f}%", ln=True)
    pdf.cell(200, 10, f"Plagiarism Score: {plagiarism_score:.2f}%", ln=True)
    pdf.cell(200, 10, f"Verdict: {verdict}", ln=True)
    pdf.cell(200, 10, f"Analysis Date: {timestamp}", ln=True)
    pdf.ln(10)

    # Highlight top plagiarized sentences
    if top_sentences:
        pdf.set_text_color(255, 0, 0)
        pdf.multi_cell(0, 10, "Top Plagiarized Sentences:\n" + "\n\n".join(top_sentences))
        pdf.set_text_color(0, 0, 0)

    # Generate QR code with a verification summary
    qr_data = f"AIxBI Verification\nID:{student_id}\nAI:{ai_score:.2f}% Plag:{plagiarism_score:.2f}%\nTime:{timestamp}"
    qr_img = qrcode.make(qr_data)
    qr_path = "qr_temp.png"
    qr_img.save(qr_path)
    pdf.image(qr_path, x=160, y=230, w=40)

    pdf.output(filename)
    return filename

# -----------------------------
# APP LOGIC
# -----------------------------
def login(user, pwd):
    if user == USERNAME and pwd == PASSWORD:
        return gr.update(visible=False), gr.update(visible=True), ""
    else:
        return gr.update(), gr.update(), "Invalid username or password!"


def analyze(student_name, student_id, file_path):
    if file_path is None or not student_name or not student_id:
        return "Please fill all fields and upload a document.", None, None, None

    text = extract_text(file_path)
    sentences = [s for s in text.split(". ") if len(s) > 20]
    ai_score = detect_ai_text(text)

    # Local score: fraction of near-duplicate sentence pairs within the document
    local_score = 0
    if sentences:
        embeddings = embedder.encode(sentences, convert_to_tensor=True)
        cosine_scores = util.cos_sim(embeddings, embeddings)
        # Mask the diagonal so a sentence is not counted as matching itself
        cosine_scores.fill_diagonal_(0)
        local_score = (cosine_scores > 0.95).float().mean().item() * 100

    live_score, top_sentences = live_plagiarism_check(sentences)
    plagiarism_score = max(local_score, live_score)

    save_result(student_id, student_name, ai_score, plagiarism_score)
    pdf_path = generate_pdf_report(student_name, student_id, ai_score, plagiarism_score, top_sentences)
    return f"Analysis Completed for {student_name} ({student_id})", round(ai_score, 2), round(plagiarism_score, 2), pdf_path


def show_dashboard():
    df = load_results()
    return df

# -----------------------------
# GRADIO INTERFACE
# -----------------------------
with gr.Blocks() as demo:
    gr.Image(LOGO_PATH, label="AIxBI", show_label=False)
    gr.Markdown("# AIxBI - Plagiarism & AI Detection with PDF Reports")

    # Login section
    with gr.Group(visible=True) as login_box:
        user = gr.Textbox(label="Username")
        pwd = gr.Textbox(label="Password", type="password")
        login_btn = gr.Button("Login")
        login_msg = gr.Markdown("")

    # Main app (hidden until login succeeds)
    with gr.Group(visible=False) as app_box:
        with gr.Tab("Check Thesis"):
            student_name = gr.Textbox(label="Student Name")
            student_id = gr.Textbox(label="Student ID")
            file_upload = gr.File(label="Upload Document", file_types=[".pdf", ".docx", ".txt"], type="filepath")
            analyze_btn = gr.Button("Analyze Document")
            status = gr.Textbox(label="Status")
            ai_score = gr.Number(label="AI Probability (%)")
            plagiarism_score = gr.Number(label="Plagiarism Score (%)")
            pdf_report = gr.File(label="Download PDF Report")
        with gr.Tab("Summary Dashboard"):
            dashboard_btn = gr.Button("Refresh Dashboard")
            dashboard = gr.Dataframe(headers=["id", "student_id", "student_name", "ai_score", "plagiarism_score", "timestamp"])

    login_btn.click(login, inputs=[user, pwd], outputs=[login_box, app_box, login_msg])
    analyze_btn.click(analyze, inputs=[student_name, student_id, file_upload],
                      outputs=[status, ai_score, plagiarism_score, pdf_report])
    dashboard_btn.click(show_dashboard, outputs=[dashboard])

if __name__ == "__main__":
    demo.launch()