mohitrulzz committed on
Commit
f1090ff
·
verified ·
1 Parent(s): c76366a

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +157 -0
app.py ADDED
@@ -0,0 +1,157 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pdfplumber, docx, sqlite3, os, random
3
+ from datetime import datetime
4
+ import pandas as pd
5
+ from sentence_transformers import SentenceTransformer, util
6
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
7
+ import torch
8
+ from duckduckgo_search import DDGS
9
+
10
+ # -----------------------------
11
+ # CONFIG
12
+ # -----------------------------
13
# Runtime configuration. Each value may be overridden through an environment
# variable so that a deployment does not have to keep its real credentials in
# source control; the defaults are the values that were previously hard-coded.
DB_NAME = os.environ.get("AIXBI_DB_NAME", "db.sqlite3")
USERNAME = os.environ.get("AIXBI_USERNAME", "aixbi")
PASSWORD = os.environ.get("AIXBI_PASSWORD", "aixbi@123")
16
+
17
+ # -----------------------------
18
+ # DB INIT
19
+ # -----------------------------
20
def init_db():
    """Create the ``results`` table in the SQLite database if it is missing.

    Connects to the database file named by ``DB_NAME``, runs a
    ``CREATE TABLE IF NOT EXISTS`` statement and commits it.  The connection
    is closed in a ``finally`` clause so that a failing statement does not
    leave an open handle behind.
    """
    conn = sqlite3.connect(DB_NAME)
    try:
        conn.execute(
            """CREATE TABLE IF NOT EXISTS results (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                student_id TEXT,
                student_name TEXT,
                ai_score REAL,
                plagiarism_score REAL,
                timestamp TEXT
            )"""
        )
        conn.commit()
    finally:
        conn.close()
33
+
34
# Create the results table as soon as the module is imported.
init_db()

# -----------------------------
# MODEL LOADING (only once)
# -----------------------------
# The tokenizer and the classifier are loaded from the same checkpoint, so the
# identifier is spelled once and reused.
_DETECTOR_CHECKPOINT = "hello-simpleai/chatgpt-detector-roberta"

embedder = SentenceTransformer("all-MiniLM-L6-v2")
tokenizer = AutoTokenizer.from_pretrained(_DETECTOR_CHECKPOINT)
model = AutoModelForSequenceClassification.from_pretrained(_DETECTOR_CHECKPOINT)
42
+
43
+ # -----------------------------
44
+ # FUNCTIONS
45
+ # -----------------------------
46
def extract_text(file_obj):
    """Return the plain text of an uploaded PDF, DOCX or text document.

    Args:
        file_obj: Upload handle whose ``name`` attribute is the path of the
            uploaded file on disk.

    Returns:
        The document text as a single string.  PDF pages and DOCX paragraphs
        are joined with single spaces; any other file is decoded as UTF-8,
        with undecodable bytes replaced rather than raising.
    """
    path = file_obj.name
    # Compare the extension case-insensitively so "REPORT.PDF" is not
    # mistaken for a plain-text file.
    lowered = path.lower()

    if lowered.endswith(".pdf"):
        with pdfplumber.open(path) as pdf:
            # Extract each page exactly once and drop pages without text.
            page_texts = (page.extract_text() for page in pdf.pages)
            return " ".join(chunk for chunk in page_texts if chunk)

    if lowered.endswith(".docx"):
        document = docx.Document(path)
        return " ".join(paragraph.text for paragraph in document.paragraphs)

    # Read from the path instead of calling file_obj.read(): the handle may
    # already be consumed, may be opened in text mode, or may not expose
    # read() at all (the exact upload type depends on the Gradio version).
    with open(path, "rb") as handle:
        return handle.read().decode("utf-8", errors="replace")
56
+
57
def detect_ai_text(text):
    """Return the classifier's probability that *text* is AI-generated.

    Only the first 512 characters of *text* are tokenized (with truncation
    enabled) and passed to the module-level ``model``.  The result is the
    softmax value at class index 1 of the first row of logits, as a Python
    float.
    """
    snippet = text[:512]
    encoded = tokenizer(snippet, return_tensors="pt", truncation=True)
    # Inference only: no gradients are needed.
    with torch.no_grad():
        logits = model(**encoded).logits
        class_probabilities = torch.softmax(logits, dim=1)
    ai_probability = class_probabilities[0][1].item()
    return ai_probability
63
+
64
def live_plagiarism_check(sentences, n_samples=3):
    """Estimate web overlap by searching for a random sample of sentences.

    Args:
        sentences: List of sentence strings to sample from.
        n_samples: Maximum number of sentences to look up.

    Returns:
        The percentage (0-100) of sampled sentences for which the search
        returned at least one result.  Returns ``0.0`` when there is nothing
        to sample, instead of dividing by zero.
    """
    sample_size = min(n_samples, len(sentences))
    if sample_size <= 0:
        # No sentences (or a non-positive sample size): nothing to search,
        # so no hits are possible.
        return 0.0

    samples = random.sample(sentences, sample_size)
    ddgs = DDGS()
    # A sentence counts as a hit as soon as the search yields any result.
    plagiarism_hits = sum(
        1 for sentence in samples if list(ddgs.text(sentence, max_results=2))
    )
    return (plagiarism_hits / len(samples)) * 100
73
+
74
def save_result(student_id, student_name, ai_score, plagiarism_score):
    """Insert one analysis result into the ``results`` table.

    Args:
        student_id: Identifier stored in the ``student_id`` column.
        student_name: Name stored in the ``student_name`` column.
        ai_score: Value stored in the ``ai_score`` column.
        plagiarism_score: Value stored in the ``plagiarism_score`` column.

    The row is stamped with the current local time formatted as
    ``YYYY-MM-DD HH:MM:SS``.  The connection is closed even if the insert
    fails, so an error does not leak an open database handle.
    """
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    conn = sqlite3.connect(DB_NAME)
    try:
        # Parameterized statement: values are bound, never interpolated.
        conn.execute(
            "INSERT INTO results (student_id, student_name, ai_score, plagiarism_score, timestamp) VALUES (?,?,?,?,?)",
            (student_id, student_name, ai_score, plagiarism_score, timestamp),
        )
        conn.commit()
    finally:
        conn.close()
81
+
82
def load_results():
    """Return every row of the ``results`` table as a pandas DataFrame.

    The connection is closed even if the query raises, so a failed read does
    not leak an open database handle.
    """
    conn = sqlite3.connect(DB_NAME)
    try:
        return pd.read_sql_query("SELECT * FROM results", conn)
    finally:
        conn.close()
87
+
88
+ # -----------------------------
89
+ # APP LOGIC
90
+ # -----------------------------
91
def login(user, pwd):
    """Check the submitted credentials and produce the three UI updates.

    Returns a tuple of (login box update, app box update, message).  On a
    match the login box is hidden, the app box is shown and the message is
    empty; otherwise both boxes are left unchanged and an error message is
    returned.
    """
    # Guard clause: reject as soon as either field does not match.
    if user != USERNAME or pwd != PASSWORD:
        return gr.update(), gr.update(), "Invalid username or password!"

    hide_login = gr.update(visible=False)
    show_app = gr.update(visible=True)
    return hide_login, show_app, ""
96
+
97
def _duplicate_pair_percentage(cosine_scores, threshold=0.95):
    """Return the percentage of distinct sentence pairs scoring above *threshold*."""
    count = cosine_scores.shape[0]
    if count < 2:
        # With fewer than two sentences there is no pair to compare.
        return 0.0
    # Exclude the diagonal: every sentence is trivially identical to itself,
    # which would otherwise inflate the score (100% for a single sentence).
    off_diagonal = ~torch.eye(count, dtype=torch.bool, device=cosine_scores.device)
    return (cosine_scores[off_diagonal] > threshold).float().mean().item() * 100


def analyze(student_name, student_id, file_obj):
    """Run AI detection and plagiarism checks on an uploaded document.

    Args:
        student_name: Name of the student; must be non-empty.
        student_id: Identifier of the student; must be non-empty.
        file_obj: Uploaded document handle, or ``None`` if nothing was
            uploaded.

    Returns:
        A ``(status, ai_score, plagiarism_score)`` tuple.  When a field is
        missing or no text can be extracted, the scores are ``None`` and
        nothing is saved.  Otherwise both scores are percentages rounded to
        two decimals and the unrounded values are stored via ``save_result``.
    """
    if file_obj is None or not student_name or not student_id:
        return "Please fill all fields and upload a document.", None, None

    text = extract_text(file_obj)
    if not text or not text.strip():
        # Nothing to score (e.g. a scanned PDF with no text layer).
        return "No readable text could be extracted from the document.", None, None

    # Crude sentence split; fragments of 20 characters or fewer are ignored.
    sentences = [s for s in text.split(". ") if len(s) > 20]

    # AI Detection
    ai_score = detect_ai_text(text) * 100

    if sentences:
        # Local similarity: share of sentence pairs that are near-duplicates.
        embeddings = embedder.encode(sentences, convert_to_tensor=True)
        cosine_scores = util.cos_sim(embeddings, embeddings)
        local_score = _duplicate_pair_percentage(cosine_scores)

        # Live web check
        live_score = live_plagiarism_check(sentences)
    else:
        # No sentence is long enough to compare or search for; skip both
        # checks rather than encoding or sampling an empty list.
        local_score = 0.0
        live_score = 0.0

    plagiarism_score = max(local_score, live_score)

    # Save to DB
    save_result(student_id, student_name, ai_score, plagiarism_score)

    return (
        f"Analysis Completed for {student_name} ({student_id})",
        round(ai_score, 2),
        round(plagiarism_score, 2),
    )
120
+
121
def show_dashboard():
    """Return the stored results, as produced by ``load_results``."""
    return load_results()
124
+
125
# UI definition: a login group that is visible at start and an application
# group that starts hidden; the login handler's updates toggle their visibility.
with gr.Blocks() as demo:
    gr.Markdown("# AIxBI - Plagiarism & AI Detection")

    # Login Section
    with gr.Group(visible=True) as login_box:
        user = gr.Textbox(label="Username")
        pwd = gr.Textbox(label="Password", type="password")
        login_btn = gr.Button("Login")
        login_msg = gr.Markdown("")

    # Main App
    with gr.Group(visible=False) as app_box:
        with gr.Tab("Check Thesis"):
            student_name = gr.Textbox(label="Student Name")
            student_id = gr.Textbox(label="Student ID")
            file_upload = gr.File(
                label="Upload Document",
                file_types=[".pdf", ".docx", ".txt"],
            )
            analyze_btn = gr.Button("Analyze Document")
            status = gr.Textbox(label="Status")
            ai_score = gr.Number(label="AI Probability (%)")
            plagiarism_score = gr.Number(label="Plagiarism Score (%)")

        with gr.Tab("Summary Dashboard"):
            dashboard_btn = gr.Button("Refresh Dashboard")
            dashboard = gr.Dataframe(
                headers=[
                    "id",
                    "student_id",
                    "student_name",
                    "ai_score",
                    "plagiarism_score",
                    "timestamp",
                ]
            )

    # Event wiring: each button calls its handler with the listed inputs and
    # writes the handler's return values to the listed outputs.
    login_btn.click(
        login,
        inputs=[user, pwd],
        outputs=[login_box, app_box, login_msg],
    )
    analyze_btn.click(
        analyze,
        inputs=[student_name, student_id, file_upload],
        outputs=[status, ai_score, plagiarism_score],
    )
    dashboard_btn.click(show_dashboard, outputs=[dashboard])

# Start the web app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()