jaifar530 committed on
Commit 3b7bbcd · unverified · 1 Parent(s): 91871c3

Update app.py

Files changed (1): app.py (+58, -15)
app.py CHANGED
@@ -1,5 +1,4 @@
 import streamlit as st
-st.write("Test system if working")
 import zipfile
 import os
 import requests
@@ -16,8 +15,7 @@ headers = {
     'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3',
 }
 
-# Debugging: Print current working directory initially
-st.write(f"Initial Current Working Directory: {os.getcwd()}")
+
 
 # Check if the model folder exists
 zip_file_path = "my_authorship_model_zip.zip"
@@ -60,17 +58,8 @@ if not os.path.exists('my_authorship_model'):
         st.write(f"Failed to download or extract the model: {e}")
         exit(1)
 else:
-    st.write("Model folder exists")
-
-    # Debugging: Print current working directory after extraction
-    st.write(f"Current Working Directory After Extraction: {os.getcwd()}")
+    st.write("System Ready !!")
 
-    # Debugging: Check if model folder contains required files
-    try:
-        model_files = os.listdir('my_authorship_model')
-        st.write(f"Files in model folder: {model_files}")
-    except Exception as e:
-        st.write(f"Could not list files in model folder: {e}")
 
 # Download the required files
 file_urls = {
@@ -115,11 +104,10 @@ def predict_author(new_text, model, tokenizer, label_encoder):
 
     return predicted_label, author_probabilities
 
-st.markdown("CNN : version: 1.2")
 new_text = st.text_area("Input your text here")
 
 # Creates a button named 'Press me'
-press_me_button = st.button("Which Model Used?")
+press_me_button = st.button("Writer or Robot?")
 
 if press_me_button:
     predicted_author, author_probabilities = predict_author(new_text, loaded_model, tokenizer, label_encoder)
@@ -129,3 +117,58 @@ if press_me_button:
     st.write("Probabilities for each author are (sorted):")
     for author, prob in sorted_probabilities:
         st.write(f"{author}: {prob * 100:.2f}%")
+
+import streamlit as st
+
+st.title("Smart Authorship Detection System of AI-Generated Text Models")
+
+# Slogan under the title
+st.subheader("Uncover the Invisible Ink: Who's the Author?")
+
+# Using expander to make FAQ sections
+st.subheader("Frequently Asked Questions (FAQ)")
+
+# Small Description
+with st.expander("What is this project about?"):
+    st.write("""
+    This project is part of an MSc in Data Analytics at the University of Portsmouth.
+    Developed by Jaifar Al Shizawi, it aims to identify whether a text is written by a human or a specific Large Language Model (LLM) like ChatGPT-3, ChatGPT-4, Google Bard, or HuggingChat.
+    For inquiries, contact [[email protected]](mailto:[email protected]).
+    Supervised by Dr. Mohamed Bader.
+    """)
+
+# System Details
+with st.expander("How does the system work?"):
+    st.write("""
+    The system is trained using a CNN model on a dataset of 140,546 paragraphs, varying in length from 10 to 500 words.
+    It achieves an accuracy of 0.9964 with a validation loss of 0.094.
+    """)
+
+# Data Storage Information
+with st.expander("Does the system store my data?"):
+    st.write("No, the system does not collect or store any user input data.")
+
+# Use-case Limitation
+with st.expander("Can I use this as evidence?"):
+    st.write("""
+    No, this system is a Proof of Concept (POC) and should not be used as evidence against students or similar entities.
+    """)
+
+# Background and Context
+with st.expander("Background and Context"):
+    st.write("""
+    The proliferation of AI and Large Language Models (LLMs) like ChatGPT and Google Bard has raised questions about authorship. This project aims to analyze and predict the unique features of these models compared to human-written text.
+    """)
+
+# Problem Statement
+with st.expander("Problem Statement"):
+    st.write("""
+    Most AI authorship detection systems fail to identify the specific LLM behind a text. This research aims to fill this gap, offering detailed analysis on various LLMs and human writing.
+    """)
+
+# Aim and Objectives
+with st.expander("Aim and Objectives"):
+    st.write("""
+    The project aims to help staff at the University of Portsmouth distinguish between student-written artifacts and those generated by LLMs. It focuses on text feature extraction, model testing, and implementing a user-friendly dashboard among other objectives.
+    """)
+
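Note on the second and third hunks: they only show the edges of the model-bootstrap logic (the headers dict, the zip_file_path assignment, and the except/else branches), not the download itself. The following is a minimal sketch of what that guarded download-and-extract block typically looks like, assuming a hypothetical MODEL_ZIP_URL; it is an illustration consistent with the visible context lines, not the committed code.

```python
import os
import zipfile

import requests
import streamlit as st

# Browser-like headers, as defined near the top of app.py in this diff.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3',
}

# Hypothetical URL for the zipped model; the real source is not shown in this diff.
MODEL_ZIP_URL = "https://example.com/my_authorship_model_zip.zip"
zip_file_path = "my_authorship_model_zip.zip"

if not os.path.exists('my_authorship_model'):
    try:
        # Fetch the archive and write it to disk.
        response = requests.get(MODEL_ZIP_URL, headers=headers)
        response.raise_for_status()
        with open(zip_file_path, "wb") as f:
            f.write(response.content)

        # Unpack it next to the app so 'my_authorship_model' exists afterwards.
        with zipfile.ZipFile(zip_file_path, "r") as zip_ref:
            zip_ref.extractall(".")
    except Exception as e:
        st.write(f"Failed to download or extract the model: {e}")
        exit(1)
else:
    st.write("System Ready !!")
```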
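The fourth hunk wires the "Writer or Robot?" button to predict_author(new_text, loaded_model, tokenizer, label_encoder) and then prints sorted per-author probabilities, but the function body sits outside the changed lines. A minimal sketch of a helper with that signature for a Keras CNN text classifier is shown below; the padding length, the use of pad_sequences, and the dict return shape are assumptions rather than details taken from this commit.

```python
import numpy as np
from tensorflow.keras.preprocessing.sequence import pad_sequences

MAX_SEQUENCE_LENGTH = 500  # assumed padding length; not shown in this diff


def predict_author(new_text, model, tokenizer, label_encoder):
    # Convert the raw text into the padded integer sequence the CNN expects.
    sequence = tokenizer.texts_to_sequences([new_text])
    padded = pad_sequences(sequence, maxlen=MAX_SEQUENCE_LENGTH, padding="post")

    # One softmax probability per known author / LLM class.
    probabilities = model.predict(padded)[0]

    # Map the highest-scoring class index back to its human-readable label.
    predicted_label = label_encoder.inverse_transform([np.argmax(probabilities)])[0]

    # Pair every class name with its probability so the caller can sort and display them.
    author_probabilities = dict(zip(label_encoder.classes_, probabilities))
    return predicted_label, author_probabilities
```

With a return value shaped like this, the sorted_probabilities iterated in the final hunk would presumably be built as sorted(author_probabilities.items(), key=lambda item: item[1], reverse=True).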
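The "How does the system work?" expander states that the detector is a CNN trained on 140,546 paragraphs of 10 to 500 words. The commit does not include the training code, so purely as a rough illustration, a Keras text-classification CNN of the kind described usually has the shape below; every layer size and hyperparameter here is an assumption, not a value taken from the project.

```python
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Conv1D, GlobalMaxPooling1D, Dense, Dropout

# All sizes below are illustrative assumptions, not values from this repository.
VOCAB_SIZE = 20000         # tokenizer vocabulary size
MAX_SEQUENCE_LENGTH = 500  # matches the stated 10-500 word paragraph range
NUM_CLASSES = 5            # e.g. human, ChatGPT-3, ChatGPT-4, Google Bard, HuggingChat

model = Sequential([
    Embedding(VOCAB_SIZE, 128, input_length=MAX_SEQUENCE_LENGTH),
    Conv1D(128, 5, activation="relu"),   # learn local n-gram style features
    GlobalMaxPooling1D(),                # keep the strongest feature responses
    Dense(64, activation="relu"),
    Dropout(0.5),
    Dense(NUM_CLASSES, activation="softmax"),  # one probability per author class
])

model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])
```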