jaifar530 committed on
Commit
a3c831e
·
unverified ·
1 Parent(s): 45533f4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -23
app.py CHANGED
@@ -234,8 +234,7 @@ def predict_author(new_text, model, tokenizer, label_encoder):
234
  return predicted_label, author_probabilities
235
 
236
  new_text = st.text_area("Input Your Text Here:")
237
- word_count = len(re.findall(r'\w+', new_text))
238
- st.write(word_count)
239
 
240
  # Creates a button named 'Press me'
241
  press_me_button = st.button("Human or Robot?")
@@ -243,9 +242,9 @@ press_me_button = st.button("Human or Robot?")
243
  if press_me_button:
244
 
245
  ########## ML
246
-
247
  word_count = len(re.findall(r'\w+', new_text))
248
- st.write(word_count)
249
 
250
  # Choose the appropriate model based on word count
251
  if 10 <= word_count <= 34:
@@ -303,18 +302,10 @@ if press_me_button:
303
  ridge_prediction = ridge_model.predict(user_input_transformed)
304
  extra_trees_prediction = extra_trees_model.predict(user_input_transformed)
305
 
306
- if ridge_prediction == extra_trees_prediction:
307
- st.write(f"Same pridiction (Ridge & ExtraTree): {ridge_prediction[0]}")
308
- else:
309
- st.write("Different predictions:")
310
- st.write(f"Ridge says: {ridge_prediction[0]}")
311
- st.write(f"Extra Trees says: {extra_trees_prediction[0]}")
312
-
313
-
314
  ########## DL
315
  predicted_author, author_probabilities = predict_author(new_text, loaded_model, tokenizer, label_encoder)
316
  sorted_probabilities = sorted(author_probabilities.items(), key=lambda x: x[1], reverse=True)
317
-
318
  author_map = {
319
  "googlebard": "Google Bard",
320
  "gpt3": "ChatGPT-3",
@@ -322,17 +313,40 @@ if press_me_button:
322
  "huggingface": "HuggingChat",
323
  "human": "Human-Written"
324
  }
 
 
 
325
 
326
- predicted_author_diplay_name = author_map.get(predicted_author, predicted_author)
327
- st.write(f"The text is most likely written by: {predicted_author_diplay_name}")
328
- st.write("Probabilities for each author are (sorted):")
329
- # Mapping the internal names to display names
330
-
331
-
332
- for author, prob in sorted_probabilities:
333
- display_name = author_map.get(author, author) # Retrieve the display name, fall back to original if not found
334
- st.write(f"{display_name}: {prob * 100:.2f}%")
335
- st.progress(float(prob))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
336
 
337
  # Using expander to make FAQ sections
338
  st.subheader("Frequently Asked Questions (FAQ)")
 
234
  return predicted_label, author_probabilities
235
 
236
  new_text = st.text_area("Input Your Text Here:")
237
+
 
238
 
239
  # Creates a button named 'Press me'
240
  press_me_button = st.button("Human or Robot?")
 
242
  if press_me_button:
243
 
244
  ########## ML
245
+
246
  word_count = len(re.findall(r'\w+', new_text))
247
+ st.write(f"Words Count: {word_count}")
248
 
249
  # Choose the appropriate model based on word count
250
  if 10 <= word_count <= 34:
 
302
  ridge_prediction = ridge_model.predict(user_input_transformed)
303
  extra_trees_prediction = extra_trees_model.predict(user_input_transformed)
304
 
 
 
 
 
 
 
 
 
305
  ########## DL
306
  predicted_author, author_probabilities = predict_author(new_text, loaded_model, tokenizer, label_encoder)
307
  sorted_probabilities = sorted(author_probabilities.items(), key=lambda x: x[1], reverse=True)
308
+
309
  author_map = {
310
  "googlebard": "Google Bard",
311
  "gpt3": "ChatGPT-3",
 
313
  "huggingface": "HuggingChat",
314
  "human": "Human-Written"
315
  }
316
+ cnn_predicted_author_diplay_name = author_map.get(predicted_author, predicted_author)
317
+ ridge_predicted_author_diplay_name = author_map.get(ridge_prediction[0], ridge_prediction[0])
318
+ extra_trees_predicted_author_diplay_name = author_map.get(extra_trees_prediction[0], extra_trees_prediction[0])
319
 
320
+ if ridge_prediction == extra_trees_prediction == predicted_author:
321
+ st.write(f"The text is most likely written by: {ridge_predicted_author_diplay_name}")
322
+
323
+ elif ridge_prediction == extra_trees_prediction:
324
+ st.write(f"The text is most likely written by: {ridge_predicted_author_diplay_name}")
325
+
326
+ elif extra_trees_prediction == predicted_author:
327
+ st.write(f"The text is most likely written by: {extra_trees_predicted_author_diplay_name}")
328
+
329
+ elif ridge_prediction == predicted_author:
330
+ st.write(f"The text is most likely written by: {ridge_predicted_author_diplay_name}")
331
+
332
+ else:
333
+ st.write("Difficult to Pridict this text, it might fill into one of the below:")
334
+ st.write(cnn_predicted_author_diplay_name)
335
+ st.write(ridge_predicted_author_diplay_name)
336
+ st.write(extra_trees_predicted_author_diplay_name)
337
+
338
+ # with st.expander("What is this project about?"):
339
+ # st.write("""
340
+ # This project is part of an MSc in Data Analytics at the University of Portsmouth.
341
+ # Developed by Jaifar Al Shizawi, it aims to identify whether a text is written by a human or a specific Large Language Model (LLM) like ChatGPT-3, ChatGPT-4, Google Bard, or HuggingChat.
342
+ # For inquiries, contact [[email protected]](mailto:[email protected]).
343
+ # Supervised by Dr. Mohamed Bader.
344
+ # """)
345
+
346
+ # for author, prob in sorted_probabilities:
347
+ # display_name = author_map.get(author, author) # Retrieve the display name, fall back to original if not found
348
+ # st.write(f"{display_name}: {prob * 100:.2f}%")
349
+ # st.progress(float(prob))
350
 
351
  # Using expander to make FAQ sections
352
  st.subheader("Frequently Asked Questions (FAQ)")