Spaces:
Sleeping
Sleeping
jaifar530
committed on
Update app.py
Browse files
app.py
CHANGED
@@ -332,188 +332,53 @@ if press_me_button:
|
|
332 |
max_cnn_prob_name = sorted_probabilities[0][0]
|
333 |
max_cnn_prob = float(sorted_probabilities[0][1])
|
334 |
|
335 |
-
|
336 |
-
|
337 |
-
|
338 |
-
|
339 |
-
|
340 |
-
|
341 |
-
|
342 |
-
if extra_trees_prediction == predicted_author:
|
343 |
-
st.success(f"Most likely written by: **{extra_trees_name}**", icon="✅")
|
344 |
-
st.success(f"2nd Most likely written by: **{ridge_name}**", icon="✅")
|
345 |
-
# st.warning(f"**Notice:** The input text has been magnified {amplify} times to better capture its characteristics and patterns.", icon="⚠️")
|
346 |
-
st.write("_" * 30)
|
347 |
-
# rain(
|
348 |
-
# emoji="π",
|
349 |
-
# font_size=54,
|
350 |
-
# falling_speed=5,
|
351 |
-
# animation_length="infinite",
|
352 |
-
# )
|
353 |
|
354 |
elif ridge_prediction == predicted_author:
|
355 |
-
st.success(f"Most likely written by: **{
|
356 |
st.success(f"2nd Most likely written by: **{extra_trees_name}**", icon="✅")
|
357 |
-
# st.warning(f"**Notice:** The input text has been magnified {amplify} times to better capture its characteristics and patterns.", icon="⚠️")
|
358 |
st.write("_" * 30)
|
359 |
-
|
360 |
-
|
361 |
-
|
362 |
-
|
363 |
-
# animation_length="infinite",
|
364 |
-
# )
|
365 |
-
|
366 |
-
elif ridge_prediction == extra_trees_prediction:
|
367 |
-
st.success(f"Most likely written by: **{ridge_name}**", icon="✅")
|
368 |
-
st.success(f"2nd Most likely written by: **{cnn_name}**", icon="✅")
|
369 |
-
#st.warning(f"**Notice:** The input text has been magnified {amplify} times to better capture its characteristics and patterns.", icon="⚠️")
|
370 |
st.write("_" * 30)
|
371 |
-
# rain(
|
372 |
-
# emoji="π",
|
373 |
-
# font_size=54,
|
374 |
-
# falling_speed=5,
|
375 |
-
# animation_length="infinite",
|
376 |
-
# )
|
377 |
-
else:
|
378 |
-
# Repeat the text with a space at the end of each iteration
|
379 |
|
380 |
-
# Load proper pre-trained for full texts
|
381 |
-
file_prefix = 'not_trancated_full_paragraph.xlsx'
|
382 |
-
with open(f"{file_prefix}_ridge_model.pkl", 'rb') as file:
|
383 |
-
ridge_model = pickle.load(file)
|
384 |
-
|
385 |
-
with open(f"{file_prefix}_extra_trees_model.pkl", 'rb') as file:
|
386 |
-
extra_trees_model = pickle.load(file)
|
387 |
-
|
388 |
-
with open(f"{file_prefix}_vectorizer.pkl", 'rb') as file:
|
389 |
-
vectorizer = pickle.load(file)
|
390 |
-
|
391 |
-
repeated_text = ""
|
392 |
-
max_word_count = 500
|
393 |
-
amplify = 1
|
394 |
-
if word_count >= max_word_count:
|
395 |
-
amplify = 2
|
396 |
else:
|
397 |
-
|
398 |
-
|
399 |
-
|
400 |
-
|
401 |
-
|
402 |
-
new_text = repeated_text
|
403 |
-
|
404 |
-
word_count = len(re.findall(r'\w+', new_text))
|
405 |
-
## Repeat ML
|
406 |
-
|
407 |
-
# Transform the input
|
408 |
-
user_input_transformed = vectorizer.transform([new_text])
|
409 |
|
410 |
-
|
411 |
-
ridge_prediction = ridge_model.predict(user_input_transformed)
|
412 |
-
extra_trees_prediction = extra_trees_model.predict(user_input_transformed)
|
413 |
-
|
414 |
-
### Repeat DL
|
415 |
-
predicted_author, author_probabilities = predict_author(new_text, loaded_model, tokenizer, label_encoder)
|
416 |
-
sorted_probabilities = sorted(author_probabilities.items(), key=lambda x: x[1], reverse=True)
|
417 |
-
|
418 |
-
new_max_cnn_prob_name = sorted_probabilities[0][0]
|
419 |
-
new_max_cnn_prob = float(sorted_probabilities[0][1])
|
420 |
-
|
421 |
-
# Get disply name
|
422 |
-
cnn_name, ridge_name, extra_trees_name = get_author_display_name(predicted_author, ridge_prediction, extra_trees_prediction)
|
423 |
-
with st.expander("2nd iteration Details..."):
|
424 |
-
st.write(f"Ridge: {ridge_name}")
|
425 |
-
st.write(f"ExtraTree: {extra_trees_name}")
|
426 |
-
st.write(f"CNN: {cnn_name}")
|
427 |
-
for author, prob in sorted_probabilities:
|
428 |
-
display_name = author_map.get(author, author)
|
429 |
-
st.write(f"{display_name}: {prob * 100:.2f}%")
|
430 |
-
st.progress(float(prob))
|
431 |
-
|
432 |
if ridge_prediction == extra_trees_prediction == predicted_author:
|
433 |
st.success(f"Most likely written by: **{ridge_name}**", icon="✅")
|
434 |
-
st.
|
435 |
-
|
436 |
-
|
437 |
-
|
438 |
-
|
439 |
-
# falling_speed=5,
|
440 |
-
# animation_length="infinite",
|
441 |
-
# )
|
442 |
-
elif new_max_cnn_prob_name == max_cnn_prob_name:
|
443 |
-
st.success(f"Most likely written by: **{cnn_name}**", icon="✅")
|
444 |
-
st.warning(f"**Notice:** The input text has been magnified {amplify} times to better capture its characteristics and patterns.", icon="⚠️")
|
445 |
st.write("_" * 30)
|
446 |
-
|
447 |
-
# emoji="π",
|
448 |
-
# font_size=54,
|
449 |
-
# falling_speed=5,
|
450 |
-
# animation_length="infinite",
|
451 |
-
# )
|
452 |
-
|
453 |
elif ridge_prediction == extra_trees_prediction:
|
454 |
st.success(f"Most likely written by: **{ridge_name}**", icon="✅")
|
455 |
st.success(f"2nd Most likely written by: **{cnn_name}**", icon="✅")
|
456 |
-
st.warning(f"**Notice:** The input text has been magnified {amplify} times to better capture its characteristics and patterns.", icon="⚠️")
|
457 |
-
st.write("_" * 30)
|
458 |
-
# rain(
|
459 |
-
# emoji="π",
|
460 |
-
# font_size=54,
|
461 |
-
# falling_speed=5,
|
462 |
-
# animation_length="infinite",
|
463 |
-
# )
|
464 |
-
|
465 |
-
elif extra_trees_prediction == predicted_author:
|
466 |
-
st.success(f"Most likely written by: **{extra_trees_name}**", icon="✅")
|
467 |
-
st.success(f"2nd Most likely written by: **{ridge_name}**", icon="✅")
|
468 |
-
st.warning(f"**Notice:** The input text has been magnified {amplify} times to better capture its characteristics and patterns.", icon="⚠️")
|
469 |
st.write("_" * 30)
|
470 |
-
# rain(
|
471 |
-
# emoji="π",
|
472 |
-
# font_size=54,
|
473 |
-
# falling_speed=5,
|
474 |
-
# animation_length="infinite",
|
475 |
-
# )
|
476 |
-
|
477 |
-
elif ridge_prediction == predicted_author:
|
478 |
-
st.success(f"Most likely written by: **{ridge_name}**", icon="✅")
|
479 |
-
st.success(f"2nd Most likely written by: **{extra_trees_name}**", icon="✅")
|
480 |
-
st.warning(f"**Notice:** The input text has been magnified {amplify} times to better capture its characteristics and patterns.", icon="⚠️")
|
481 |
-
st.write("_" * 30)
|
482 |
-
# rain(
|
483 |
-
# emoji="π",
|
484 |
-
# font_size=54,
|
485 |
-
# falling_speed=5,
|
486 |
-
# animation_length="infinite",
|
487 |
-
# )
|
488 |
|
489 |
-
|
490 |
else:
|
491 |
st.warning("Notice 1: There is a difficulity predicting your text, it might fill into one of the below:", icon="⚠️")
|
492 |
st.success(f"1- **{ridge_name}**", icon="✅")
|
493 |
st.success(f"2- **{cnn_name}**", icon="✅")
|
494 |
st.success(f"3- **{extra_trees_name}**", icon="✅")
|
495 |
-
|
496 |
-
|
497 |
-
|
498 |
-
# emoji="π",
|
499 |
-
# font_size=54,
|
500 |
-
# falling_speed=5,
|
501 |
-
# animation_length="infinite",
|
502 |
-
# )
|
503 |
-
|
504 |
-
|
505 |
-
# with st.expander("What is this project about?"):
|
506 |
-
# st.write("""
|
507 |
-
# This project is part of an MSc in Data Analytics at the University of Portsmouth.
|
508 |
-
# Developed by Jaifar Al Shizawi, it aims to identify whether a text is written by a human or a specific Large Language Model (LLM) like ChatGPT-3, ChatGPT-4, Google Bard, or HuggingChat.
|
509 |
-
# For inquiries, contact [[email protected]](mailto:[email protected]).
|
510 |
-
# Supervised by Dr. Mohamed Bader.
|
511 |
-
# """)
|
512 |
-
|
513 |
-
# for author, prob in sorted_probabilities:
|
514 |
-
# display_name = author_map.get(author, author) # Retrieve the display name, fall back to original if not found
|
515 |
-
# st.write(f"{display_name}: {prob * 100:.2f}%")
|
516 |
-
# st.progress(float(prob))
|
517 |
|
518 |
# Using expander to make FAQ sections
|
519 |
st.subheader("Frequently Asked Questions (FAQ)")
|
@@ -566,11 +431,6 @@ with st.expander("Can I use this as evidence?"):
|
|
566 |
""")
|
567 |
|
568 |
|
569 |
-
# # Creates a button named 'Press me'
|
570 |
-
# list_dir = st.button("list")
|
571 |
-
# if list_dir:
|
572 |
-
# st.write("Listing directory contents:")
|
573 |
-
# st.write(os.listdir('.'))
|
574 |
|
575 |
|
576 |
|
|
|
332 |
max_cnn_prob_name = sorted_probabilities[0][0]
|
333 |
max_cnn_prob = float(sorted_probabilities[0][1])
|
334 |
|
335 |
+
if word_count < 10 or word_count > 1081:
|
336 |
+
st.info("For better prediction input texts between 10 and 1081", icon="ℹ️")
|
337 |
+
|
338 |
+
elif word_count < 256:
|
339 |
+
if ridge_prediction == extra_trees_prediction == predicted_author:
|
340 |
+
st.success(f"Most likely written by: **{cnn_name}**", icon="✅")
|
341 |
+
st.info("We are quite confident in the accuracy of this result.", icon="ℹ️")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
342 |
|
343 |
elif ridge_prediction == predicted_author:
|
344 |
+
st.success(f"Most likely written by: **{cnn_name}**", icon="✅")
|
345 |
st.success(f"2nd Most likely written by: **{extra_trees_name}**", icon="✅")
|
|
|
346 |
st.write("_" * 30)
|
347 |
+
|
348 |
+
elif extra_trees_prediction == predicted_author:
|
349 |
+
st.success(f"Most likely written by: **{cnn_name}**", icon="✅")
|
350 |
+
st.success(f"2nd Most likely written by: **{ridge_name}**", icon="✅")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
351 |
st.write("_" * 30)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
352 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
353 |
else:
|
354 |
+
st.warning("Notice 1: There is a difficulity predicting your text, it might fill into one of the below:", icon="⚠️")
|
355 |
+
st.success(f"1- **{cnn_name}**", icon="✅")
|
356 |
+
st.success(f"2- **{ridge_name}**", icon="✅")
|
357 |
+
st.success(f"3- **{extra_trees_name}**", icon="✅")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
358 |
|
359 |
+
else:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
360 |
if ridge_prediction == extra_trees_prediction == predicted_author:
|
361 |
st.success(f"Most likely written by: **{ridge_name}**", icon="✅")
|
362 |
+
st.info("We are quite confident in the accuracy of this result.", icon="ℹ️")
|
363 |
+
|
364 |
+
elif ridge_prediction == predicted_author:
|
365 |
+
st.success(f"Most likely written by: **{ridge_name}**", icon="✅")
|
366 |
+
st.success(f"2nd Most likely written by: **{extra_trees_name}**", icon="✅")
|
|
|
|
|
|
|
|
|
|
|
|
|
367 |
st.write("_" * 30)
|
368 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
369 |
elif ridge_prediction == extra_trees_prediction:
|
370 |
st.success(f"Most likely written by: **{ridge_name}**", icon="✅")
|
371 |
st.success(f"2nd Most likely written by: **{cnn_name}**", icon="✅")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
372 |
st.write("_" * 30)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
373 |
|
|
|
374 |
else:
|
375 |
st.warning("Notice 1: There is a difficulity predicting your text, it might fill into one of the below:", icon="⚠️")
|
376 |
st.success(f"1- **{ridge_name}**", icon="✅")
|
377 |
st.success(f"2- **{cnn_name}**", icon="✅")
|
378 |
st.success(f"3- **{extra_trees_name}**", icon="✅")
|
379 |
+
|
380 |
+
|
381 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
382 |
|
383 |
# Using expander to make FAQ sections
|
384 |
st.subheader("Frequently Asked Questions (FAQ)")
|
|
|
431 |
""")
|
432 |
|
433 |
|
|
|
|
|
|
|
|
|
|
|
434 |
|
435 |
|
436 |
|