jaifar530 committed on
Commit
8e0cc30
·
unverified ·
1 Parent(s): 4da997f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +130 -130
app.py CHANGED
@@ -1,133 +1,133 @@
1
  import streamlit as st
2
  st.write("Test system if working")
3
 
4
- # import os
5
- # import requests
6
- # import subprocess # Import the subprocess module
7
- # from keras.models import load_model
8
- # from keras.preprocessing.text import Tokenizer
9
- # from keras.preprocessing.sequence import pad_sequences
10
- # from sklearn.preprocessing import LabelEncoder
11
- # import pickle
12
- # import numpy as np
13
-
14
-
15
- # # Custom headers for the HTTP request
16
- # headers = {
17
- # 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3',
18
- # }
19
-
20
- # # Debugging: Print current working directory initially
21
- # st.write(f"Initial Current Working Directory: {os.getcwd()}")
22
-
23
- # # Check if the model folder exists
24
- # zip_file_path = "my_authorship_model_zip.zip"
25
- # if not os.path.exists('my_authorship_model'):
26
- # try:
27
- # # Download the model
28
- # model_url = 'https://jaifar.net/ADS/my_authorship_model_zip.zip'
29
- # r = requests.get(model_url, headers=headers)
30
- # r.raise_for_status()
31
-
32
- # # Debugging: Check if download is successful by examining content length
33
- # st.write(f"Downloaded model size: {len(r.content)} bytes")
34
-
35
- # # Save the downloaded content
36
- # with open(zip_file_path, "wb") as f:
37
- # f.write(r.content)
38
-
39
- # # Debugging: Verify that the zip file exists
40
- # if os.path.exists(zip_file_path):
41
- # st.write("Zip file exists")
42
-
43
- # # Debugging: List contents of the zip file using unzip
44
- # subprocess.run(['unzip', '-l', zip_file_path])
45
-
46
- # # Extract the model using unzip
47
- # unzip_result = subprocess.run(['unzip', '-o', zip_file_path, '-d', 'my_authorship_model'])
48
-
49
- # # Debugging: Check unzip exit code (0 means success)
50
- # if unzip_result.returncode == 0:
51
- # st.write("Model folder successfully extracted using unzip")
52
- # # Debugging: List the directory contents after extraction
53
- # st.write("Listing directory contents:")
54
- # st.write(os.listdir('.'))
55
- # else:
56
- # st.write("Model folder was not extracted successfully using unzip")
57
- # exit(1)
58
- # else:
59
- # st.write("Zip file does not exist")
60
- # exit(1)
61
- # except Exception as e:
62
- # st.write(f"Failed to download or extract the model: {e}")
63
- # exit(1)
64
- # else:
65
- # st.write("Model folder exists")
66
-
67
- # # Debugging: Print current working directory after extraction
68
- # st.write(f"Current Working Directory After Extraction: {os.getcwd()}")
69
-
70
- # # Debugging: Check if model folder contains required files
71
- # try:
72
- # model_files = os.listdir('my_authorship_model')
73
- # st.write(f"Files in model folder: {model_files}")
74
- # except Exception as e:
75
- # st.write(f"Could not list files in model folder: {e}")
76
-
77
- # # Download the required files
78
- # file_urls = {
79
- # 'tokenizer.pkl': 'https://jaifar.net/ADS/tokenizer.pkl',
80
- # 'label_encoder.pkl': 'https://jaifar.net/ADS/label_encoder.pkl'
81
- # }
82
-
83
- # for filename, url in file_urls.items():
84
- # try:
85
- # r = requests.get(url, headers=headers)
86
- # r.raise_for_status()
87
- # with open(filename, 'wb') as f:
88
- # f.write(r.content)
89
- # except Exception as e:
90
- # st.write(f"Failed to download {filename}: {e}")
91
- # exit(1)
92
-
93
- # # Load the saved model
94
- # loaded_model = load_model("my_authorship_model")
95
-
96
- # # Load the saved tokenizer and label encoder
97
- # with open('tokenizer.pkl', 'rb') as handle:
98
- # tokenizer = pickle.load(handle)
99
-
100
- # with open('label_encoder.pkl', 'rb') as handle:
101
- # label_encoder = pickle.load(handle)
102
-
103
- # max_length = 300 # As defined in the training code
104
-
105
- # # Function to predict author for new text
106
- # def predict_author(new_text, model, tokenizer, label_encoder):
107
- # sequence = tokenizer.texts_to_sequences([new_text])
108
- # padded_sequence = pad_sequences(sequence, maxlen=max_length, padding='post', truncating='post')
109
- # prediction = model.predict(padded_sequence)
110
-
111
- # predicted_label = label_encoder.inverse_transform([prediction.argmax()])[0]
112
- # probabilities = prediction[0]
113
- # author_probabilities = {}
114
- # for idx, prob in enumerate(probabilities):
115
- # author = label_encoder.inverse_transform([idx])[0]
116
- # author_probabilities[author] = prob
117
-
118
- # return predicted_label, author_probabilities
119
-
120
- # st.markdown("CNN : version: 1.2")
121
- # new_text = st.text_area("Input your text here")
122
-
123
- # # Creates a button named 'Press me'
124
- # press_me_button = st.button("Which Model Used?")
125
-
126
- # if press_me_button:
127
- # predicted_author, author_probabilities = predict_author(new_text, loaded_model, tokenizer, label_encoder)
128
- # sorted_probabilities = sorted(author_probabilities.items(), key=lambda x: x[1], reverse=True)
129
-
130
- # st.write(f"The text is most likely written by: {predicted_author}")
131
- # st.write("Probabilities for each author are (sorted):")
132
- # for author, prob in sorted_probabilities:
133
- # st.write(f"{author}: {prob * 100:.2f}%")
 
1
  import streamlit as st
2
  st.write("Test system if working")
3
 
4
+ import os
5
+ import requests
6
+ import subprocess # Import the subprocess module
7
+ from keras.models import load_model
8
+ from keras.preprocessing.text import Tokenizer
9
+ from keras.preprocessing.sequence import pad_sequences
10
+ from sklearn.preprocessing import LabelEncoder
11
+ import pickle
12
+ import numpy as np
13
+
14
+
15
# Browser-like User-Agent sent with every download request below.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3',
}

# Seconds to wait on the remote host before giving up, so that a stalled
# download cannot hang the app indefinitely.
REQUEST_TIMEOUT = 60

# Debugging: print current working directory initially
st.write(f"Initial Current Working Directory: {os.getcwd()}")

# Download and unpack the model only when its folder is not already present.
zip_file_path = "my_authorship_model_zip.zip"
if not os.path.exists('my_authorship_model'):
    # Failures are recorded here and reported after the try block, so that
    # st.stop() is never raised from inside the exception handler's scope.
    setup_error = None
    try:
        # Download the model archive
        model_url = 'https://jaifar.net/ADS/my_authorship_model_zip.zip'
        r = requests.get(model_url, headers=headers, timeout=REQUEST_TIMEOUT)
        r.raise_for_status()

        # Debugging: report the size of the downloaded payload
        st.write(f"Downloaded model size: {len(r.content)} bytes")

        # Save the downloaded content
        with open(zip_file_path, "wb") as f:
            f.write(r.content)

        # Debugging: verify that the zip file exists before extracting it
        if os.path.exists(zip_file_path):
            st.write("Zip file exists")

            # Debugging: list contents of the zip file using unzip
            subprocess.run(['unzip', '-l', zip_file_path])

            # Extract the model using unzip (-o overwrites existing files)
            unzip_result = subprocess.run(['unzip', '-o', zip_file_path, '-d', 'my_authorship_model'])

            # An exit code of 0 means unzip succeeded
            if unzip_result.returncode == 0:
                st.write("Model folder successfully extracted using unzip")
                # Debugging: list the directory contents after extraction
                st.write("Listing directory contents:")
                st.write(os.listdir('.'))
            else:
                setup_error = "Model folder was not extracted successfully using unzip"
        else:
            setup_error = "Zip file does not exist"
    except Exception as e:
        setup_error = f"Failed to download or extract the model: {e}"

    if setup_error is not None:
        st.write(setup_error)
        # Halt this script run; nothing below can work without the model.
        st.stop()
else:
    st.write("Model folder exists")

# Debugging: print current working directory after extraction
st.write(f"Current Working Directory After Extraction: {os.getcwd()}")

# Debugging: show which files ended up in the model folder
try:
    model_files = os.listdir('my_authorship_model')
    st.write(f"Files in model folder: {model_files}")
except Exception as e:
    st.write(f"Could not list files in model folder: {e}")

# Auxiliary files required alongside the model: local filename -> source URL
file_urls = {
    'tokenizer.pkl': 'https://jaifar.net/ADS/tokenizer.pkl',
    'label_encoder.pkl': 'https://jaifar.net/ADS/label_encoder.pkl',
}

for filename, url in file_urls.items():
    setup_error = None
    try:
        r = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
        r.raise_for_status()
        with open(filename, 'wb') as f:
            f.write(r.content)
    except Exception as e:
        setup_error = f"Failed to download {filename}: {e}"

    if setup_error is not None:
        st.write(setup_error)
        st.stop()

# Load the saved model
loaded_model = load_model("my_authorship_model")

# Load the saved tokenizer and label encoder.
# NOTE(security): pickle.load can execute arbitrary code contained in the
# file, and both files were just downloaded over the network. Only keep this
# if the host above is fully trusted.
with open('tokenizer.pkl', 'rb') as handle:
    tokenizer = pickle.load(handle)

with open('label_encoder.pkl', 'rb') as handle:
    label_encoder = pickle.load(handle)

max_length = 300  # As defined in the training code
104
+
105
+ # Function to predict author for new text
106
def predict_author(new_text, model, tokenizer, label_encoder):
    """Predict the most likely author label for a single text.

    Args:
        new_text: The text to classify.
        model: Object whose ``predict`` is called on the padded sequence.
        tokenizer: Object whose ``texts_to_sequences`` converts the text
            into an integer sequence.
        label_encoder: Object whose ``inverse_transform`` maps class
            indices back to author labels.

    Returns:
        A ``(predicted_label, author_probabilities)`` tuple, where
        ``author_probabilities`` maps every author label to the score the
        model assigned to it for this text.
    """
    sequence = tokenizer.texts_to_sequences([new_text])
    # Pad/truncate at the end to the module-level ``max_length``.
    padded_sequence = pad_sequences(sequence, maxlen=max_length, padding='post', truncating='post')
    prediction = model.predict(padded_sequence)

    predicted_label = label_encoder.inverse_transform([prediction.argmax()])[0]

    # Only one text was submitted, so the first row holds its scores.
    probabilities = prediction[0]
    # Decode every class index in one call instead of once per class.
    authors = label_encoder.inverse_transform(list(range(len(probabilities))))
    author_probabilities = dict(zip(authors, probabilities))

    return predicted_label, author_probabilities
119
+
120
st.markdown("CNN : version: 1.2")
new_text = st.text_area("Input your text here")

# Button labeled "Which Model Used?" that triggers the prediction.
press_me_button = st.button("Which Model Used?")

if press_me_button:
    if not new_text.strip():
        # Empty or whitespace-only input has no words to score, so ask for
        # text instead of showing a prediction.
        st.write("Please enter some text before requesting a prediction.")
    else:
        predicted_author, author_probabilities = predict_author(new_text, loaded_model, tokenizer, label_encoder)
        # Highest score first
        sorted_probabilities = sorted(author_probabilities.items(), key=lambda x: x[1], reverse=True)

        st.write(f"The text is most likely written by: {predicted_author}")
        st.write("Probabilities for each author are (sorted):")
        for author, prob in sorted_probabilities:
            st.write(f"{author}: {prob * 100:.2f}%")