jaifar530 committed
Commit 1cb239d · unverified · 1 parent: ab23659
Files changed (1):
  1. app.py +132 -129
app.py CHANGED
@@ -1,130 +1,133 @@
-import os
-import requests
-import subprocess  # Import the subprocess module
-from keras.models import load_model
-from keras.preprocessing.text import Tokenizer
-from keras.preprocessing.sequence import pad_sequences
-from sklearn.preprocessing import LabelEncoder
-import pickle
-import numpy as np
 import streamlit as st
-
-# Custom headers for the HTTP request
-headers = {
-    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3',
-}
-
-# Debugging: Print current working directory initially
-st.write(f"Initial Current Working Directory: {os.getcwd()}")
-
-# Check if the model folder exists
-zip_file_path = "my_authorship_model_zip.zip"
-if not os.path.exists('my_authorship_model'):
-    try:
-        # Download the model
-        model_url = 'https://jaifar.net/ADS/my_authorship_model_zip.zip'
-        r = requests.get(model_url, headers=headers)
-        r.raise_for_status()
-
-        # Debugging: Check if download is successful by examining content length
-        st.write(f"Downloaded model size: {len(r.content)} bytes")
-
-        # Save the downloaded content
-        with open(zip_file_path, "wb") as f:
-            f.write(r.content)
-
-        # Debugging: Verify that the zip file exists
-        if os.path.exists(zip_file_path):
-            st.write("Zip file exists")
-
-            # Debugging: List contents of the zip file using unzip
-            subprocess.run(['unzip', '-l', zip_file_path])
-
-            # Extract the model using unzip
-            unzip_result = subprocess.run(['unzip', '-o', zip_file_path, '-d', 'my_authorship_model'])
-
-            # Debugging: Check unzip exit code (0 means success)
-            if unzip_result.returncode == 0:
-                st.write("Model folder successfully extracted using unzip")
-                # Debugging: List the directory contents after extraction
-                st.write("Listing directory contents:")
-                st.write(os.listdir('.'))
-            else:
-                st.write("Model folder was not extracted successfully using unzip")
-                exit(1)
-        else:
-            st.write("Zip file does not exist")
-            exit(1)
-    except Exception as e:
-        st.write(f"Failed to download or extract the model: {e}")
-        exit(1)
-else:
-    st.write("Model folder exists")
-
-# Debugging: Print current working directory after extraction
-st.write(f"Current Working Directory After Extraction: {os.getcwd()}")
-
-# Debugging: Check if model folder contains required files
-try:
-    model_files = os.listdir('my_authorship_model')
-    st.write(f"Files in model folder: {model_files}")
-except Exception as e:
-    st.write(f"Could not list files in model folder: {e}")
-
-# Download the required files
-file_urls = {
-    'tokenizer.pkl': 'https://jaifar.net/ADS/tokenizer.pkl',
-    'label_encoder.pkl': 'https://jaifar.net/ADS/label_encoder.pkl'
-}
-
-for filename, url in file_urls.items():
-    try:
-        r = requests.get(url, headers=headers)
-        r.raise_for_status()
-        with open(filename, 'wb') as f:
-            f.write(r.content)
-    except Exception as e:
-        st.write(f"Failed to download {filename}: {e}")
-        exit(1)
-
-# Load the saved model
-loaded_model = load_model("my_authorship_model")
-
-# Load the saved tokenizer and label encoder
-with open('tokenizer.pkl', 'rb') as handle:
-    tokenizer = pickle.load(handle)
-
-with open('label_encoder.pkl', 'rb') as handle:
-    label_encoder = pickle.load(handle)
-
-max_length = 300  # As defined in the training code
-
-# Function to predict author for new text
-def predict_author(new_text, model, tokenizer, label_encoder):
-    sequence = tokenizer.texts_to_sequences([new_text])
-    padded_sequence = pad_sequences(sequence, maxlen=max_length, padding='post', truncating='post')
-    prediction = model.predict(padded_sequence)
-
-    predicted_label = label_encoder.inverse_transform([prediction.argmax()])[0]
-    probabilities = prediction[0]
-    author_probabilities = {}
-    for idx, prob in enumerate(probabilities):
-        author = label_encoder.inverse_transform([idx])[0]
-        author_probabilities[author] = prob
-
-    return predicted_label, author_probabilities
-
-st.markdown("CNN : version: 1.2")
-new_text = st.text_area("Input your text here")
-
-# Creates a button named 'Press me'
-press_me_button = st.button("Which Model Used?")
-
-if press_me_button:
-    predicted_author, author_probabilities = predict_author(new_text, loaded_model, tokenizer, label_encoder)
-    sorted_probabilities = sorted(author_probabilities.items(), key=lambda x: x[1], reverse=True)
-
-    st.write(f"The text is most likely written by: {predicted_author}")
-    st.write("Probabilities for each author are (sorted):")
-    for author, prob in sorted_probabilities:
-        st.write(f"{author}: {prob * 100:.2f}%")
+st.write("Test system if working")
+
+# import os
+# import requests
+# import subprocess  # Import the subprocess module
+# from keras.models import load_model
+# from keras.preprocessing.text import Tokenizer
+# from keras.preprocessing.sequence import pad_sequences
+# from sklearn.preprocessing import LabelEncoder
+# import pickle
+# import numpy as np
+
+
+# # Custom headers for the HTTP request
+# headers = {
+#     'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3',
+# }
+
+# # Debugging: Print current working directory initially
+# st.write(f"Initial Current Working Directory: {os.getcwd()}")
+
+# # Check if the model folder exists
+# zip_file_path = "my_authorship_model_zip.zip"
+# if not os.path.exists('my_authorship_model'):
+#     try:
+#         # Download the model
+#         model_url = 'https://jaifar.net/ADS/my_authorship_model_zip.zip'
+#         r = requests.get(model_url, headers=headers)
+#         r.raise_for_status()
+
+#         # Debugging: Check if download is successful by examining content length
+#         st.write(f"Downloaded model size: {len(r.content)} bytes")
+
+#         # Save the downloaded content
+#         with open(zip_file_path, "wb") as f:
+#             f.write(r.content)
+
+#         # Debugging: Verify that the zip file exists
+#         if os.path.exists(zip_file_path):
+#             st.write("Zip file exists")
+
+#             # Debugging: List contents of the zip file using unzip
+#             subprocess.run(['unzip', '-l', zip_file_path])
+
+#             # Extract the model using unzip
+#             unzip_result = subprocess.run(['unzip', '-o', zip_file_path, '-d', 'my_authorship_model'])
+
+#             # Debugging: Check unzip exit code (0 means success)
+#             if unzip_result.returncode == 0:
+#                 st.write("Model folder successfully extracted using unzip")
+#                 # Debugging: List the directory contents after extraction
+#                 st.write("Listing directory contents:")
+#                 st.write(os.listdir('.'))
+#             else:
+#                 st.write("Model folder was not extracted successfully using unzip")
+#                 exit(1)
+#         else:
+#             st.write("Zip file does not exist")
+#             exit(1)
+#     except Exception as e:
+#         st.write(f"Failed to download or extract the model: {e}")
+#         exit(1)
+# else:
+#     st.write("Model folder exists")
+
+# # Debugging: Print current working directory after extraction
+# st.write(f"Current Working Directory After Extraction: {os.getcwd()}")
+
+# # Debugging: Check if model folder contains required files
+# try:
+#     model_files = os.listdir('my_authorship_model')
+#     st.write(f"Files in model folder: {model_files}")
+# except Exception as e:
+#     st.write(f"Could not list files in model folder: {e}")
+
+# # Download the required files
+# file_urls = {
+#     'tokenizer.pkl': 'https://jaifar.net/ADS/tokenizer.pkl',
+#     'label_encoder.pkl': 'https://jaifar.net/ADS/label_encoder.pkl'
+# }
+
+# for filename, url in file_urls.items():
+#     try:
+#         r = requests.get(url, headers=headers)
+#         r.raise_for_status()
+#         with open(filename, 'wb') as f:
+#             f.write(r.content)
+#     except Exception as e:
+#         st.write(f"Failed to download {filename}: {e}")
+#         exit(1)
+
+# # Load the saved model
+# loaded_model = load_model("my_authorship_model")
+
+# # Load the saved tokenizer and label encoder
+# with open('tokenizer.pkl', 'rb') as handle:
+#     tokenizer = pickle.load(handle)
+
+# with open('label_encoder.pkl', 'rb') as handle:
+#     label_encoder = pickle.load(handle)
+
+# max_length = 300  # As defined in the training code
+
+# # Function to predict author for new text
+# def predict_author(new_text, model, tokenizer, label_encoder):
+#     sequence = tokenizer.texts_to_sequences([new_text])
+#     padded_sequence = pad_sequences(sequence, maxlen=max_length, padding='post', truncating='post')
+#     prediction = model.predict(padded_sequence)
+
+#     predicted_label = label_encoder.inverse_transform([prediction.argmax()])[0]
+#     probabilities = prediction[0]
+#     author_probabilities = {}
+#     for idx, prob in enumerate(probabilities):
+#         author = label_encoder.inverse_transform([idx])[0]
+#         author_probabilities[author] = prob
+
+#     return predicted_label, author_probabilities
+
+# st.markdown("CNN : version: 1.2")
+# new_text = st.text_area("Input your text here")
+
+# # Creates a button named 'Press me'
+# press_me_button = st.button("Which Model Used?")
+
+# if press_me_button:
+#     predicted_author, author_probabilities = predict_author(new_text, loaded_model, tokenizer, label_encoder)
+#     sorted_probabilities = sorted(author_probabilities.items(), key=lambda x: x[1], reverse=True)
+
+#     st.write(f"The text is most likely written by: {predicted_author}")
+#     st.write("Probabilities for each author are (sorted):")
+#     for author, prob in sorted_probabilities:
+#         st.write(f"{author}: {prob * 100:.2f}%")