jaifar530 commited on
Commit
e014439
·
unverified ·
1 Parent(s): 1084e8e

AI_vs_AI_RandomForest_88_Samples.pkl

Browse files
Files changed (1) hide show
  1. app.py +52 -6
app.py CHANGED
@@ -44,9 +44,9 @@ selected_option = st.selectbox('Select an Option', options)
44
 
45
 
46
  # Check if the file exists
47
- if not os.path.isfile('RandomForestClassifier.pkl'):
48
  # Download the zip file if it doesn't exist
49
- url = 'https://jaifar.net/RandomForestClassifier.pkl'
50
  headers = {
51
  'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3',
52
  }
@@ -54,7 +54,7 @@ if not os.path.isfile('RandomForestClassifier.pkl'):
54
  response = requests.get(url, headers=headers)
55
 
56
  # Save the file
57
- with open('RandomForestClassifier.pkl', 'wb') as file:
58
  file.write(response.content)
59
 
60
 
@@ -88,6 +88,52 @@ num_words = 500
88
  input_paragraph = ' '.join(word_tokenize(input_paragraph)[:num_words])
89
 
90
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
91
 
92
 
93
 
@@ -157,14 +203,14 @@ def add_vectorized_features(df):
157
  return df
158
 
159
 
160
- # Function define
161
  def AI_vs_AI_RandomForest_88_Samples(df):
162
 
163
  # At this point, the pickle file should exist, either it was already there, or it has been downloaded and extracted.
164
- with open('RandomForestClassifier.pkl', 'rb') as file:
165
  clf_loaded = pickle.load(file)
166
 
167
- input_features = df['paragraph'].apply(extract_features)
168
 
169
  predicted_llm = clf_loaded.predict(input_features)
170
  st.write(f"Predicted LLM: {predicted_llm[0]}")
 
44
 
45
 
46
MODEL_FILENAME = 'AI_vs_AI_RandomForest_88_Samples.pkl'

# Fetch the trained model once; skip the download if a previous run already
# saved it next to the app.
if not os.path.isfile(MODEL_FILENAME):
    # Download the pickle file if it doesn't exist.
    url = 'https://jaifar.net/AI_vs_AI_RandomForest_88_Samples.pkl'
    # Some hosts reject non-browser clients, so present a browser User-Agent.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3',
    }
    # timeout keeps the Streamlit app from hanging forever on a dead host.
    response = requests.get(url, headers=headers, timeout=30)
    # Fail loudly on HTTP errors instead of silently persisting an HTML
    # error page to disk as if it were the model.
    response.raise_for_status()

    # Save the file
    with open(MODEL_FILENAME, 'wb') as file:
        file.write(response.content)
59
 
60
 
 
88
  input_paragraph = ' '.join(word_tokenize(input_paragraph)[:num_words])
89
 
90
 
91
# Extracting features for the AI_vs_AI_RandomForest_88_Samples model.
def extract_features_AI_vs_AI_RandomForest_88_Samples(text):
    """Extract the stylometric features the RandomForest classifier expects.

    Parameters
    ----------
    text : str
        A paragraph of natural-language text.

    Returns
    -------
    pandas.Series
        Lexical statistics (average word/sentence length, punctuation,
        stopword, lemma and named-entity counts) plus counts for a fixed
        set of POS tags, keyed by the column names the model was trained on.
    """
    words = word_tokenize(text)
    sentences = sent_tokenize(text)

    # Guard both averages so empty input yields 0.0 instead of raising
    # ZeroDivisionError.
    # NOTE(review): the numerator counts only alphabetic tokens while the
    # denominator counts all tokens — kept as-is because the model was
    # trained with this definition; confirm before "fixing".
    avg_word_length = (
        sum(len(word) for word in words if word.isalpha()) / len(words)
        if words else 0.0
    )
    avg_sent_length = (
        sum(len(sent) for sent in sentences) / len(sentences)
        if sentences else 0.0
    )
    punctuation_count = len([char for char in text if char in '.,;:?!'])

    # Hoist the stopword list into a set: stopwords.words('english') builds a
    # fresh list on every call, so testing it per word was accidentally
    # quadratic; set membership is O(1).
    english_stopwords = set(stopwords.words('english'))
    stopword_count = len([word for word in words if word in english_stopwords])

    lemmatizer = WordNetLemmatizer()
    lemma_count = len(set(lemmatizer.lemmatize(word) for word in words))

    # ne_chunk returns a tree whose Tree children are named-entity chunks.
    named_entity_count = len([chunk for chunk in ne_chunk(pos_tag(words)) if isinstance(chunk, Tree)])

    tagged_words = nltk.pos_tag(words)
    pos_counts = nltk.FreqDist(tag for (word, tag) in tagged_words)
    # Only the POS tags seen during training are used as features.
    pos_features = {
        'pos_IN': pos_counts['IN'],
        'pos_DT': pos_counts['DT'],
        'pos_NN': pos_counts['NN'],
        'pos_,': pos_counts[','],
        'pos_VBZ': pos_counts['VBZ'],
        'pos_WDT': pos_counts['WDT'],
        'pos_TO': pos_counts['TO'],
        'pos_VB': pos_counts['VB'],
        'pos_VBG': pos_counts['VBG'],
        'pos_.': pos_counts['.'],
        'pos_JJ': pos_counts['JJ'],
        'pos_NNS': pos_counts['NNS'],
        'pos_RB': pos_counts['RB'],
        'pos_CC': pos_counts['CC'],
        'pos_VBN': pos_counts['VBN'],
    }

    features = {
        'avg_word_length': avg_word_length,
        'avg_sent_length': avg_sent_length,
        'punctuation_count': punctuation_count,
        'stopword_count': stopword_count,
        'lemma_count': lemma_count,
        'named_entity_count': named_entity_count,
    }
    features.update(pos_features)

    return pd.Series(features)
137
 
138
 
139
 
 
203
  return df
204
 
205
 
206
# Function define AI_vs_AI_RandomForest_88_Samples
def AI_vs_AI_RandomForest_88_Samples(df):
    """Classify each paragraph in *df* and display the predicted LLM.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a 'paragraph' column of input text.

    Returns
    -------
    The model's prediction array (the first prediction is also written to
    the Streamlit page).
    """
    # At this point, the pickle file should exist: either it was already on
    # disk or the startup code above downloaded it.
    # SECURITY NOTE: pickle.load executes arbitrary code embedded in the
    # file — acceptable only because the model is fetched from the author's
    # own host; never point this at untrusted input.
    with open('AI_vs_AI_RandomForest_88_Samples.pkl', 'rb') as file:
        clf_loaded = pickle.load(file)

    input_features = df['paragraph'].apply(extract_features_AI_vs_AI_RandomForest_88_Samples)

    predicted_llm = clf_loaded.predict(input_features)
    st.write(f"Predicted LLM: {predicted_llm[0]}")
    # Return the predictions so callers can use them programmatically;
    # existing callers that ignored the (previously None) return are unaffected.
    return predicted_llm