NORLIE JHON MALAGDAO committed on
Commit
2ae2cbd
·
verified ·
1 Parent(s): f8c68bd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +306 -79
app.py CHANGED
@@ -8,7 +8,18 @@ import tensorflow as tf
8
  from tensorflow import keras
9
  from tensorflow.keras import layers
10
  from tensorflow.keras.models import Sequential
 
 
 
 
 
11
 
 
 
 
 
 
 
12
 
13
  from PIL import Image
14
  import gdown
@@ -16,6 +27,9 @@ import zipfile
16
 
17
  import pathlib
18
 
 
 
 
19
  # Define the Google Drive shareable link
20
  gdrive_url = 'https://drive.google.com/file/d/1HjHYlQyRz5oWt8kehkt1TiOGRRlKFsv8/view?usp=drive_link'
21
 
@@ -55,113 +69,326 @@ for root, dirs, files in os.walk(extracted_path):
55
  for f in files:
56
  print(f"{subindent}{f}")
57
 
58
- import pathlib
59
  # Path to the dataset directory
60
  data_dir = pathlib.Path('extracted_files/Pest_Dataset')
61
  data_dir = pathlib.Path(data_dir)
62
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
 
64
- bees = list(data_dir.glob('bees/*'))
65
- print(bees[0])
66
- PIL.Image.open(str(bees[0]))
67
 
 
 
 
 
 
 
 
 
 
 
 
 
 
68
 
69
- bees = list(data_dir.glob('bees/*'))
70
- print(bees[0])
71
- PIL.Image.open(str(bees[0]))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72
 
73
 
74
- img_height,img_width=180,180
75
- batch_size=32
76
- train_ds = tf.keras.preprocessing.image_dataset_from_directory(
77
- data_dir,
78
- validation_split=0.2,
79
- subset="training",
80
- seed=123,
81
- image_size=(img_height, img_width),
82
- batch_size=batch_size)
83
 
 
 
 
 
 
 
 
84
 
85
- val_ds = tf.keras.preprocessing.image_dataset_from_directory(
86
- data_dir,
87
- validation_split=0.2,
88
- subset="validation",
89
- seed=123,
90
- image_size=(img_height, img_width),
91
- batch_size=batch_size)
92
 
 
 
 
 
 
 
93
 
94
- class_names = train_ds.class_names
95
- print(class_names)
96
 
 
 
97
 
98
- import matplotlib.pyplot as plt
 
 
 
 
 
 
99
 
100
- plt.figure(figsize=(10, 10))
101
- for images, labels in train_ds.take(1):
102
- for i in range(9):
103
- ax = plt.subplot(3, 3, i + 1)
104
- plt.imshow(images[i].numpy().astype("uint8"))
105
- plt.title(class_names[labels[i]])
106
- plt.axis("off")
107
-
108
-
109
- data_augmentation = keras.Sequential(
110
- [
111
- layers.RandomFlip("horizontal", input_shape=(img_height, img_width, 3)),
112
- layers.RandomRotation(0.1),
113
- layers.RandomZoom(0.1),
114
- ]
115
  )
116
 
 
 
 
 
 
 
 
 
 
 
 
 
117
 
118
- plt.figure(figsize=(10, 10))
119
- for images, _ in train_ds.take(1):
120
- for i in range(9):
121
- augmented_images = data_augmentation(images)
122
- ax = plt.subplot(3, 3, i + 1)
123
- plt.imshow(augmented_images[0].numpy().astype("uint8"))
124
- plt.axis("off")
125
-
126
-
127
- num_classes = len(class_names)
128
- model = Sequential([
129
- data_augmentation,
130
- layers.Rescaling(1./255),
131
- layers.Conv2D(16, 3, padding='same', activation='relu'),
132
- layers.MaxPooling2D(),
133
- layers.Conv2D(32, 3, padding='same', activation='relu'),
134
- layers.MaxPooling2D(),
135
- layers.Conv2D(64, 3, padding='same', activation='relu'),
136
- layers.MaxPooling2D(),
137
- layers.Dropout(0.2),
138
- layers.Flatten(),
139
- layers.Dense(128, activation='relu'),
140
- layers.Dense(num_classes, activation='softmax', name="outputs") # Use softmax here
141
- ])
142
 
143
- model.compile(optimizer='adam',
144
- loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False), # Change from_logits to False
145
- metrics=['accuracy'])
146
 
147
- model.summary()
 
148
 
 
 
149
 
150
- epochs = 15
151
- history = model.fit(
152
- train_ds,
153
- validation_data=val_ds,
154
- epochs=epochs
155
- )
156
 
 
 
 
 
 
157
 
158
- import gradio as gr
159
- import numpy as np
160
- import tensorflow as tf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
161
 
 
162
  def predict_image(img):
163
  img = np.array(img)
164
- img_resized = tf.image.resize(img, (180, 180))
165
  img_4d = tf.expand_dims(img_resized, axis=0)
166
  prediction = model.predict(img_4d)[0]
167
  return {class_names[i]: float(prediction[i]) for i in range(len(class_names))}
 
8
  from tensorflow import keras
9
  from tensorflow.keras import layers
10
  from tensorflow.keras.models import Sequential
11
+ from tensorflow.keras.preprocessing.image import ImageDataGenerator
12
+ from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
13
+ from tensorflow.keras.optimizers import Adam
14
+ from tensorflow.keras.layers import Dense, Dropout, Flatten, BatchNormalization
15
+ from tensorflow.keras.models import Model
16
 
17
+ from sklearn.model_selection import train_test_split
18
+ from sklearn.metrics import classification_report
19
+
20
+ import pandas as pd
21
+ import random
22
+ import cv2
23
 
24
  from PIL import Image
25
  import gdown
 
27
 
28
  import pathlib
29
 
30
+ # Ensure that these imports are at the beginning of your script to avoid any NameError issues.
31
+
32
+
33
  # Define the Google Drive shareable link
34
  gdrive_url = 'https://drive.google.com/file/d/1HjHYlQyRz5oWt8kehkt1TiOGRRlKFsv8/view?usp=drive_link'
35
 
 
69
  for f in files:
70
  print(f"{subindent}{f}")
71
 
 
72
  # Path to the dataset directory
73
  data_dir = pathlib.Path('extracted_files/Pest_Dataset')
74
  data_dir = pathlib.Path(data_dir)
75
 
76
+ # Read images and labels into a DataFrame
77
+ image_paths = list(data_dir.glob('*/*.jpg'))
78
+ image_labels = [str(path.parent.name) for path in image_paths]
79
+ image_df = pd.DataFrame({'Filepath': image_paths, 'Label': image_labels})
80
+
81
+ # Display distribution of labels
82
+ label_counts = image_df['Label'].value_counts()
83
+ plt.figure(figsize=(10, 6))
84
+ sns.barplot(x=label_counts.index, y=label_counts.values, alpha=0.8, palette='rocket')
85
+ plt.title('Distribution of Labels in Image Dataset', fontsize=16)
86
+ plt.xlabel('Label', fontsize=14)
87
+ plt.ylabel('Count', fontsize=14)
88
+ plt.xticks(rotation=45)
89
+ plt.show()
90
+
91
+ # Display 16 pictures of the dataset with their labels
92
+ random_index = np.random.randint(0, len(image_df), 16)
93
+ fig, axes = plt.subplots(nrows=4, ncols=4, figsize=(10, 10),
94
+ subplot_kw={'xticks': [], 'yticks': []})
95
+ for i, ax in enumerate(axes.flat):
96
+ ax.imshow(plt.imread(image_df.Filepath[random_index[i]]))
97
+ ax.set_title(image_df.Label[random_index[i]])
98
+ plt.tight_layout()
99
+ plt.show()
100
+
101
+ # Function to return a random image path from a given directory
102
def random_sample(directory):
    """Return the path of one randomly chosen image file in *directory*.

    Entries whose name ends in ``.jpg``, ``.jpeg`` or ``.png`` are
    considered; the extension is matched case-insensitively so files such
    as ``IMG_001.JPG`` are not silently skipped.

    Args:
        directory: Path of the folder to sample from.

    Returns:
        ``os.path.join(directory, name)`` for the chosen entry.

    Raises:
        IndexError: If the directory contains no matching image file.
    """
    # os.listdir() returns entries in arbitrary order; sorting makes the
    # pick reproducible once the caller seeds the ``random`` module.
    images = [
        os.path.join(directory, name)
        for name in sorted(os.listdir(directory))
        if name.lower().endswith(('.jpg', '.jpeg', '.png'))
    ]
    if not images:
        # Same exception type random.choice() raises on an empty list,
        # but with a message that names the offending directory.
        raise IndexError(f"no image files found in {directory!r}")
    return random.choice(images)
105
+
106
+ # Function to compute the Error Level Analysis (ELA) of an image
107
def compute_ela_cv(path, quality):
    """Compute the Error Level Analysis (ELA) image of the file at *path*.

    The image is re-encoded as JPEG at the requested quality and the
    absolute per-pixel difference to the original is amplified by 10.

    Args:
        path: Path of the image to analyse, as accepted by ``cv2.imread``.
        quality: JPEG quality passed as ``cv2.IMWRITE_JPEG_QUALITY``.

    Returns:
        A ``uint8`` array with the shape of the loaded image, holding the
        amplified difference saturated to the 0..255 range.

    Raises:
        FileNotFoundError: If ``cv2.imread`` cannot load *path*.
        ValueError: If the JPEG re-encoding fails.
    """
    orig = cv2.imread(path)
    if orig is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError(f"could not read image: {path!r}")

    # Re-compress in memory rather than through a fixed 'temp.jpg', which
    # was left behind in the working directory after every call.
    ok, encoded = cv2.imencode('.jpg', orig, [int(cv2.IMWRITE_JPEG_QUALITY), quality])
    if not ok:
        raise ValueError(f"JPEG encoding failed for {path!r} at quality {quality}")
    compressed = cv2.imdecode(encoded, cv2.IMREAD_COLOR)

    ela_image = cv2.absdiff(orig, compressed)
    # Widen before scaling: multiplying a uint8 array by 10 wraps modulo
    # 256, so clipping afterwards would not saturate anything.
    ela_image = np.clip(ela_image.astype(np.int32) * 10, 0, 255).astype(np.uint8)
    return ela_image
115
+
116
+ # View random sample from the dataset
117
+ p = random_sample('extracted_files/Pest_Dataset/beetle')
118
+ orig = cv2.imread(p)
119
+ orig = cv2.cvtColor(orig, cv2.COLOR_BGR2RGB) / 255.0
120
+ init_val = 100
121
+ columns = 3
122
+ rows = 3
123
+
124
+ fig = plt.figure(figsize=(15, 10))
125
+ for i in range(1, columns*rows + 1):
126
+ quality = init_val - (i-1) * 8
127
+ img = compute_ela_cv(path=p, quality=quality)
128
+ if i == 1:
129
+ img = orig.copy()
130
+ ax = fig.add_subplot(rows, columns, i)
131
+ ax.title.set_text(f'q: {quality}')
132
+ plt.imshow(img)
133
+ plt.show()
134
+
135
+ # Read images and labels into a DataFrame
136
+ image_paths = list(data_dir.glob('*/*.jpg'))
137
+ image_labels = [str(path.parent.name) for path in image_paths]
138
+ image_df = pd.DataFrame({'Filepath': [str(path) for path in image_paths], 'Label': image_labels})
139
+
140
+ # Separate into train and test data
141
+ train_df, test_df = train_test_split(image_df, test_size=0.2, shuffle=True, random_state=42)
142
+
143
+ train_generator = ImageDataGenerator(
144
+ preprocessing_function=tf.keras.applications.efficientnet_v2.preprocess_input,
145
+ validation_split=0.2
146
+ )
147
 
148
+ test_generator = ImageDataGenerator(
149
+ preprocessing_function=tf.keras.applications.efficientnet_v2.preprocess_input
150
+ )
151
 
152
+ # Create the training, validation, and test image generators
153
+ train_images = train_generator.flow_from_dataframe(
154
+ dataframe=train_df,
155
+ x_col='Filepath',
156
+ y_col='Label',
157
+ target_size=(224, 224),
158
+ color_mode='rgb',
159
+ class_mode='categorical',
160
+ batch_size=32,
161
+ shuffle=True,
162
+ seed=42,
163
+ subset='training'
164
+ )
165
 
166
+ val_images = train_generator.flow_from_dataframe(
167
+ dataframe=train_df,
168
+ x_col='Filepath',
169
+ y_col='Label',
170
+ target_size=(224, 224),
171
+ color_mode='rgb',
172
+ class_mode='categorical',
173
+ batch_size=32,
174
+ shuffle=True,
175
+ seed=42,
176
+ subset='validation'
177
+ )
178
+
179
+ test_images = test_generator.flow_from_dataframe(
180
+ dataframe=test_df,
181
+ x_col='Filepath',
182
+ y_col='Label',
183
+ target_size=(224, 224),
184
+ color_mode='rgb',
185
+ class_mode='categorical',
186
+ batch_size=32,
187
+ shuffle=False
188
+ )
189
 
190
 
191
+ # Data Augmentation Step
192
+ augment = tf.keras.Sequential([
193
+ layers.experimental.preprocessing.Resizing(224, 224),
194
+ layers.experimental.preprocessing.Rescaling(1./255),
195
+ layers.experimental.preprocessing.RandomFlip("horizontal"),
196
+ layers.experimental.preprocessing.RandomRotation(0.1),
197
+ layers.experimental.preprocessing.RandomZoom(0.1),
198
+ layers.experimental.preprocessing.RandomContrast(0.1),
199
+ ])
200
 
201
+ # Load the pretrained model
202
+ pretrained_model = tf.keras.applications.efficientnet_v2.EfficientNetV2L(
203
+ input_shape=(224, 224, 3),
204
+ include_top=False,
205
+ weights='imagenet',
206
+ pooling='max'
207
+ )
208
 
209
+ pretrained_model.trainable = False
 
 
 
 
 
 
210
 
211
+ # Create checkpoint callback
212
+ checkpoint_path = "pests_cats_classification_model_checkpoint"
213
+ checkpoint_callback = ModelCheckpoint(checkpoint_path,
214
+ save_weights_only=True,
215
+ monitor="val_accuracy",
216
+ save_best_only=True)
217
 
218
+ # Set up an EarlyStopping callback that stops training if the model's val_loss doesn't improve for 5 epochs
219
+ early_stopping = EarlyStopping(monitor="val_loss", patience=5, restore_best_weights=True)
220
 
221
+ inputs = pretrained_model.input
222
+ x = augment(inputs)
223
 
224
+ # Add new classification layers
225
+ x = Flatten()(pretrained_model.output)
226
+ x = Dense(256, activation='relu')(x)
227
+ x = Dropout(0.5)(x)
228
+ x = BatchNormalization()(x)
229
+ x = Dense(128, activation='relu')(x)
230
+ x = Dropout(0.5)(x)
231
 
232
+ outputs = Dense(12, activation='softmax')(x)
233
+
234
+ model = Model(inputs=inputs, outputs=outputs)
235
+
236
+ model.compile(
237
+ optimizer=Adam(0.00001),
238
+ loss='categorical_crossentropy',
239
+ metrics=['accuracy']
 
 
 
 
 
 
 
240
  )
241
 
242
+ # Train the model
243
+ history = model.fit(
244
+ train_images,
245
+ steps_per_epoch=len(train_images),
246
+ validation_data=val_images,
247
+ validation_steps=len(val_images),
248
+ epochs=20, # Change epochs to 20
249
+ callbacks=[
250
+ early_stopping,
251
+ checkpoint_callback,
252
+ ]
253
+ )
254
 
255
+ results = model.evaluate(test_images, verbose=0)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
256
 
257
+ print(" Test Loss: {:.5f}".format(results[0]))
258
+ print("Test Accuracy: {:.2f}%".format(results[1] * 100))
 
259
 
260
+ accuracy = history.history['accuracy']
261
+ val_accuracy = history.history['val_accuracy']
262
 
263
+ loss = history.history['loss']
264
+ val_loss = history.history['val_loss']
265
 
266
+ epochs = range(len(accuracy))
267
+ plt.plot(epochs, accuracy, 'b', label='Training accuracy')
268
+ plt.plot(epochs, val_accuracy, 'r', label='Validation accuracy')
 
 
 
269
 
270
+ plt.title('Training and validation accuracy')
271
+ plt.legend()
272
+ plt.figure()
273
+ plt.plot(epochs, loss, 'b', label='Training loss')
274
+ plt.plot(epochs, val_loss, 'r', label='Validation loss')
275
 
276
+ plt.title('Training and validation loss')
277
+ plt.legend()
278
+ plt.show()
279
+
280
+ # Predict the label of the test_images
281
+ pred = model.predict(test_images)
282
+ pred = np.argmax(pred, axis=1)
283
+
284
+ # Map the label
285
+ labels = (train_images.class_indices)
286
+ labels = dict((v, k) for k, v in labels.items())
287
+ pred = [labels[k] for k in pred]
288
+
289
+ # Display the result
290
+ print(f'The first 5 predictions: {pred[:5]}')
291
+
292
# Display 15 random pictures from the test set with their true and predicted labels
# (np.random.randint excludes the upper bound, so len(test_df) lets the last row be drawn too)
random_index = np.random.randint(0, len(test_df), 15)
fig, axes = plt.subplots(nrows=3, ncols=5, figsize=(25, 15),
                         subplot_kw={'xticks': [], 'yticks': []})

for ax, idx in zip(axes.flat, random_index):
    true_label = test_df.Label.iloc[idx]
    predicted_label = pred[idx]
    ax.imshow(plt.imread(test_df.Filepath.iloc[idx]))
    # Green title for a correct prediction, red for a wrong one
    color = "green" if true_label == predicted_label else "red"
    ax.set_title(f"True: {true_label}\nPredicted: {predicted_label}", color=color)
# Adjust the layout before showing; calling it after plt.show() has no effect on the displayed figure
plt.tight_layout()
plt.show()
306
+
307
+ y_test = list(test_df.Label)
308
+ print(classification_report(y_test, pred))
309
+
310
+ report = classification_report(y_test, pred, output_dict=True)
311
+ df = pd.DataFrame(report).transpose()
312
+ df
313
+
314
+ # Define function to get image array
315
def get_img_array(img_path, size):
    """Load the image at *img_path* as a batch containing a single image.

    Args:
        img_path: Path of the image file to load.
        size: Value forwarded as ``target_size`` to Keras ``load_img``.

    Returns:
        The image array with one extra leading axis of length 1.
    """
    image_utils = tf.keras.preprocessing.image
    loaded = image_utils.load_img(img_path, target_size=size)
    pixels = image_utils.img_to_array(loaded)
    # Prepend the batch axis expected by model inputs.
    return np.expand_dims(pixels, axis=0)
320
+
321
+ # Define function to make Grad-CAM heatmap
322
def make_gradcam_heatmap(img_array, model, last_conv_layer_name, classifier_layer_names=None):
    """Build a guided Grad-CAM heatmap for the model's top predicted class.

    Args:
        img_array: Batch holding one preprocessed image; its axes 1 and 2
            are used as the height and width of the returned heatmap.
        model: Keras model that contains the layer *last_conv_layer_name*.
        last_conv_layer_name: Name of the layer whose output is visualised.
        classifier_layer_names: Unused; kept so existing callers keep working.

    Returns:
        A 2-D ``float32`` NumPy array of shape
        ``(img_array.shape[1], img_array.shape[2])`` scaled to 0..1, or all
        zeros when no location has a positive activation.
    """
    grad_model = tf.keras.models.Model(
        model.inputs, [model.get_layer(last_conv_layer_name).output, model.output]
    )

    with tf.GradientTape() as tape:
        conv_outputs, predictions = grad_model(img_array)
        # Score of the class the model considers most likely.
        loss = predictions[:, np.argmax(predictions[0])]

    output = conv_outputs[0]
    grads = tape.gradient(loss, conv_outputs)[0]

    # Guided backprop: keep gradients only where both the activation and
    # the gradient itself are positive.
    gate_f = tf.cast(output > 0, "float32")
    gate_r = tf.cast(grads > 0, "float32")
    guided_grads = grads * gate_f * gate_r

    # One importance weight per channel (mean over the spatial axes).
    weights = tf.reduce_mean(guided_grads, axis=(0, 1))

    # Weighted sum over channels in a single op. The previous per-channel
    # loop accumulated into a NumPy array and then called .numpy() on it.
    cam = tf.reduce_sum(output * weights, axis=-1).numpy().astype(np.float32)

    # cv2.resize takes the target size as (width, height).
    cam = cv2.resize(cam, (img_array.shape[2], img_array.shape[1]))
    cam = np.maximum(cam, 0)

    peak = cam.max()
    if peak <= 0:
        # Nothing positive to normalise; avoid dividing by zero (NaN map).
        return cam
    heatmap = cam / peak

    return heatmap
350
+
351
+ # Define function to save and display Grad-CAM
352
def save_and_display_gradcam(img_path, heatmap, cam_path="cam.jpg", alpha=0.4):
    """Overlay a Grad-CAM heatmap on an image and save the blended result.

    Despite its name, this function does not display anything itself: it
    writes the blended image to *cam_path* and returns that path.

    Args:
        img_path: Path of the original image (loaded at its native size).
        heatmap: 2-D array of activation strengths, assumed to lie in 0..1;
            NaN and out-of-range values are sanitised before colouring.
        cam_path: Destination file for the blended image.
        alpha: Weight of the coloured heatmap when it is added to the image.

    Returns:
        *cam_path*, unchanged.
    """
    # Imported here so the function does not rely on a module-level `cm`
    # name, which the visible part of this script never imports.
    import matplotlib

    img = tf.keras.preprocessing.image.load_img(img_path)
    img = tf.keras.preprocessing.image.img_to_array(img)

    # Map the heatmap to integer colour-table indices 0..255.
    heatmap = np.uint8(255 * np.clip(np.nan_to_num(heatmap), 0.0, 1.0))

    try:
        jet = matplotlib.colormaps["jet"]
    except AttributeError:
        # Older Matplotlib without the `colormaps` registry.
        from matplotlib import cm
        jet = cm.get_cmap("jet")

    # RGB rows of the colour table (alpha column dropped), indexed per pixel.
    jet_colors = jet(np.arange(256))[:, :3]
    jet_heatmap = jet_colors[heatmap]

    # Resize the coloured heatmap to the original image size.
    jet_heatmap = tf.keras.preprocessing.image.array_to_img(jet_heatmap)
    jet_heatmap = jet_heatmap.resize((img.shape[1], img.shape[0]))
    jet_heatmap = tf.keras.preprocessing.image.img_to_array(jet_heatmap)

    superimposed_img = jet_heatmap * alpha + img
    superimposed_img = tf.keras.preprocessing.image.array_to_img(superimposed_img)

    superimposed_img.save(cam_path)

    return cam_path
373
+
374
+ # Display the part of the pictures used by the neural network to classify the pictures
375
+ fig, axes = plt.subplots(nrows=3, ncols=5, figsize=(15, 10),
376
+ subplot_kw={'xticks': [], 'yticks': []})
377
+
378
+ for i, ax in enumerate(axes.flat):
379
+ img_path = test_df.Filepath.iloc[random_index[i]]
380
+ img_array = tf.keras.applications.efficientnet_v2.preprocess_input(get_img_array(img_path, size=(224, 224)))
381
+ heatmap = make_gradcam_heatmap(img_array, model, last_conv_layer_name="top_conv")
382
+ cam_path = save_and_display_gradcam(img_path, heatmap)
383
+ ax.imshow(plt.imread(cam_path))
384
+ ax.set_title(f"True: {test_df.Label.iloc[random_index[i]]}\nPredicted: {pred[random_index[i]]}")
385
+ plt.tight_layout()
386
+ plt.show()
387
 
388
+ # Define Gradio interface
389
def predict_image(img):
    """Classify one image and return a probability for every class label.

    Args:
        img: Image convertible with ``np.array`` (e.g. a PIL image or an
            array); it is resized to 224x224 before prediction.

    Returns:
        A dict mapping each class label to its predicted probability as a
        Python float.
    """
    img = np.array(img)
    img_resized = tf.image.resize(img, (224, 224))
    img_4d = tf.expand_dims(img_resized, axis=0)
    prediction = model.predict(img_4d)[0]
    # `class_names` is not defined anywhere in the visible script, so the
    # label for each output index is taken from the training generator.
    index_to_label = {index: label for label, index in train_images.class_indices.items()}
    return {
        index_to_label.get(i, str(i)): float(score)
        for i, score in enumerate(prediction)
    }