"""Agricultural pest image classification pipeline.

Downloads the pest dataset, explores it, trains an EfficientNetV2L-based
classifier, evaluates it, renders Grad-CAM visualisations and defines the
prediction function used by the Gradio interface.
"""

import os
import pathlib
import random
import zipfile

import cv2
import gdown
import gradio as gr
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import PIL
import tensorflow as tf
from PIL import Image
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
from tensorflow.keras.layers import Dense, Dropout, Flatten, BatchNormalization
from tensorflow.keras.models import Model
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Ensure that these imports are at the beginning of your script to avoid any NameError issues.

# ---------------------------------------------------------------------------
# Download and extract the dataset
# ---------------------------------------------------------------------------

# Define the Google Drive shareable link
gdrive_url = 'https://drive.google.com/file/d/1HjHYlQyRz5oWt8kehkt1TiOGRRlKFsv8/view?usp=drive_link'

# Extract the file ID from the URL and build a direct-download URL from it
file_id = gdrive_url.split('/d/')[1].split('/view')[0]
direct_download_url = f'https://drive.google.com/uc?id={file_id}'

# Define the local filename to save the ZIP file
local_zip_file = 'file.zip'

# Download the ZIP file
gdown.download(direct_download_url, local_zip_file, quiet=False)

# Directory to extract files
extracted_path = 'extracted_files'

# Verify if the downloaded file is a ZIP file and extract it
try:
    with zipfile.ZipFile(local_zip_file, 'r') as zip_ref:
        zip_ref.extractall(extracted_path)
    print("Extraction successful!")
except zipfile.BadZipFile:
    print("Error: The downloaded file is not a valid ZIP file.")

# Optionally, you can delete the ZIP file after extraction
os.remove(local_zip_file)

# Print the directory structure to debug
for root, dirs, files in os.walk(extracted_path):
    level = root.replace(extracted_path, '').count(os.sep)
    indent = ' ' * 4 * (level)
    print(f"{indent}{os.path.basename(root)}/")
    subindent = ' ' * 4 * (level + 1)
    for f in files:
        print(f"{subindent}{f}")

# Path to the dataset directory
data_dir = pathlib.Path('extracted_files/Pest_Dataset')

# ---------------------------------------------------------------------------
# Exploratory views of the dataset
# ---------------------------------------------------------------------------

# Read images and labels into a DataFrame. Each image's label is the name of
# its parent directory. Paths are stored as strings because the Keras
# dataframe iterators below read them from the 'Filepath' column.
image_paths = list(data_dir.glob('*/*.jpg'))
image_labels = [str(path.parent.name) for path in image_paths]
image_df = pd.DataFrame({
    'Filepath': [str(path) for path in image_paths],
    'Label': image_labels,
})

# Display distribution of labels.
# Plain matplotlib is used here: the file never imports seaborn, so the
# previous `sns.barplot` call raised NameError.
label_counts = image_df['Label'].value_counts()
plt.figure(figsize=(10, 6))
plt.bar(label_counts.index, label_counts.values, alpha=0.8)
plt.title('Distribution of Labels in Image Dataset', fontsize=16)
plt.xlabel('Label', fontsize=14)
plt.ylabel('Count', fontsize=14)
plt.xticks(rotation=45)
plt.show()

# Display 16 pictures of the dataset with their labels
random_index = np.random.randint(0, len(image_df), 16)
fig, axes = plt.subplots(nrows=4, ncols=4, figsize=(10, 10),
                         subplot_kw={'xticks': [], 'yticks': []})
for i, ax in enumerate(axes.flat):
    ax.imshow(plt.imread(image_df.Filepath.iloc[random_index[i]]))
    ax.set_title(image_df.Label.iloc[random_index[i]])
plt.tight_layout()
plt.show()


def random_sample(directory):
    """Return the path of a randomly chosen image file in *directory*.

    Only files whose names end in .jpg, .jpeg or .png (any letter case) are
    considered.

    Raises:
        IndexError: if the directory contains no matching image
            (raised by ``random.choice`` on an empty list).
    """
    images = [
        os.path.join(directory, img)
        for img in os.listdir(directory)
        if img.lower().endswith(('.jpg', '.jpeg', '.png'))
    ]
    return random.choice(images)


def compute_ela_cv(path, quality):
    """Compute the Error Level Analysis (ELA) image of the file at *path*.

    The image is re-encoded as JPEG at the given *quality* and the absolute
    per-pixel difference to the original is amplified by 10.

    Args:
        path: path of the image to analyse.
        quality: JPEG quality used for the re-encoding step.

    Returns:
        A uint8 array in OpenCV's BGR channel order with values in [0, 255].

    Raises:
        FileNotFoundError: if OpenCV cannot read the image.
        RuntimeError: if the JPEG re-encoding fails.
    """
    orig = cv2.imread(path)
    if orig is None:
        raise FileNotFoundError(f"Could not read image: {path}")

    # Round-trip through an in-memory JPEG buffer instead of a shared
    # 'temp.jpg' file, so nothing is left behind on disk.
    ok, encoded = cv2.imencode(
        '.jpg', orig, [int(cv2.IMWRITE_JPEG_QUALITY), int(quality)]
    )
    if not ok:
        raise RuntimeError(f"JPEG re-encoding failed for: {path}")
    compressed = cv2.imdecode(encoded, cv2.IMREAD_COLOR)

    ela_image = cv2.absdiff(orig, compressed)
    # Widen before scaling: multiplying the uint8 difference directly wraps
    # around modulo 256, which made the subsequent clip a no-op.
    ela_image = np.clip(ela_image.astype(np.int32) * 10, 0, 255).astype(np.uint8)
    return ela_image


# View random sample from the dataset
p = random_sample('extracted_files/Pest_Dataset/beetle')
orig = cv2.imread(p)
orig = cv2.cvtColor(orig, cv2.COLOR_BGR2RGB) / 255.0
init_val = 100
columns = 3
rows = 3

fig = plt.figure(figsize=(15, 10))
for i in range(1, columns * rows + 1):
    quality = init_val - (i - 1) * 8
    if i == 1:
        # The first panel shows the unmodified image for reference.
        img = orig.copy()
    else:
        # ELA output is BGR; convert so matplotlib shows the right colours.
        img = cv2.cvtColor(compute_ela_cv(path=p, quality=quality),
                           cv2.COLOR_BGR2RGB)
    ax = fig.add_subplot(rows, columns, i)
    ax.title.set_text(f'q: {quality}')
    plt.imshow(img)
plt.show()

# ---------------------------------------------------------------------------
# Data generators
# ---------------------------------------------------------------------------

# Separate into train and test data
train_df, test_df = train_test_split(image_df, test_size=0.2, shuffle=True, random_state=42)

train_generator = ImageDataGenerator(
    preprocessing_function=tf.keras.applications.efficientnet_v2.preprocess_input,
    validation_split=0.2
)

test_generator = ImageDataGenerator(
    preprocessing_function=tf.keras.applications.efficientnet_v2.preprocess_input
)

# Split the data into three categories
train_images = train_generator.flow_from_dataframe(
    dataframe=train_df,
    x_col='Filepath',
    y_col='Label',
    target_size=(224, 224),
    color_mode='rgb',
    class_mode='categorical',
    batch_size=32,
    shuffle=True,
    seed=42,
    subset='training'
)

val_images = train_generator.flow_from_dataframe(
    dataframe=train_df,
    x_col='Filepath',
    y_col='Label',
    target_size=(224, 224),
    color_mode='rgb',
    class_mode='categorical',
    batch_size=32,
    shuffle=True,
    seed=42,
    subset='validation'
)

test_images = test_generator.flow_from_dataframe(
    dataframe=test_df,
    x_col='Filepath',
    y_col='Label',
    target_size=(224, 224),
    color_mode='rgb',
    class_mode='categorical',
    batch_size=32,
    shuffle=False
)

# Class names ordered by the index the generator assigned to them; used to
# size the output layer and to label predictions in `predict_image`.
class_names = [
    name for name, _ in sorted(train_images.class_indices.items(),
                               key=lambda item: item[1])
]
num_classes = len(class_names)

# Data Augmentation Step
# NOTE(review): this pipeline is defined but NOT wired into the model. The
# previous code computed `augment(inputs)` and immediately overwrote the
# result, so the trained graph never contained it; that dead call has been
# removed. Wiring it in as-is looks unsafe: the Rescaling(1./255) step would
# conflict with EfficientNetV2's built-in input preprocessing, and Grad-CAM
# below looks up "top_conv" directly on `model` -- confirm before enabling.
augment = tf.keras.Sequential([
    layers.Resizing(224, 224),
    layers.Rescaling(1./255),
    layers.RandomFlip("horizontal"),
    layers.RandomRotation(0.1),
    layers.RandomZoom(0.1),
    layers.RandomContrast(0.1),
])

# ---------------------------------------------------------------------------
# Model definition and training
# ---------------------------------------------------------------------------

# Load the pretrained model and freeze it so only the new head is trained
pretrained_model = tf.keras.applications.efficientnet_v2.EfficientNetV2L(
    input_shape=(224, 224, 3),
    include_top=False,
    weights='imagenet',
    pooling='max'
)
pretrained_model.trainable = False

# Create checkpoint callback
checkpoint_path = "pests_cats_classification_model_checkpoint"
checkpoint_callback = ModelCheckpoint(checkpoint_path,
                                      save_weights_only=True,
                                      monitor="val_accuracy",
                                      save_best_only=True)

# Setup EarlyStopping callback to stop training if model's val_loss doesn't improve for 5 epochs
early_stopping = EarlyStopping(monitor="val_loss",
                               patience=5,
                               restore_best_weights=True)

inputs = pretrained_model.input

# Add new classification layers on top of the frozen backbone
x = Flatten()(pretrained_model.output)
x = Dense(256, activation='relu')(x)
x = Dropout(0.5)(x)
x = BatchNormalization()(x)
x = Dense(128, activation='relu')(x)
x = Dropout(0.5)(x)
# One output unit per class found by the generator (12 for this dataset).
outputs = Dense(num_classes, activation='softmax')(x)

model = Model(inputs=inputs, outputs=outputs)

model.compile(
    optimizer=Adam(0.00001),
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

# Train the model
history = model.fit(
    train_images,
    steps_per_epoch=len(train_images),
    validation_data=val_images,
    validation_steps=len(val_images),
    epochs=20,
    callbacks=[
        early_stopping,
        checkpoint_callback,
    ]
)

# ---------------------------------------------------------------------------
# Evaluation
# ---------------------------------------------------------------------------

results = model.evaluate(test_images, verbose=0)

print(" Test Loss: {:.5f}".format(results[0]))
print("Test Accuracy: {:.2f}%".format(results[1] * 100))

accuracy = history.history['accuracy']
val_accuracy = history.history['val_accuracy']

loss = history.history['loss']
val_loss = history.history['val_loss']

epochs = range(len(accuracy))
plt.plot(epochs, accuracy, 'b', label='Training accuracy')
plt.plot(epochs, val_accuracy, 'r', label='Validation accuracy')

plt.title('Training and validation accuracy')
plt.legend()
plt.figure()
plt.plot(epochs, loss, 'b', label='Training loss')
plt.plot(epochs, val_loss, 'r', label='Validation loss')

plt.title('Training and validation loss')
plt.legend()
plt.show()

# Predict the label of the test_images
pred = model.predict(test_images)
pred = np.argmax(pred, axis=1)

# Map the predicted class indices back to label names
labels = (train_images.class_indices)
labels = dict((v, k) for k, v in labels.items())
pred = [labels[k] for k in pred]

# Display the result
print(f'The first 5 predictions: {pred[:5]}')

# Display 15 random pictures from the test set with their labels.
# The upper bound of randint is exclusive, so len(test_df) lets every row
# (including the last one) be drawn.
random_index = np.random.randint(0, len(test_df), 15)
fig, axes = plt.subplots(nrows=3, ncols=5, figsize=(25, 15),
                         subplot_kw={'xticks': [], 'yticks': []})

for i, ax in enumerate(axes.flat):
    ax.imshow(plt.imread(test_df.Filepath.iloc[random_index[i]]))
    if test_df.Label.iloc[random_index[i]] == pred[random_index[i]]:
        color = "green"
    else:
        color = "red"
    ax.set_title(f"True: {test_df.Label.iloc[random_index[i]]}\nPredicted: {pred[random_index[i]]}", color=color)
# Lay out before showing; calling tight_layout after show has no effect on
# the figure that was already displayed.
plt.tight_layout()
plt.show()

y_test = list(test_df.Label)
print(classification_report(y_test, pred))

report = classification_report(y_test, pred, output_dict=True)
df = pd.DataFrame(report).transpose()
# A bare `df` expression only renders in a notebook; print it explicitly.
print(df)

# ---------------------------------------------------------------------------
# Grad-CAM helpers
# ---------------------------------------------------------------------------


def get_img_array(img_path, size):
    """Load the image at *img_path* resized to *size* as a batch of one.

    Returns:
        An array with a leading batch axis of length 1 added in front of the
        array produced by ``img_to_array``.
    """
    img = tf.keras.preprocessing.image.load_img(img_path, target_size=size)
    array = tf.keras.preprocessing.image.img_to_array(img)
    array = np.expand_dims(array, axis=0)
    return array


def make_gradcam_heatmap(img_array, model, last_conv_layer_name, classifier_layer_names=None):
    """Compute a guided Grad-CAM heatmap for the model's top predicted class.

    Args:
        img_array: batch containing the image to explain; only the first
            item is used.
        model: the model to explain.
        last_conv_layer_name: name of the layer whose activations are
            weighted to form the map.
        classifier_layer_names: unused; kept for interface compatibility.

    Returns:
        A 2-D float array with the spatial size of *img_array*, scaled so its
        maximum is 1. An all-zero array is returned when no location has a
        positive activation.
    """
    grad_model = tf.keras.models.Model(
        model.inputs, [model.get_layer(last_conv_layer_name).output, model.output]
    )

    with tf.GradientTape() as tape:
        conv_outputs, predictions = grad_model(img_array)
        # Score of the most probable class for the first image in the batch.
        top_class = tf.argmax(predictions[0])
        loss = predictions[:, top_class]

    output = conv_outputs[0]
    grads = tape.gradient(loss, conv_outputs)[0]

    # Guided gradients: keep only positions where both the activation and
    # its gradient are positive.
    gate_f = tf.cast(output > 0, "float32")
    gate_r = tf.cast(grads > 0, "float32")
    guided_grads = grads * gate_f * gate_r

    # One weight per channel, then a channel-weighted sum of the activations.
    weights = tf.reduce_mean(guided_grads, axis=(0, 1))
    cam = tf.reduce_sum(output * weights, axis=-1).numpy()

    # cv2.resize expects (width, height).
    cam = cv2.resize(cam, (img_array.shape[2], img_array.shape[1]))
    cam = np.maximum(cam, 0)

    peak = cam.max()
    if peak <= 0:
        # Avoid dividing by zero, which would fill the heatmap with NaN.
        return np.zeros_like(cam)
    heatmap = cam / peak
    return heatmap


def save_and_display_gradcam(img_path, heatmap, cam_path="cam.jpg", alpha=0.4):
    """Overlay *heatmap* on the image at *img_path* and save the result.

    Args:
        img_path: path of the original image.
        heatmap: 2-D array with values in [0, 1].
        cam_path: where the blended image is written.
        alpha: weight applied to the coloured heatmap before it is added to
            the image.

    Returns:
        *cam_path*, the path of the saved overlay.
    """
    img = tf.keras.preprocessing.image.load_img(img_path)
    img = tf.keras.preprocessing.image.img_to_array(img)

    heatmap = np.uint8(255 * heatmap)

    # `plt.get_cmap` is used because the file never imports `matplotlib.cm`.
    jet = plt.get_cmap("jet")
    jet_colors = jet(np.arange(256))[:, :3]
    jet_heatmap = jet_colors[heatmap]

    # Resize the coloured heatmap to the original image size.
    jet_heatmap = tf.keras.preprocessing.image.array_to_img(jet_heatmap)
    jet_heatmap = jet_heatmap.resize((img.shape[1], img.shape[0]))
    jet_heatmap = tf.keras.preprocessing.image.img_to_array(jet_heatmap)

    superimposed_img = jet_heatmap * alpha + img
    superimposed_img = tf.keras.preprocessing.image.array_to_img(superimposed_img)

    superimposed_img.save(cam_path)
    return cam_path


# Display the part of the pictures used by the neural network to classify the pictures
fig, axes = plt.subplots(nrows=3, ncols=5, figsize=(15, 10),
                         subplot_kw={'xticks': [], 'yticks': []})

for i, ax in enumerate(axes.flat):
    img_path = test_df.Filepath.iloc[random_index[i]]
    img_array = tf.keras.applications.efficientnet_v2.preprocess_input(get_img_array(img_path, size=(224, 224)))
    heatmap = make_gradcam_heatmap(img_array, model, last_conv_layer_name="top_conv")
    cam_path = save_and_display_gradcam(img_path, heatmap)
    ax.imshow(plt.imread(cam_path))
    ax.set_title(f"True: {test_df.Label.iloc[random_index[i]]}\nPredicted: {pred[random_index[i]]}")
plt.tight_layout()
plt.show()

# ---------------------------------------------------------------------------
# Gradio prediction function
# ---------------------------------------------------------------------------


def predict_image(img):
    """Classify an image and return a ``{class name: probability}`` mapping.

    Args:
        img: the image supplied by the Gradio input component, or ``None``
            when nothing was submitted.

    Returns:
        A dict mapping every class name to its predicted probability, or
        ``None`` when *img* is ``None``.
    """
    if img is None:
        return None
    img = np.array(img)
    img_resized = tf.image.resize(img, (224, 224))
    img_4d = tf.expand_dims(img_resized, axis=0)
    prediction = model.predict(img_4d)[0]
    return {class_names[i]: float(prediction[i]) for i in range(len(class_names))}

# Input and output components of the Gradio app
image = gr.Image()
label = gr.Label(num_top_classes=1)

# Custom CSS passed to the interface (page background and text colour)
custom_css = """ body { background-image: url('extracted_files/Pest_Dataset/bees/bees (444).jpg'); background-size: cover; background-repeat: no-repeat; background-attachment: fixed; color: white; } """

# Gather the interface options first, then build and launch in one step.
interface_options = {
    "fn": predict_image,
    "inputs": image,
    "outputs": label,
    "title": "Welcome to Agricultural Pest Image Classification",
    "description": "The image data set used was obtained from Kaggle and has a collection of 12 different types of agricultural pests: Ants, Bees, Beetles, Caterpillars, Earthworms, Earwigs, Grasshoppers, Moths, Slugs, Snails, Wasps, and Weevils",
    "css": custom_css,
}

gr.Interface(**interface_options).launch(debug=True)