Spaces:

ANCKEM
/

itban3_final_project

Runtime error

App Files Files Community

NORLIE JHON MALAGDAO committed on Jun 12, 2024

Commit

2ae2cbd

verified ·

1 Parent(s): f8c68bd

Update app.py

Browse files

Files changed (1) hide show

app.py +306 -79

app.py CHANGED Viewed

@@ -8,7 +8,18 @@ import tensorflow as tf
 from tensorflow import keras
 from tensorflow.keras import layers
 from tensorflow.keras.models import Sequential
 from PIL import Image
 import gdown
@@ -16,6 +27,9 @@ import zipfile
 import pathlib
 # Define the Google Drive shareable link
 gdrive_url = 'https://drive.google.com/file/d/1HjHYlQyRz5oWt8kehkt1TiOGRRlKFsv8/view?usp=drive_link'
@@ -55,113 +69,326 @@ for root, dirs, files in os.walk(extracted_path):
     for f in files:
         print(f"{subindent}{f}")
-import pathlib
 # Path to the dataset directory
 data_dir = pathlib.Path('extracted_files/Pest_Dataset')
 data_dir = pathlib.Path(data_dir)
-bees = list(data_dir.glob('bees/*'))
-print(bees[0])
-PIL.Image.open(str(bees[0]))
-bees = list(data_dir.glob('bees/*'))
-print(bees[0])
-PIL.Image.open(str(bees[0]))
-img_height,img_width=180,180
-batch_size=32
-train_ds = tf.keras.preprocessing.image_dataset_from_directory(
-  data_dir,
-  validation_split=0.2,
-  subset="training",
-  seed=123,
-  image_size=(img_height, img_width),
-  batch_size=batch_size)
-val_ds = tf.keras.preprocessing.image_dataset_from_directory(
-  data_dir,
-  validation_split=0.2,
-  subset="validation",
-  seed=123,
-  image_size=(img_height, img_width),
-  batch_size=batch_size)
-class_names = train_ds.class_names
-print(class_names)
-import matplotlib.pyplot as plt
-plt.figure(figsize=(10, 10))
-for images, labels in train_ds.take(1):
-  for i in range(9):
-    ax = plt.subplot(3, 3, i + 1)
-    plt.imshow(images[i].numpy().astype("uint8"))
-    plt.title(class_names[labels[i]])
-    plt.axis("off")
-data_augmentation = keras.Sequential(
-  [
-    layers.RandomFlip("horizontal", input_shape=(img_height, img_width, 3)),
-    layers.RandomRotation(0.1),
-    layers.RandomZoom(0.1),
-  ]
 )
-plt.figure(figsize=(10, 10))
-for images, _ in train_ds.take(1):
-  for i in range(9):
-    augmented_images = data_augmentation(images)
-    ax = plt.subplot(3, 3, i + 1)
-    plt.imshow(augmented_images[0].numpy().astype("uint8"))
-    plt.axis("off")
-num_classes = len(class_names)
-model = Sequential([
-  data_augmentation,
-  layers.Rescaling(1./255),
-  layers.Conv2D(16, 3, padding='same', activation='relu'),
-  layers.MaxPooling2D(),
-  layers.Conv2D(32, 3, padding='same', activation='relu'),
-  layers.MaxPooling2D(),
-  layers.Conv2D(64, 3, padding='same', activation='relu'),
-  layers.MaxPooling2D(),
-  layers.Dropout(0.2),
-  layers.Flatten(),
-  layers.Dense(128, activation='relu'),
-  layers.Dense(num_classes, activation='softmax', name="outputs")  # Use softmax here
-])
-model.compile(optimizer='adam',
-              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),  # Change from_logits to False
-              metrics=['accuracy'])
-model.summary()
-epochs = 15
-history = model.fit(
-  train_ds,
-  validation_data=val_ds,
-  epochs=epochs
-)
-import gradio as gr
-import numpy as np
-import tensorflow as tf
 def predict_image(img):
     img = np.array(img)
-    img_resized = tf.image.resize(img, (180, 180))
     img_4d = tf.expand_dims(img_resized, axis=0)
     prediction = model.predict(img_4d)[0]
     return {class_names[i]: float(prediction[i]) for i in range(len(class_names))}

 from tensorflow import keras
 from tensorflow.keras import layers
 from tensorflow.keras.models import Sequential
+from tensorflow.keras.preprocessing.image import ImageDataGenerator
+from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
+from tensorflow.keras.optimizers import Adam
+from tensorflow.keras.layers import Dense, Dropout, Flatten, BatchNormalization
+from tensorflow.keras.models import Model
+from sklearn.model_selection import train_test_split
+from sklearn.metrics import classification_report
+import pandas as pd
+import random
+import cv2
 from PIL import Image
 import gdown
 import pathlib
+# Ensure that these imports are at the beginning of your script to avoid any NameError issues.
 # Define the Google Drive shareable link
 gdrive_url = 'https://drive.google.com/file/d/1HjHYlQyRz5oWt8kehkt1TiOGRRlKFsv8/view?usp=drive_link'
     for f in files:
         print(f"{subindent}{f}")
 # Path to the dataset directory
 data_dir = pathlib.Path('extracted_files/Pest_Dataset')
 data_dir = pathlib.Path(data_dir)
+# Read images and labels into a DataFrame
+# Only files ending in lowercase '.jpg' one directory level below data_dir
+# are matched; the parent directory name is used as the class label.
+image_paths = list(data_dir.glob('*/*.jpg'))
+image_labels = [str(path.parent.name) for path in image_paths]
+image_df = pd.DataFrame({'Filepath': image_paths, 'Label': image_labels})
+# Display distribution of labels
+label_counts = image_df['Label'].value_counts()
+plt.figure(figsize=(10, 6))
+# NOTE(review): `sns` (presumably seaborn) is not imported in the visible
+# part of the file -- confirm it is imported above.
+sns.barplot(x=label_counts.index, y=label_counts.values, alpha=0.8, palette='rocket')
+plt.title('Distribution of Labels in Image Dataset', fontsize=16)
+plt.xlabel('Label', fontsize=14)
+plt.ylabel('Count', fontsize=14)
+plt.xticks(rotation=45)
+plt.show()
+# Display 16 pictures of the dataset with their labels
+# Indices are drawn with replacement, so the same image may appear twice.
+random_index = np.random.randint(0, len(image_df), 16)
+fig, axes = plt.subplots(nrows=4, ncols=4, figsize=(10, 10),
+                         subplot_kw={'xticks': [], 'yticks': []})
+for i, ax in enumerate(axes.flat):
+    ax.imshow(plt.imread(image_df.Filepath[random_index[i]]))
+    ax.set_title(image_df.Label[random_index[i]])
+plt.tight_layout()
+plt.show()
+# Function to return a random image path from a given directory
+def random_sample(directory):
+    images = [os.path.join(directory, img) for img in os.listdir(directory) if img.endswith(('.jpg', '.jpeg', '.png'))]
+    return random.choice(images)
+# Function to compute the Error Level Analysis (ELA) of an image
+def compute_ela_cv(path, quality):
+    temp_filename = 'temp.jpg'
+    orig = cv2.imread(path)
+    cv2.imwrite(temp_filename, orig, [int(cv2.IMWRITE_JPEG_QUALITY), quality])
+    compressed = cv2.imread(temp_filename)
+    ela_image = cv2.absdiff(orig, compressed)
+    ela_image = np.clip(ela_image * 10, 0, 255).astype(np.uint8)
+    return ela_image
+# View random sample from the dataset
+# Shows a 3x3 grid: the original image followed by ELA renderings at
+# decreasing JPEG quality (100, 92, ..., 36).
+p = random_sample('extracted_files/Pest_Dataset/beetle')
+orig = cv2.imread(p)
+# Convert BGR -> RGB and scale to [0, 1] floats for matplotlib display.
+orig = cv2.cvtColor(orig, cv2.COLOR_BGR2RGB) / 255.0
+init_val = 100
+columns = 3
+rows = 3
+fig = plt.figure(figsize=(15, 10))
+for i in range(1, columns*rows + 1):
+    quality = init_val - (i-1) * 8
+    img = compute_ela_cv(path=p, quality=quality)
+    # The first panel shows the untouched original instead of its ELA image
+    # (the ELA result computed just above is discarded for i == 1).
+    if i == 1:
+        img = orig.copy()
+    ax = fig.add_subplot(rows, columns, i)
+    ax.title.set_text(f'q: {quality}')
+    plt.imshow(img)
+plt.show()
+# Rebuild the image/label DataFrame, this time storing file paths as plain
+# strings (the earlier DataFrame held pathlib.Path objects).
+image_paths = list(data_dir.glob('*/*.jpg'))
+image_labels = [str(path.parent.name) for path in image_paths]
+image_df = pd.DataFrame({'Filepath': [str(path) for path in image_paths], 'Label': image_labels})
+# Separate into train and test data
+# 80/20 split with a fixed seed so the partition is reproducible.
+train_df, test_df = train_test_split(image_df, test_size=0.2, shuffle=True, random_state=42)
+# The training generator reserves a further 20% of train_df for validation.
+train_generator = ImageDataGenerator(
+    preprocessing_function=tf.keras.applications.efficientnet_v2.preprocess_input,
+    validation_split=0.2
+)
+test_generator = ImageDataGenerator(
+    preprocessing_function=tf.keras.applications.efficientnet_v2.preprocess_input
+)
+# Build three iterators: training and validation (both drawn from train_df
+# via the `subset` argument) and test (drawn from test_df).
+train_images = train_generator.flow_from_dataframe(
+    dataframe=train_df,
+    x_col='Filepath',
+    y_col='Label',
+    target_size=(224, 224),
+    color_mode='rgb',
+    class_mode='categorical',
+    batch_size=32,
+    shuffle=True,
+    seed=42,
+    subset='training'
+)
+val_images = train_generator.flow_from_dataframe(
+    dataframe=train_df,
+    x_col='Filepath',
+    y_col='Label',
+    target_size=(224, 224),
+    color_mode='rgb',
+    class_mode='categorical',
+    batch_size=32,
+    shuffle=True,
+    seed=42,
+    subset='validation'
+)
+# shuffle=False keeps test batches in test_df row order, which the later
+# prediction/label comparison relies on.
+test_images = test_generator.flow_from_dataframe(
+    dataframe=test_df,
+    x_col='Filepath',
+    y_col='Label',
+    target_size=(224, 224),
+    color_mode='rgb',
+    class_mode='categorical',
+    batch_size=32,
+    shuffle=False
+)
+# Data Augmentation Step
+# NOTE(review): verify that this pipeline is actually connected to the model
+# graph before relying on it; also confirm the installed TensorFlow still
+# exposes the legacy `layers.experimental.preprocessing` namespace.
+augment = tf.keras.Sequential([
+    layers.experimental.preprocessing.Resizing(224, 224),
+    layers.experimental.preprocessing.Rescaling(1./255),
+    layers.experimental.preprocessing.RandomFlip("horizontal"),
+    layers.experimental.preprocessing.RandomRotation(0.1),
+    layers.experimental.preprocessing.RandomZoom(0.1),
+    layers.experimental.preprocessing.RandomContrast(0.1),
+])
+# Load the pretrained model
+# include_top=False drops the ImageNet classifier; pooling='max' applies
+# global max pooling to the backbone output.
+pretrained_model = tf.keras.applications.efficientnet_v2.EfficientNetV2L(
+    input_shape=(224, 224, 3),
+    include_top=False,
+    weights='imagenet',
+    pooling='max'
+)
+# Freeze the backbone so only the layers added on top of it are trained.
+pretrained_model.trainable = False
+# Create checkpoint callback
+# Saves weights only, and only when val_accuracy improves.
+# NOTE(review): some Keras versions require a specific filename suffix for
+# weights-only checkpoints -- confirm against the installed version.
+checkpoint_path = "pests_cats_classification_model_checkpoint"
+checkpoint_callback = ModelCheckpoint(checkpoint_path,
+                                      save_weights_only=True,
+                                      monitor="val_accuracy",
+                                      save_best_only=True)
+# Setup EarlyStopping callback to stop training if model's val_loss doesn't improve for 5 epochs
+early_stopping = EarlyStopping(monitor="val_loss", patience=5, restore_best_weights=True)
+inputs = pretrained_model.input
+x = augment(inputs)
+# Add new classification layers
+x = Flatten()(pretrained_model.output)
+x = Dense(256, activation='relu')(x)
+x = Dropout(0.5)(x)
+x = BatchNormalization()(x)
+x = Dense(128, activation='relu')(x)
+x = Dropout(0.5)(x)
+outputs = Dense(12, activation='softmax')(x)
+model = Model(inputs=inputs, outputs=outputs)
+model.compile(
+    optimizer=Adam(0.00001),
+    loss='categorical_crossentropy',
+    metrics=['accuracy']
 )
+# Train the model
+history = model.fit(
+    train_images,
+    steps_per_epoch=len(train_images),
+    validation_data=val_images,
+    validation_steps=len(val_images),
+    epochs=20,  # upper bound; the EarlyStopping callback may stop sooner
+    callbacks=[
+        early_stopping,
+        checkpoint_callback,
+    ]
+)
+# Evaluate on the held-out test iterator; results is [loss, accuracy]
+# in the order given to model.compile.
+results = model.evaluate(test_images, verbose=0)
+print("    Test Loss: {:.5f}".format(results[0]))
+print("Test Accuracy: {:.2f}%".format(results[1] * 100))
+# Plot per-epoch training/validation accuracy and loss curves.
+accuracy = history.history['accuracy']
+val_accuracy = history.history['val_accuracy']
+loss = history.history['loss']
+val_loss = history.history['val_loss']
+epochs = range(len(accuracy))
+plt.plot(epochs, accuracy, 'b', label='Training accuracy')
+plt.plot(epochs, val_accuracy, 'r', label='Validation accuracy')
+plt.title('Training and validation accuracy')
+plt.legend()
+plt.figure()
+plt.plot(epochs, loss, 'b', label='Training loss')
+plt.plot(epochs, val_loss, 'r', label='Validation loss')
+plt.title('Training and validation loss')
+plt.legend()
+plt.show()
+# Predict the label of the test_images
+pred = model.predict(test_images)
+# Keep only the index of the highest-scoring class for each image.
+pred = np.argmax(pred, axis=1)
+# Map the label
+# class_indices maps name -> index; invert it to index -> name.
+labels = (train_images.class_indices)
+labels = dict((v, k) for k, v in labels.items())
+pred = [labels[k] for k in pred]
+# Display the result
+print(f'The first 5 predictions: {pred[:5]}')
+# Display 25 random pictures from the dataset with their labels
+random_index = np.random.randint(0, len(test_df) - 1, 15)
+fig, axes = plt.subplots(nrows=3, ncols=5, figsize=(25, 15),
+                         subplot_kw={'xticks': [], 'yticks': []})
+for i, ax in enumerate(axes.flat):
+    ax.imshow(plt.imread(test_df.Filepath.iloc[random_index[i]]))
+    if test_df.Label.iloc[random_index[i]] == pred[random_index[i]]:
+        color = "green"
+    else:
+        color = "red"
+    ax.set_title(f"True: {test_df.Label.iloc[random_index[i]]}\nPredicted: {pred[random_index[i]]}", color=color)
+plt.show()
+plt.tight_layout()
+# Compare true test labels with the predicted class names.
+y_test = list(test_df.Label)
+print(classification_report(y_test, pred))
+# Same report as a dict, turned into a DataFrame with one row per class/average.
+report = classification_report(y_test, pred, output_dict=True)
+df = pd.DataFrame(report).transpose()
+# Bare expression: displays the table in a notebook, but has no effect when
+# this file runs as a script.
+df
+# Define function to get image array
+def get_img_array(img_path, size):
+    img = tf.keras.preprocessing.image.load_img(img_path, target_size=size)
+    array = tf.keras.preprocessing.image.img_to_array(img)
+    array = np.expand_dims(array, axis=0)
+    return array
+# Define function to make Grad-CAM heatmap
+def make_gradcam_heatmap(img_array, model, last_conv_layer_name, classifier_layer_names=None):
+    grad_model = tf.keras.models.Model(
+        [model.inputs], [model.get_layer(last_conv_layer_name).output, model.output]
+    )
+    with tf.GradientTape() as tape:
+        conv_outputs, predictions = grad_model(img_array)
+        loss = predictions[:, np.argmax(predictions[0])]
+    output = conv_outputs[0]
+    grads = tape.gradient(loss, conv_outputs)[0]
+    gate_f = tf.cast(output > 0, "float32")
+    gate_r = tf.cast(grads > 0, "float32")
+    guided_grads = grads * gate_f * gate_r
+    weights = tf.reduce_mean(guided_grads, axis=(0, 1))
+    cam = np.zeros(output.shape[0:2], dtype=np.float32)
+    for i, w in enumerate(weights):
+        cam += w * output[:, :, i]
+    cam = cv2.resize(cam.numpy(), (img_array.shape[2], img_array.shape[1]))
+    cam = np.maximum(cam, 0)
+    heatmap = cam / cam.max()
+    return heatmap
+# Define function to save and display Grad-CAM
+def save_and_display_gradcam(img_path, heatmap, cam_path="cam.jpg", alpha=0.4):
+    img = tf.keras.preprocessing.image.load_img(img_path)
+    img = tf.keras.preprocessing.image.img_to_array(img)
+    heatmap = np.uint8(255 * heatmap)
+    jet = cm.get_cmap("jet")
+    jet_colors = jet(np.arange(256))[:, :3]
+    jet_heatmap = jet_colors[heatmap]
+    jet_heatmap = tf.keras.preprocessing.image.array_to_img(jet_heatmap)
+    jet_heatmap = jet_heatmap.resize((img.shape[1], img.shape[0]))
+    jet_heatmap = tf.keras.preprocessing.image.img_to_array(jet_heatmap)
+    superimposed_img = jet_heatmap * alpha + img
+    superimposed_img = tf.keras.preprocessing.image.array_to_img(superimposed_img)
+    superimposed_img.save(cam_path)
+    return cam_path
+# Display the part of the pictures used by the neural network to classify the pictures
+# Reuses the `random_index` sample drawn for the prediction grid above, so
+# the same 15 test images are shown with their Grad-CAM overlays.
+fig, axes = plt.subplots(nrows=3, ncols=5, figsize=(15, 10),
+                         subplot_kw={'xticks': [], 'yticks': []})
+for i, ax in enumerate(axes.flat):
+    img_path = test_df.Filepath.iloc[random_index[i]]
+    img_array = tf.keras.applications.efficientnet_v2.preprocess_input(get_img_array(img_path, size=(224, 224)))
+    # "top_conv" is looked up by name on `model`; it must exist as a layer
+    # of that model.
+    heatmap = make_gradcam_heatmap(img_array, model, last_conv_layer_name="top_conv")
+    # Each overlay is written to the default cam.jpg and read straight back,
+    # so the file is overwritten on every iteration.
+    cam_path = save_and_display_gradcam(img_path, heatmap)
+    ax.imshow(plt.imread(cam_path))
+    ax.set_title(f"True: {test_df.Label.iloc[random_index[i]]}\nPredicted: {pred[random_index[i]]}")
+plt.tight_layout()
+plt.show()
+# Define Gradio interface
 def predict_image(img):
     img = np.array(img)
+    img_resized = tf.image.resize(img, (224, 224))
     img_4d = tf.expand_dims(img_resized, axis=0)
     prediction = model.predict(img_4d)[0]
     return {class_names[i]: float(prediction[i]) for i in range(len(class_names))}