Spaces:

ANCKEM
/

itban3_final_project

Runtime error

NORLIE JHON MALAGDAO

Update app.py

2ae2cbd verified about 1 year ago

13.5 kB

	import gradio as gr
	import matplotlib.pyplot as plt
	import numpy as np
	import os
	import PIL
	import tensorflow as tf

	from tensorflow import keras
	from tensorflow.keras import layers
	from tensorflow.keras.models import Sequential
	from tensorflow.keras.preprocessing.image import ImageDataGenerator
	from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
	from tensorflow.keras.optimizers import Adam
	from tensorflow.keras.layers import Dense, Dropout, Flatten, BatchNormalization
	from tensorflow.keras.models import Model

	from sklearn.model_selection import train_test_split
	from sklearn.metrics import classification_report

	import pandas as pd
	import random
	import cv2

	from PIL import Image
	import gdown
	import zipfile

	import pathlib

	# Ensure that these imports are at the beginning of your script to avoid any NameError issues.


	# --- Dataset download -------------------------------------------------------
	# Fetch the zipped dataset from Google Drive with gdown and unpack it into
	# `extracted_path`. The archive is removed afterwards whether or not the
	# extraction worked.

	# Google Drive shareable link of the dataset archive
	gdrive_url = 'https://drive.google.com/file/d/1HjHYlQyRz5oWt8kehkt1TiOGRRlKFsv8/view?usp=drive_link'

	# The file ID is the path segment between "/d/" and "/view"
	file_id = gdrive_url.split('/d/')[1].split('/view')[0]
	direct_download_url = f'https://drive.google.com/uc?id={file_id}'

	# Local filename for the downloaded ZIP file
	local_zip_file = 'file.zip'

	# Download the ZIP file
	gdown.download(direct_download_url, local_zip_file, quiet=False)

	# Directory to extract files into
	extracted_path = 'extracted_files'

	try:
	    with zipfile.ZipFile(local_zip_file, 'r') as zip_ref:
	        zip_ref.extractall(extracted_path)
	    print("Extraction successful!")
	except FileNotFoundError:
	    # NOTE(review): presumably happens when the download itself failed and
	    # no file was written -- confirm against the gdown version in use.
	    print("Error: The ZIP file was not downloaded.")
	except zipfile.BadZipFile:
	    print("Error: The downloaded file is not a valid ZIP file.")
	finally:
	    # Delete the archive if it exists; guarding avoids a second
	    # FileNotFoundError masking the message printed above.
	    if os.path.exists(local_zip_file):
	        os.remove(local_zip_file)

	# Keep the extraction directory around as a pathlib.Path object
	data_dir = pathlib.Path(extracted_path)

	# Debug aid: print the extracted tree, indenting four spaces per level
	for folder, _subdirs, filenames in os.walk(extracted_path):
	    depth = folder.replace(extracted_path, '').count(os.sep)
	    folder_pad = ' ' * (4 * depth)
	    file_pad = ' ' * (4 * (depth + 1))
	    print(f"{folder_pad}{os.path.basename(folder)}/")
	    for filename in filenames:
	        print(f"{file_pad}{filename}")

	# --- Dataset overview -------------------------------------------------------
	# Path to the dataset directory
	data_dir = pathlib.Path('extracted_files/Pest_Dataset')

	# Read images and labels into a DataFrame. The label of an image is the name
	# of the folder that contains it. '**/*.jpg' searches data_dir recursively;
	# a pattern starting with '/' is non-relative and rejected by Path.glob.
	image_paths = list(data_dir.glob('**/*.jpg'))
	if not image_paths:
	    raise FileNotFoundError(f"No .jpg images found under {data_dir}")
	image_labels = [path.parent.name for path in image_paths]
	image_df = pd.DataFrame({'Filepath': [str(path) for path in image_paths], 'Label': image_labels})

	# Display distribution of labels (plain matplotlib; seaborn is not imported)
	label_counts = image_df['Label'].value_counts()
	plt.figure(figsize=(10, 6))
	bar_colors = plt.get_cmap('magma')(np.linspace(0.2, 0.8, len(label_counts)))
	plt.bar(label_counts.index, label_counts.values, alpha=0.8, color=bar_colors)
	plt.title('Distribution of Labels in Image Dataset', fontsize=16)
	plt.xlabel('Label', fontsize=14)
	plt.ylabel('Count', fontsize=14)
	plt.xticks(rotation=45)
	plt.show()

	# Display 16 pictures of the dataset with their labels
	random_index = np.random.randint(0, len(image_df), 16)
	fig, axes = plt.subplots(nrows=4, ncols=4, figsize=(10, 10),
	                         subplot_kw={'xticks': [], 'yticks': []})
	for i, ax in enumerate(axes.flat):
	    row = image_df.iloc[random_index[i]]
	    ax.imshow(plt.imread(row.Filepath))
	    ax.set_title(row.Label)
	plt.tight_layout()
	plt.show()

	def random_sample(directory):
	    """Return the path of a randomly chosen image file inside *directory*.

	    Only direct children of *directory* whose name ends in ``.jpg``,
	    ``.jpeg`` or ``.png`` are considered. The match ignores case, so
	    ``PHOTO.JPG`` is accepted as well.

	    Args:
	        directory: Path of the folder to pick from.

	    Returns:
	        ``os.path.join(directory, name)`` for one matching file name.

	    Raises:
	        IndexError: If the folder holds no matching file (raised by
	            ``random.choice`` on an empty list).
	    """
	    image_suffixes = ('.jpg', '.jpeg', '.png')
	    images = [
	        os.path.join(directory, name)
	        for name in os.listdir(directory)
	        if name.lower().endswith(image_suffixes)
	    ]
	    return random.choice(images)

	def compute_ela_cv(path, quality):
	    """Compute the Error Level Analysis (ELA) image of the file at *path*.

	    The image is re-encoded as JPEG at the given *quality*, decoded again,
	    and the absolute per-pixel difference to the original is amplified by a
	    factor of 10 and saturated to the 0-255 range.

	    Args:
	        path: Path of the image file to analyse.
	        quality: JPEG quality used for the re-encoding step.

	    Returns:
	        A ``uint8`` array in OpenCV channel order (as produced by
	        ``cv2.imread``) holding the amplified difference.

	    Raises:
	        ValueError: If the image cannot be read or re-encoded.
	    """
	    orig = cv2.imread(path)
	    if orig is None:
	        raise ValueError(f"Could not read image: {path}")

	    # Round-trip through JPEG in memory instead of a shared 'temp.jpg' file.
	    ok, encoded = cv2.imencode('.jpg', orig, [int(cv2.IMWRITE_JPEG_QUALITY), int(quality)])
	    if not ok:
	        raise ValueError(f"Could not JPEG-encode image: {path}")
	    compressed = cv2.imdecode(encoded, cv2.IMREAD_COLOR)

	    difference = cv2.absdiff(orig, compressed)
	    # Widen before scaling: multiplying uint8 data by 10 wraps around at 256,
	    # which would make the clip below a no-op.
	    amplified = difference.astype(np.int32) * 10
	    return np.clip(amplified, 0, 255).astype(np.uint8)

	# --- ELA preview --------------------------------------------------------------
	# Show one random beetle image followed by its ELA at decreasing JPEG quality.
	p = random_sample('extracted_files/Pest_Dataset/beetle')
	orig = cv2.imread(p)
	orig = cv2.cvtColor(orig, cv2.COLOR_BGR2RGB) / 255.0
	init_val = 100
	columns = 3
	rows = 3

	fig = plt.figure(figsize=(15, 10))
	for i in range(1, columns * rows + 1):
	    quality = init_val - (i - 1) * 8
	    if i == 1:
	        # First panel shows the untouched original instead of an ELA image.
	        img = orig.copy()
	    else:
	        # compute_ela_cv returns OpenCV's BGR order; matplotlib expects RGB.
	        img = cv2.cvtColor(compute_ela_cv(path=p, quality=quality), cv2.COLOR_BGR2RGB)
	    ax = fig.add_subplot(rows, columns, i)
	    ax.title.set_text(f'q: {quality}')
	    ax.imshow(img)
	plt.show()

	# --- Data pipeline ------------------------------------------------------------
	# Read images and labels into a DataFrame. '**/*.jpg' searches data_dir
	# recursively; a pattern starting with '/' is non-relative and rejected by
	# Path.glob. The label is the name of the folder that contains the image.
	image_paths = list(data_dir.glob('**/*.jpg'))
	image_labels = [path.parent.name for path in image_paths]
	image_df = pd.DataFrame({'Filepath': [str(path) for path in image_paths], 'Label': image_labels})

	# Separate into train and test data (80 % / 20 %)
	train_df, test_df = train_test_split(image_df, test_size=0.2, shuffle=True, random_state=42)

	# The training generator additionally reserves 20 % of train_df for validation
	train_generator = ImageDataGenerator(
	    preprocessing_function=tf.keras.applications.efficientnet_v2.preprocess_input,
	    validation_split=0.2
	)

	test_generator = ImageDataGenerator(
	    preprocessing_function=tf.keras.applications.efficientnet_v2.preprocess_input
	)

	# Arguments shared by all three flows
	flow_kwargs = dict(
	    x_col='Filepath',
	    y_col='Label',
	    target_size=(224, 224),
	    color_mode='rgb',
	    class_mode='categorical',
	    batch_size=32,
	)

	# Split the data into three categories: training, validation and test
	train_images = train_generator.flow_from_dataframe(
	    dataframe=train_df,
	    shuffle=True,
	    seed=42,
	    subset='training',
	    **flow_kwargs
	)

	val_images = train_generator.flow_from_dataframe(
	    dataframe=train_df,
	    shuffle=True,
	    seed=42,
	    subset='validation',
	    **flow_kwargs
	)

	# No shuffling here so predictions stay aligned with the rows of test_df
	test_images = test_generator.flow_from_dataframe(
	    dataframe=test_df,
	    shuffle=False,
	    **flow_kwargs
	)


	# --- Model definition ---------------------------------------------------------
	# Data Augmentation Step
	# Uses the stable `layers.*` names; the `layers.experimental.preprocessing`
	# namespace is not available in recent Keras releases.
	# NOTE(review): this pipeline is NOT part of `model` below. The original code
	# called `augment(inputs)` but discarded the result on the very next
	# statement, so training never used it. Wiring it in would nest the backbone
	# and break the `model.get_layer("top_conv")` lookup used for Grad-CAM; the
	# Rescaling step would also need revisiting first.
	augment = tf.keras.Sequential([
	    layers.Resizing(224, 224),
	    layers.Rescaling(1./255),
	    layers.RandomFlip("horizontal"),
	    layers.RandomRotation(0.1),
	    layers.RandomZoom(0.1),
	    layers.RandomContrast(0.1),
	])

	# Load the pretrained model (ImageNet weights, no classification top)
	pretrained_model = tf.keras.applications.efficientnet_v2.EfficientNetV2L(
	    input_shape=(224, 224, 3),
	    include_top=False,
	    weights='imagenet',
	    pooling='max'
	)

	# Freeze the backbone; only the new head below is trained
	pretrained_model.trainable = False

	# Create checkpoint callback. Weights-only checkpoints must end in
	# ".weights.h5" on Keras 3; the suffix is also accepted by older tf.keras.
	checkpoint_path = "pests_cats_classification_model_checkpoint.weights.h5"
	checkpoint_callback = ModelCheckpoint(checkpoint_path,
	                                      save_weights_only=True,
	                                      monitor="val_accuracy",
	                                      save_best_only=True)

	# Setup EarlyStopping callback to stop training if model's val_loss doesn't improve for 5 epochs
	early_stopping = EarlyStopping(monitor="val_loss", patience=5, restore_best_weights=True)

	inputs = pretrained_model.input

	# Add new classification layers on top of the backbone output
	x = Flatten()(pretrained_model.output)
	x = Dense(256, activation='relu')(x)
	x = Dropout(0.5)(x)
	x = BatchNormalization()(x)
	x = Dense(128, activation='relu')(x)
	x = Dropout(0.5)(x)

	# One output unit per class found by the training generator (12 for the
	# dataset named in the app description) instead of a hard-coded count.
	num_classes = len(train_images.class_indices)
	outputs = Dense(num_classes, activation='softmax')(x)

	model = Model(inputs=inputs, outputs=outputs)

	model.compile(
	    optimizer=Adam(0.00001),
	    loss='categorical_crossentropy',
	    metrics=['accuracy']
	)

	# --- Training and evaluation --------------------------------------------------
	# Fit for up to 20 epochs, one full pass over each generator per epoch
	fit_callbacks = [early_stopping, checkpoint_callback]
	history = model.fit(
	    train_images,
	    steps_per_epoch=len(train_images),
	    validation_data=val_images,
	    validation_steps=len(val_images),
	    epochs=20,
	    callbacks=fit_callbacks,
	)

	# model.evaluate returns [loss, accuracy] for the metrics compiled above
	results = model.evaluate(test_images, verbose=0)

	print(" Test Loss: {:.5f}".format(results[0]))
	print("Test Accuracy: {:.2f}%".format(results[1] * 100))

	# Per-epoch curves recorded by Keras during fit
	accuracy = history.history['accuracy']
	val_accuracy = history.history['val_accuracy']

	loss = history.history['loss']
	val_loss = history.history['val_loss']

	epochs = range(len(accuracy))

	# One figure per metric: training curve in blue, validation curve in red
	curve_specs = [
	    ('Training and validation accuracy',
	     accuracy, 'Training accuracy', val_accuracy, 'Validation accuracy'),
	    ('Training and validation loss',
	     loss, 'Training loss', val_loss, 'Validation loss'),
	]
	for position, (title, train_curve, train_label, val_curve, val_label) in enumerate(curve_specs):
	    if position > 0:
	        # Every metric after the first gets a fresh figure
	        plt.figure()
	    plt.plot(epochs, train_curve, 'b', label=train_label)
	    plt.plot(epochs, val_curve, 'r', label=val_label)
	    plt.title(title)
	    plt.legend()
	plt.show()

	# --- Test-set predictions -----------------------------------------------------
	# Predict the label of the test_images (index of the highest softmax score)
	pred = model.predict(test_images)
	pred = np.argmax(pred, axis=1)

	# Map class index -> class name using the generator's name -> index table
	labels = (train_images.class_indices)
	labels = dict((v, k) for k, v in labels.items())
	pred = [labels[k] for k in pred]

	# Display the result
	print(f'The first 5 predictions: {pred[:5]}')

	# Display 15 random pictures from the test set with true and predicted labels.
	# randint's upper bound is exclusive, so len(test_df) lets the last row be drawn.
	random_index = np.random.randint(0, len(test_df), 15)
	fig, axes = plt.subplots(nrows=3, ncols=5, figsize=(25, 15),
	                         subplot_kw={'xticks': [], 'yticks': []})

	for i, ax in enumerate(axes.flat):
	    position = random_index[i]
	    true_label = test_df.Label.iloc[position]
	    predicted_label = pred[position]
	    ax.imshow(plt.imread(test_df.Filepath.iloc[position]))
	    # Green title for a correct prediction, red for a wrong one
	    color = "green" if true_label == predicted_label else "red"
	    ax.set_title(f"True: {true_label}\nPredicted: {predicted_label}", color=color)
	# Lay out before showing; calling tight_layout after show has no effect
	plt.tight_layout()
	plt.show()

	y_test = list(test_df.Label)
	print(classification_report(y_test, pred))

	# Same report as a DataFrame for further use
	report = classification_report(y_test, pred, output_dict=True)
	df = pd.DataFrame(report).transpose()

	def get_img_array(img_path, size):
	    """Load the image at *img_path* and return it as a batch of one.

	    Args:
	        img_path: Path of the image file.
	        size: Passed to ``load_img`` as ``target_size``.

	    Returns:
	        The image array with a new leading axis of length 1.
	    """
	    pil_image = tf.keras.preprocessing.image.load_img(img_path, target_size=size)
	    pixels = tf.keras.preprocessing.image.img_to_array(pil_image)
	    # Prepend the batch axis
	    return pixels[np.newaxis, ...]

	def make_gradcam_heatmap(img_array, model, last_conv_layer_name, classifier_layer_names=None):
	    """Build a Grad-CAM heatmap for the top predicted class of one image.

	    Args:
	        img_array: Preprocessed input batch; only the first item is used.
	            Axes 1 and 2 are taken as the output height and width.
	        model: Keras model to explain.
	        last_conv_layer_name: Name of the layer whose activations are
	            weighted (looked up with ``model.get_layer``).
	        classifier_layer_names: Unused; kept for call compatibility.

	    Returns:
	        A 2-D float array resized to the input's height and width. Values
	        are scaled to the 0-1 range, or all zeros when the map has no
	        positive response.
	    """
	    grad_model = tf.keras.models.Model(
	        model.inputs, [model.get_layer(last_conv_layer_name).output, model.output]
	    )

	    with tf.GradientTape() as tape:
	        conv_outputs, predictions = grad_model(img_array)
	        # Score of the class the model considers most likely
	        loss = predictions[:, np.argmax(predictions[0])]

	    output = conv_outputs[0]
	    grads = tape.gradient(loss, conv_outputs)[0]

	    # Guided gradients: keep only positive gradients at positive activations
	    gate_f = tf.cast(output > 0, "float32")
	    gate_r = tf.cast(grads > 0, "float32")
	    guided_grads = grads * gate_f * gate_r

	    # One weight per channel, averaged over the spatial axes
	    weights = tf.reduce_mean(guided_grads, axis=(0, 1))

	    # Weighted sum over channels in a single op (replaces a per-channel
	    # Python loop that depended on `cam` silently becoming a tensor).
	    cam = tf.reduce_sum(output * weights, axis=-1).numpy()

	    cam = cv2.resize(cam, (img_array.shape[2], img_array.shape[1]))
	    cam = np.maximum(cam, 0)

	    peak = cam.max()
	    if peak <= 0:
	        # Nothing positive to normalise; avoid dividing by zero
	        return cam
	    heatmap = cam / peak

	    return heatmap

	def save_and_display_gradcam(img_path, heatmap, cam_path="cam.jpg", alpha=0.4):
	    """Overlay *heatmap* on the image at *img_path* and save the result.

	    Despite the name, nothing is displayed here; the caller shows the saved
	    file.

	    Args:
	        img_path: Path of the original image.
	        heatmap: 2-D array with values in the 0-1 range.
	        cam_path: Where the blended image is written.
	        alpha: Weight of the coloured heatmap in the blend.

	    Returns:
	        ``cam_path``.
	    """
	    img = tf.keras.preprocessing.image.load_img(img_path)
	    img = tf.keras.preprocessing.image.img_to_array(img)

	    # Quantise to 0-255 so the values can index the colour table
	    heatmap = np.uint8(255 * heatmap)

	    # pyplot is already imported by this file; `cm` never was
	    jet = plt.get_cmap("jet")

	    # RGB columns of the 256-entry "jet" table, then one colour per pixel
	    jet_colors = jet(np.arange(256))[:, :3]
	    jet_heatmap = jet_colors[heatmap]

	    # Resize the coloured heatmap to the original image size
	    jet_heatmap = tf.keras.preprocessing.image.array_to_img(jet_heatmap)
	    jet_heatmap = jet_heatmap.resize((img.shape[1], img.shape[0]))
	    jet_heatmap = tf.keras.preprocessing.image.img_to_array(jet_heatmap)

	    superimposed_img = jet_heatmap * alpha + img
	    superimposed_img = tf.keras.preprocessing.image.array_to_img(superimposed_img)

	    superimposed_img.save(cam_path)

	    return cam_path

	# --- Grad-CAM gallery ---------------------------------------------------------
	# For the sampled test images, show which regions the network relied on
	fig, axes = plt.subplots(
	    nrows=3,
	    ncols=5,
	    figsize=(15, 10),
	    subplot_kw={'xticks': [], 'yticks': []},
	)

	for i, ax in enumerate(axes.flat):
	    sample_position = random_index[i]
	    img_path = test_df.Filepath.iloc[sample_position]

	    # Load at the network's input size and apply the backbone preprocessing
	    raw_batch = get_img_array(img_path, size=(224, 224))
	    img_array = tf.keras.applications.efficientnet_v2.preprocess_input(raw_batch)

	    heatmap = make_gradcam_heatmap(img_array, model, last_conv_layer_name="top_conv")
	    cam_path = save_and_display_gradcam(img_path, heatmap)

	    overlay = plt.imread(cam_path)
	    ax.imshow(overlay)
	    true_label = test_df.Label.iloc[sample_position]
	    predicted_label = pred[sample_position]
	    ax.set_title(f"True: {true_label}\nPredicted: {predicted_label}")
	plt.tight_layout()
	plt.show()

	# Prediction callback used by the Gradio interface
	def predict_image(img):
	    """Classify one uploaded image and return a score per class name.

	    Args:
	        img: Image supplied by the Gradio input component; converted with
	            ``np.array``.

	    Returns:
	        Dict mapping each class name to the model's score for it as a
	        Python float.
	    """
	    # Class names ordered by the index the training generator gave them, so
	    # position i matches output unit i of the model.
	    class_indices = train_images.class_indices
	    class_names = sorted(class_indices, key=class_indices.get)

	    img = np.array(img)
	    img_resized = tf.image.resize(img, (224, 224))
	    # Same preprocessing function the data generators apply
	    img_resized = tf.keras.applications.efficientnet_v2.preprocess_input(img_resized)
	    img_4d = tf.expand_dims(img_resized, axis=0)
	    prediction = model.predict(img_4d)[0]
	    return {name: float(score) for name, score in zip(class_names, prediction)}

	# --- Gradio app -----------------------------------------------------------------
	# Custom CSS: full-page background image with white text
	custom_css = """
	body {
	background-image: url('extracted_files/Pest_Dataset/bees/bees (444).jpg');
	background-size: cover;
	background-repeat: no-repeat;
	background-attachment: fixed;
	color: white;
	}
	"""

	# Input and output components; the label shows only the top class
	image = gr.Image()
	label = gr.Label(num_top_classes=1)

	app_title = "Welcome to Agricultural Pest Image Classification"
	app_description = "The image data set used was obtained from Kaggle and has a collection of 12 different types of agricultural pests: Ants, Bees, Beetles, Caterpillars, Earthworms, Earwigs, Grasshoppers, Moths, Slugs, Snails, Wasps, and Weevils"

	# Build the interface around predict_image, then start it in debug mode
	demo = gr.Interface(
	    fn=predict_image,
	    inputs=image,
	    outputs=label,
	    title=app_title,
	    description=app_description,
	    css=custom_css,
	)
	demo.launch(debug=True)