Problem Statement
In [ ]:
from tensorflow import keras
from tensorflow.keras import layers
import tensorflow as tf
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.utils import image_dataset_from_directory, to_categorical

import pathlib
from pathlib import Path
import numpy as np
import pandas as pd

# plotting modules
from matplotlib import pyplot as plt
import seaborn as sns
import plotly
import plotly.graph_objects as go
plotly.offline.init_notebook_mode()

from sklearn.model_selection import train_test_split
from sklearn.metrics import (accuracy_score, precision_score, recall_score, f1_score,
                             confusion_matrix, classification_report,
                             precision_recall_curve, ConfusionMatrixDisplay)

import logging
import warnings

# Suppress TensorFlow warnings
tf.get_logger().setLevel(logging.ERROR)
warnings.filterwarnings('ignore', category=UserWarning, module='tensorflow')
warnings.filterwarnings('ignore', category=FutureWarning, module='tensorflow')
In [ ]:
tf.config.list_physical_devices('GPU')
Out[ ]:
[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]
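A GPU is available. One optional tweak, sketched below and not required for the run that follows, is to enable memory growth so TensorFlow allocates GPU memory on demand rather than reserving all of it up front; this must run before any GPU ops are created.
In [ ]:
# Optional sketch: allocate GPU memory on demand instead of all at once.
gpus = tf.config.list_physical_devices('GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)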
In [ ]:
data_folder = pathlib.Path("../../../../../Downloads/archive/plantnet_300K")
In [ ]:
train_path = data_folder / "images_train_over_500"
val_path = data_folder / "images_val_over_500"
test_path = data_folder / "images_test_over_500"
In [ ]:
plant_folders = [x for x in train_path.glob('*') if x.is_dir()]
plant_folders = [x.name for x in plant_folders]
plant_folders[:10]
Out[ ]:
['1355932', '1355936', '1355937', '1355978', '1355990', '1356022', '1356075', '1356111', '1356126', '1356257']
In [ ]:
len(plant_folders)
Out[ ]:
108
In [ ]:
# how many images are in each folder
plant_count = {}
for plant in plant_folders:
    plant_count[plant] = len(list(train_path.glob(f'{plant}/*')))
plant_count = {k: v for k, v in sorted(plant_count.items(), key=lambda item: item[1], reverse=True)}
plant_count
Out[ ]:
{'1363227': 7208, '1392475': 6337, '1356022': 6140, '1364099': 5334, '1355937': 5178, '1359517': 5063, '1357330': 4837, '1358752': 4502, '1359620': 4285, '1363128': 4005, '1363991': 3862, '1355936': 3419, '1394460': 3388, '1363740': 3353, '1394994': 3183, '1364173': 3031, '1359616': 2811, '1364164': 2788, '1361824': 2739, '1361823': 2738, '1397364': 2700, '1358095': 2468, '1363130': 2448, '1389510': 2385, '1374048': 2330, '1367432': 2245, '1409238': 2241, '1397268': 2200, '1393614': 2101, '1356781': 2007, '1369887': 1952, '1393241': 1941, '1394420': 1899, '1398178': 1779, '1408774': 1776, '1435714': 1762, '1394591': 1757, '1385937': 1730, '1355932': 1716, '1358094': 1700, '1393425': 1685, '1393423': 1671, '1398592': 1597, '1408961': 1578, '1358133': 1570, '1358766': 1534, '1361656': 1503, '1384485': 1440, '1356257': 1379, '1358689': 1359, '1394382': 1348, '1359498': 1319, '1362490': 1303, '1357635': 1291, '1355990': 1224, '1363336': 1181, '1396824': 1118, '1400100': 1070, '1418146': 1056, '1356075': 1040, '1356382': 1031, '1360978': 1030, '1363764': 1028, '1394454': 1022, '1364159': 1007, '1393393': 968, '1362294': 934, '1369960': 923, '1409295': 923, '1359669': 903, '1355978': 891, '1391483': 889, '1394404': 873, '1398515': 835, '1356111': 823, '1360671': 794, '1391192': 784, '1390637': 748, '1359625': 744, '1364172': 742, '1360998': 740, '1391652': 732, '1360588': 730, '1358605': 728, '1359488': 723, '1361759': 710, '1356126': 704, '1391226': 681, '1360153': 677, '1398128': 663, '1358751': 661, '1360590': 661, '1359485': 648, '1394489': 646, '1393792': 630, '1363737': 622, '1358105': 620, '1421021': 608, '1357677': 571, '1363749': 566, '1356421': 550, '1363490': 543, '1420863': 536, '1363699': 528, '1358150': 520, '1397420': 511, '1418547': 504, '1392695': 502}
In [ ]:
# how many folders have more than 500 images
len([k for k, v in plant_count.items() if v > 500])
Out[ ]:
108
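The counts above range from 7,208 images down to 502, so the training set is noticeably imbalanced. If this turned out to hurt minority-class recall, one option (a sketch assuming the plant_count dict from above; not used in the training run below) is to pass inverse-frequency class weights to model.fit:
In [ ]:
# Sketch: inverse-frequency class weights, keyed by the integer labels
# that image_dataset_from_directory assigns in sorted-name order.
sorted_names = sorted(plant_count)
total = sum(plant_count.values())
class_weight = {i: total / (len(sorted_names) * plant_count[name])
                for i, name in enumerate(sorted_names)}
# Would be passed as model.fit(..., class_weight=class_weight)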
In [ ]:
fig, axes = plt.subplots(10, 5, figsize=(15, 15))
axes = axes.ravel()
for i in range(50):
    plant = plant_folders[i // 5]
    img_files = list(train_path.glob(f'{plant}/*'))
    if len(img_files) > i % 5:
        img_path = img_files[i % 5]
        img = plt.imread(img_path)
        axes[i].imshow(img)
        axes[i].axis('off')
        axes[i].set_title(plant)
    else:
        axes[i].axis('off')
        axes[i].set_title(f'{plant} (No Image)')
plt.tight_layout()
plt.show()
In [ ]:
image_size = (180, 180)
batch_size = 32
train_dataset = tf.keras.utils.image_dataset_from_directory(
    train_path,
    # shuffle=False,
    seed=1337,
    image_size=image_size,
    batch_size=batch_size,
)
validation_dataset = tf.keras.utils.image_dataset_from_directory(
    val_path,
    # shuffle=False,
    seed=1337,
    image_size=image_size,
    batch_size=batch_size,
)
test_dataset = tf.keras.utils.image_dataset_from_directory(
    test_path,
    # shuffle=False,
    seed=1337,
    image_size=image_size,
    batch_size=batch_size,
)
Found 188011 files belonging to 108 classes.
Found 23571 files belonging to 108 classes.
Found 23565 files belonging to 108 classes.
In [ ]:
class_names = test_dataset.class_names
num_classes = len(class_names)
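One optional input-pipeline tweak, sketched below, is to prefetch batches so data loading overlaps with GPU work; it belongs after class_names is read, because prefetch() returns a wrapper dataset that no longer exposes that attribute.
In [ ]:
# Sketch: overlap data loading with training via prefetching.
AUTOTUNE = tf.data.AUTOTUNE
train_dataset = train_dataset.prefetch(AUTOTUNE)
validation_dataset = validation_dataset.prefetch(AUTOTUNE)
test_dataset = test_dataset.prefetch(AUTOTUNE)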
In [ ]:
# normalization: scale pixel values from [0, 255] to [0, 1]
normalization_layer = layers.Rescaling(1./255)
In [ ]:
from tensorflow.keras.applications import DenseNet169
from tensorflow.keras import layers, models
base_model = DenseNet169(weights='imagenet', include_top=False, input_shape=(180, 180, 3))
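The DenseNet169 backbone is loaded with ImageNet weights and left fully trainable, as the parameter counts in the summary further down confirm. A common alternative, sketched here purely for reference, is two-stage transfer learning:
In [ ]:
# Sketch only -- the model below trains the backbone end to end.
# Two-stage transfer learning would instead start with:
#   base_model.trainable = False   # stage 1: train only the new head
# then, after a few epochs, unfreeze and re-compile with a small
# learning rate before fine-tuning:
#   base_model.trainable = True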
In [ ]:
data_augmentation = tf.keras.Sequential([
    layers.RandomFlip("horizontal"),
    layers.RandomRotation(0.1),
    layers.RandomZoom(0.1),
    layers.RandomContrast(0.1),
])
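To sanity-check the augmentation pipeline, a quick sketch (assuming train_dataset from above) applies it repeatedly to one training image and plots the variants; training=True is needed because these layers are no-ops at inference time.
In [ ]:
# Sketch: visualize a few augmented variants of one training image.
for images, _ in train_dataset.take(1):
    plt.figure(figsize=(10, 3))
    for i in range(4):
        augmented = data_augmentation(images[:1], training=True)
        ax = plt.subplot(1, 4, i + 1)
        ax.imshow(augmented[0].numpy().astype("uint8"))
        ax.axis("off")
    plt.show()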
In [ ]:
model = tf.keras.Sequential([
    layers.Input(shape=(180, 180, 3)),
    # preprocessing: normalization and data augmentation
    normalization_layer,
    data_augmentation,
    # base model (DenseNet169)
    base_model,
    # classification head
    layers.BatchNormalization(),
    layers.GlobalAveragePooling2D(),
    layers.Dense(1024, activation='relu'),
    layers.Dropout(0.5),  # regularization
    layers.Dense(num_classes, activation='softmax'),
])
In [ ]:
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
model.compile(optimizer=optimizer,
              loss=tf.keras.losses.SparseCategoricalCrossentropy(),
              metrics=['accuracy'])
# drop the learning rate to 1e-4 before training
optimizer.lr.assign(0.0001)
Out[ ]:
<tf.Variable 'UnreadVariable' shape=() dtype=float32, numpy=1e-04>
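Manually reassigning the learning rate works, but a more hands-off alternative (a sketch using standard Keras callbacks, not what was run here) is to let a callback shrink it when validation loss plateaus and stop training once it stops improving:
In [ ]:
# Sketch: adaptive learning-rate and early-stopping callbacks.
extra_callbacks = [
    keras.callbacks.ReduceLROnPlateau(monitor="val_loss", factor=0.5,
                                      patience=2, min_lr=1e-6),
    keras.callbacks.EarlyStopping(monitor="val_loss", patience=4,
                                  restore_best_weights=True),
]
# Would be appended to the callbacks list passed to model.fit below.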
In [ ]:
model.summary()
Model: "sequential_7" _________________________________________________________________ Layer (type) Output Shape Param # ================================================================= rescaling_3 (Rescaling) (None, 180, 180, 3) 0 sequential_6 (Sequential) (None, 180, 180, 3) 0 densenet169 (Functional) (None, 5, 5, 1664) 12642880 batch_normalization_3 (Batc (None, 5, 5, 1664) 6656 hNormalization) global_average_pooling2d_3 (None, 1664) 0 (GlobalAveragePooling2D) dense_6 (Dense) (None, 1024) 1704960 dropout_3 (Dropout) (None, 1024) 0 dense_7 (Dense) (None, 108) 110700 ================================================================= Total params: 14,465,196 Trainable params: 14,303,468 Non-trainable params: 161,728 _________________________________________________________________
In [ ]:
callbacks = [
    keras.callbacks.ModelCheckpoint(
        filepath="./models/densenet_with_augmentation_newer.keras",
        save_best_only=True,
        monitor="val_loss"),
]
In [ ]:
epochs = 10
history = model.fit(
    train_dataset,
    validation_data=validation_dataset,
    epochs=epochs,
    callbacks=callbacks,
)
Epoch 1/10
5876/5876 [==============================] - 2673s 453ms/step - loss: 1.1688 - accuracy: 0.6882 - val_loss: 0.8507 - val_accuracy: 0.7524
Epoch 2/10
5876/5876 [==============================] - 2701s 460ms/step - loss: 0.6733 - accuracy: 0.7998 - val_loss: 0.7690 - val_accuracy: 0.7746
Epoch 3/10
5876/5876 [==============================] - 2697s 459ms/step - loss: 0.5577 - accuracy: 0.8295 - val_loss: 0.6638 - val_accuracy: 0.8047
Epoch 4/10
5876/5876 [==============================] - 2694s 458ms/step - loss: 0.4871 - accuracy: 0.8469 - val_loss: 0.7248 - val_accuracy: 0.7976
Epoch 5/10
5876/5876 [==============================] - 2698s 459ms/step - loss: 0.4392 - accuracy: 0.8598 - val_loss: 0.6050 - val_accuracy: 0.8272
Epoch 6/10
5876/5876 [==============================] - 2698s 459ms/step - loss: 0.4024 - accuracy: 0.8705 - val_loss: 0.6410 - val_accuracy: 0.8174
Epoch 7/10
5876/5876 [==============================] - 2695s 459ms/step - loss: 0.3709 - accuracy: 0.8782 - val_loss: 0.7701 - val_accuracy: 0.7919
Epoch 8/10
5876/5876 [==============================] - 2693s 458ms/step - loss: 0.3453 - accuracy: 0.8855 - val_loss: 0.6142 - val_accuracy: 0.8261
Epoch 9/10
5876/5876 [==============================] - 2684s 457ms/step - loss: 0.3263 - accuracy: 0.8910 - val_loss: 0.6070 - val_accuracy: 0.8297
Epoch 10/10
5876/5876 [==============================] - 2673s 455ms/step - loss: 0.3067 - accuracy: 0.8959 - val_loss: 0.6154 - val_accuracy: 0.8283
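The fit call returns a history object that is never plotted in this notebook; a quick sketch of the learning curves, using the matplotlib import from the top:
In [ ]:
# Sketch: plot training vs. validation curves from the history object.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 4))
ax1.plot(history.history['loss'], label='train loss')
ax1.plot(history.history['val_loss'], label='val loss')
ax1.set_xlabel('epoch'); ax1.legend()
ax2.plot(history.history['accuracy'], label='train acc')
ax2.plot(history.history['val_accuracy'], label='val acc')
ax2.set_xlabel('epoch'); ax2.legend()
plt.show()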
In [ ]:
test_loss, test_acc = model.evaluate(test_dataset)
print(f"Test accuracy: {test_acc:.3f}")
737/737 [==============================] - 50s 68ms/step - loss: 0.5948 - accuracy: 0.8283
Test accuracy: 0.828
In [ ]:
best_model = load_model("./models/densenet_with_augmentation_newer.keras")
best_model.evaluate(test_dataset)
737/737 [==============================] - 50s 65ms/step - loss: 0.5832 - accuracy: 0.8328
Out[ ]:
[0.5832472443580627, 0.8328453302383423]
The best checkpoint reaches a test accuracy of 83.28%, slightly above the 82.83% of the final-epoch model.
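The sklearn metrics imported at the top of the notebook are a natural next step; a sketch of a per-class report for the best checkpoint, collecting labels and predictions in a single pass so that the dataset's default per-iteration reshuffling cannot misalign them:
In [ ]:
# Sketch: per-class precision/recall/F1 for the best checkpoint.
y_true, y_pred = [], []
for images, labels in test_dataset:
    probs = best_model.predict(images, verbose=0)
    y_true.extend(labels.numpy())
    y_pred.extend(np.argmax(probs, axis=1))
print(classification_report(y_true, y_pred, target_names=class_names))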